From 045ed0731ed1997f6ccf41b2a7688d028ea73a0a Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 27 Sep 2011 23:35:42 +0000 Subject: [PATCH] in progress --- .../src/org/forester/application/map_lengths.java | 114 ++++++++++++++++++++ .../application/simple_node_processor.java | 12 +-- .../org/forester/archaeopteryx/Configuration.java | 24 +++--- .../org/forester/archaeopteryx/ControlPanel.java | 23 ++++- .../src/org/forester/archaeopteryx/TreePanel.java | 25 ++++++ .../src/org/forester/io/parsers/FastaParser.java | 10 +++ .../src/org/forester/phylogeny/data/NodeData.java | 2 +- .../src/org/forester/sequence/BasicSequence.java | 11 +++ .../java/src/org/forester/sequence/Sequence.java | 2 + 9 files changed, 196 insertions(+), 27 deletions(-) create mode 100644 forester/java/src/org/forester/application/map_lengths.java diff --git a/forester/java/src/org/forester/application/map_lengths.java b/forester/java/src/org/forester/application/map_lengths.java new file mode 100644 index 0000000..70bde38 --- /dev/null +++ b/forester/java/src/org/forester/application/map_lengths.java @@ -0,0 +1,114 @@ +// $Id: +// +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2011 Christian M Zmasek +// Copyright (C) 2011 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.application; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.forester.archaeopteryx.Archaeopteryx; +import org.forester.io.parsers.FastaParser; +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.PropertiesMap; +import org.forester.phylogeny.data.Property; +import org.forester.phylogeny.data.Property.AppliesTo; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.sequence.Sequence; +import org.forester.util.CommandLineArguments; +import org.forester.util.ForesterUtil; + +/** + * Command line tool which maps ungapped sequence lengths from FASTA files + * onto the nodes of a phyloXML tree and passes the result to Archaeopteryx. + * The first name on the command line is the phyloXML file, every following + * name is a FASTA file. + */ +public class map_lengths { + + final static private String PRG_NAME = "map_lengths"; + + /** + * Reads the tree and the FASTA files, attaches one "xsd:integer" node + * property named "r:" followed by the command line position of the FASTA file per sequence, and passes the tree to Archaeopteryx. + * + * @param args phyloXML file followed by zero or more FASTA files + */ + public static void main( final String[] args ) { + CommandLineArguments cla = null; + try { + cla = new CommandLineArguments( args ); + } + catch ( final Exception e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + return; // in case fatalError does not terminate the JVM + } + if ( cla.getNumberOfNames() < 1 ) { + ForesterUtil.fatalError( PRG_NAME, "expected a phyloXML file, optionally followed by FASTA files" ); + return; + } + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhyloXmlParser xml_parser = new PhyloXmlParser(); + final Phylogeny[] phylogenies_0 = factory.create( cla.getFile( 0 ), xml_parser ); + if ( phylogenies_0.length < 1 ) { + ForesterUtil.fatalError( PRG_NAME, "no phylogeny found in [" + cla.getFile( 0 ) + "]" ); + return; + } + final Phylogeny phy = phylogenies_0[ 0 ]; 
+ for( int i = 1; i < cla.getNumberOfNames(); i++ ) { + final List<Sequence> seqs = FastaParser.parse( new File( cla.getName( i ) ) ); + for( final Sequence seq : seqs ) { + final int actual_length = seq.getLength() - seq.getNumberOfGapResidues(); + final PhylogenyNode n = phy.getNode( toNodeName( "" + seq.getIdentifier() ) ); + if ( n.getNodeData().getProperties() == null ) { + n.getNodeData().setProperties( new PropertiesMap() ); + } + final PropertiesMap properties = n.getNodeData().getProperties(); + final Property p = new Property( "r:" + i, "" + actual_length, "", "xsd:integer", AppliesTo.NODE ); + properties.addProperty( p ); + } + } + Archaeopteryx.createApplication( phy ); + } + catch ( final IOException e ) { + // Report I/O failures like argument errors instead of only dumping a stack trace. + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + } + + /** + * Returns the part of identifier in front of its first "/" (e.g. "name/1-120" + * yields "name"), or identifier unchanged if it contains no "/" at all. + */ + private static String toNodeName( final String identifier ) { + final int slash = identifier.indexOf( "/" ); + return ( slash < 0 ) ? identifier : identifier.substring( 0, slash ); + } +} diff --git a/forester/java/src/org/forester/application/simple_node_processor.java b/forester/java/src/org/forester/application/simple_node_processor.java index 5fc4180..a190edf 100644 --- a/forester/java/src/org/forester/application/simple_node_processor.java +++ b/forester/java/src/org/forester/application/simple_node_processor.java @@ -41,19 +41,11 @@ public class simple_node_processor { public static void main( final String args[] ) { File in = null; - File out = null; - if ( ( args.length != 2 ) ) { - // System.exit( -1 ); - if ( ( args.length == 0 ) ) { - in = new File( "C:\\Users\\zma\\dollo.xml" ); - out = null; - } - } + final File out = null; try { - System.out.println( "..." 
); CommandLineArguments cla = null; cla = new CommandLineArguments( args ); - // in = cla.getFile( 0 ); + in = cla.getFile( 0 ); // out = cla.getFile( 1 ); // if ( out.exists() ) { // System.out.println( out + " already exists" ); diff --git a/forester/java/src/org/forester/archaeopteryx/Configuration.java b/forester/java/src/org/forester/archaeopteryx/Configuration.java index 32cec55..c516041 100644 --- a/forester/java/src/org/forester/archaeopteryx/Configuration.java +++ b/forester/java/src/org/forester/archaeopteryx/Configuration.java @@ -119,12 +119,12 @@ public final class Configuration { final static int show_taxonomy_scientific_names = 17; final static int show_taxonomy_common_names = 18; final static int color_according_to_annotation = 19; - final static int show_property = 20; - final static int show_gene_symbols = 21; - final static int node_data_popup = 22; - final static int show_relation_confidence = 23; - final static int show_vector_data = 24; - final static int show_taxonomy_images = 25; + final static int show_gene_symbols = 20; + final static int node_data_popup = 21; + final static int show_relation_confidence = 22; + final static int show_vector_data = 23; + final static int show_taxonomy_images = 24; + final static int show_properties = 25; // ------------------ // Click-to options // ------------------ @@ -162,9 +162,9 @@ public final class Configuration { { "Prot/Gene Acc", "display", "no" }, { "Show Internal Data", "display", "yes" }, { "Dyna Hide", "display", "yes" }, { "Taxonomy Scientific", "display", "yes" }, { "Taxonomy Common", "display", "no" }, { "Annotation Colorize", "nodisplay", "no" }, - { "Property", "nodisplay", "no" }, { "Prot/Gene Symbol", "display", "yes" }, - { "Rollover", "display", "yes" }, { "Relation Confidence", "display", "no" }, - { "Vector Data", "display", "no" }, { "Taxonomy Images", "display", "no" } }; + { "Prot/Gene Symbol", "display", "yes" }, { "Rollover", "display", "yes" }, + { "Relation Confidence", 
"display", "no" }, { "Vector Data", "display", "no" }, + { "Taxonomy Images", "display", "no" }, { "Properties", "display", "no" } }; final static String clickto_options[][] = { { "Display Node Data", "display" }, { "Collapse/Uncollapse", "display" }, { "Root/Reroot", "display" }, { "Sub/Super Tree", "display" }, { "Swap Descendants", "display" }, { "Colorize Subtree", "display" }, @@ -1225,12 +1225,12 @@ public final class Configuration { else if ( key.equals( "color_according_to_annotation" ) ) { key_index = Configuration.color_according_to_annotation; } - else if ( key.equals( "show_property" ) ) { - key_index = Configuration.show_property; - } else if ( key.equals( "show_vector_data" ) ) { key_index = Configuration.show_vector_data; } + else if ( key.equals( "show_properties" ) ) { + key_index = Configuration.show_properties; + } else if ( key.equals( "show_relation_confidence" ) ) { key_index = Configuration.show_relation_confidence; } diff --git a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java index 9729cb8..b308854 100644 --- a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java @@ -109,6 +109,7 @@ final class ControlPanel extends JPanel implements ActionListener { private JComboBox _show_sequence_relations; private JComboBox _sequence_relation_type_box; private JCheckBox _show_vector_data_cb; + private JCheckBox _show_properties_cb; private JLabel _click_to_label; private JLabel _zoom_label; private JLabel _domain_display_label; @@ -495,6 +496,11 @@ final class ControlPanel extends JPanel implements ActionListener { addJCheckBox( _show_vector_data_cb, ch_panel ); add( ch_panel ); break; + case Configuration.show_properties: + _show_properties_cb = new JCheckBox( title ); + addJCheckBox( _show_properties_cb, ch_panel ); + add( ch_panel ); + break; default: throw new RuntimeException( "unknown 
checkbox: " + which ); } @@ -816,6 +822,10 @@ final class ControlPanel extends JPanel implements ActionListener { return ( ( _show_vector_data_cb != null ) && _show_vector_data_cb.isSelected() ); } + public boolean isShowProperties() { + return ( ( _show_properties_cb != null ) && _show_properties_cb.isSelected() ); + } + boolean isShowGeneSymbols() { return ( ( _show_gene_symbols != null ) && _show_gene_symbols.isSelected() ); } @@ -828,10 +838,6 @@ final class ControlPanel extends JPanel implements ActionListener { return ( ( _show_node_names != null ) && _show_node_names.isSelected() ); } - boolean isShowProperty() { - return ( ( _show_annotation != null ) && _show_annotation.isSelected() ); - } - boolean isShowSequenceAcc() { return ( ( _show_sequence_acc != null ) && _show_sequence_acc.isSelected() ); } @@ -1065,6 +1071,11 @@ final class ControlPanel extends JPanel implements ActionListener { _show_vector_data_cb.setSelected( state ); } break; + case Configuration.show_properties: + if ( _show_properties_cb != null ) { + _show_properties_cb.setSelected( state ); + } + break; case Configuration.show_sequence_acc: if ( _show_sequence_acc != null ) { _show_sequence_acc.setSelected( state ); @@ -1579,6 +1590,10 @@ final class ControlPanel extends JPanel implements ActionListener { _configuration.getDisplayTitle( Configuration.show_vector_data ) ); setCheckbox( Configuration.show_vector_data, _configuration.doCheckOption( Configuration.show_vector_data ) ); } + if ( _configuration.doDisplayOption( Configuration.show_properties ) ) { + addCheckbox( Configuration.show_properties, _configuration.getDisplayTitle( Configuration.show_properties ) ); + setCheckbox( Configuration.show_properties, _configuration.doCheckOption( Configuration.show_properties ) ); + } } void setupSearchTools() { diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 136bfd5..aa2582d 100644 --- 
a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -575,6 +575,9 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee sum += getTreeFontSet()._fm_large_italic.stringWidth( tax.getCommonName() + " ()" ); } } + if ( getControlPanel().isShowProperties() && node.getNodeData().isHasProperties() ) { /* only when the control panel option is on and the node has properties: add the large-font width of the property text to sum */ + sum += getTreeFontSet()._fm_large.stringWidth( propertiesToString( node ).toString() ); + } if ( getControlPanel().isShowBinaryCharacters() && node.getNodeData().isHasBinaryCharacters() ) { sum += getTreeFontSet()._fm_large.stringWidth( node.getNodeData().getBinaryCharacters() .getGainedCharactersAsStringBuffer().toString() ); @@ -2967,6 +2970,12 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee _sb.append( node.getNodeData().getSequence().getAccession().getValue() ); } } + if ( getControlPanel().isShowProperties() && node.getNodeData().isHasProperties() ) { /* append the property text to _sb, preceded by one blank when _sb already holds text */ + if ( _sb.length() > 0 ) { + _sb.append( " " ); + } + _sb.append( propertiesToString( node ) ); + } g.setFont( getTreeFontSet().getLargeFont() ); if ( is_in_found_nodes ) { g.setFont( getTreeFontSet().getLargeFont().deriveFont( Font.BOLD ) ); @@ -3116,6 +3125,22 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } } + private StringBuffer propertiesToString( final PhylogenyNode node ) { /* Returns asText() of every property of node, in getPropertyRefs() iteration order, joined by single blanks. NOTE(review): getProperties() is dereferenced without a null check; both call sites in this patch guard with isHasProperties(). */ + final PropertiesMap properties = node.getNodeData().getProperties(); + final StringBuffer sb = new StringBuffer(); + boolean first = true; /* suppresses the separator in front of the first entry */ + for( final String ref : properties.getPropertyRefs() ) { + if ( first ) { + first = false; + } + else { + sb.append( " " ); + } + sb.append( properties.getProperty( ref ).asText() ); + } + return sb; + } + private double drawTaxonomyImage( final double x, final double y, final PhylogenyNode node, final Graphics2D g ) { final List us = new ArrayList(); for( final Taxonomy t : node.getNodeData().getTaxonomies() ) { diff 
--git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java index b04e254..7b7bfac 100644 --- a/forester/java/src/org/forester/io/parsers/FastaParser.java +++ b/forester/java/src/org/forester/io/parsers/FastaParser.java @@ -28,6 +28,8 @@ package org.forester.io.parsers; import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -92,6 +94,10 @@ public class FastaParser { return false; } + static public Msa parseMsa( final File f ) throws IOException { /* Convenience overload: opens f and delegates to parseMsa( InputStream ). NOTE(review): the FileInputStream created here is never closed in this method; confirm the callee closes it on success and on failure, otherwise wrap the call in try/finally. */ + return parseMsa( new FileInputStream( f ) ); + } + static public Msa parseMsa( final InputStream is ) throws IOException { return BasicMsa.createInstance( parse( is ) ); } @@ -104,6 +110,10 @@ public class FastaParser { return parseMsa( new ByteArrayInputStream( bytes ) ); } + static public List parse( final File f ) throws IOException { /* Convenience overload: opens f and delegates to parse( InputStream ). NOTE(review): same unclosed FileInputStream concern as parseMsa( File ); verify against the body of parse( InputStream ). */ + return parse( new FileInputStream( f ) ); + } + static public List parse( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; diff --git a/forester/java/src/org/forester/phylogeny/data/NodeData.java b/forester/java/src/org/forester/phylogeny/data/NodeData.java index d0a7fc2..eb41d94 100644 --- a/forester/java/src/org/forester/phylogeny/data/NodeData.java +++ b/forester/java/src/org/forester/phylogeny/data/NodeData.java @@ -281,7 +281,7 @@ public class NodeData implements PhylogenyData { } public boolean isHasProperties() { - return getProperties() != null; + return ( getProperties() != null ) && ( getProperties().size() > 0 ); /* now true only for a non-null property map with at least one entry; an empty map counts as having no properties */ } public boolean isHasReference() { diff --git a/forester/java/src/org/forester/sequence/BasicSequence.java b/forester/java/src/org/forester/sequence/BasicSequence.java index ed56ed8..786cc03 100644 --- 
a/forester/java/src/org/forester/sequence/BasicSequence.java +++ b/forester/java/src/org/forester/sequence/BasicSequence.java @@ -71,6 +71,17 @@ public class BasicSequence implements Sequence { } @Override + public int getNumberOfGapResidues() { /* Returns how many elements of _mol_sequence equal GAP; scans the whole array on every call. */ + int gaps = 0; + for( int i = 0; i < _mol_sequence.length; ++i ) { + if ( _mol_sequence[ i ] == GAP ) { + ++gaps; + } + } + return gaps; + } + + @Override public String toString() { final StringBuffer sb = new StringBuffer(); sb.append( _identifier.toString() ); diff --git a/forester/java/src/org/forester/sequence/Sequence.java b/forester/java/src/org/forester/sequence/Sequence.java index b794af9..fd13abf 100644 --- a/forester/java/src/org/forester/sequence/Sequence.java +++ b/forester/java/src/org/forester/sequence/Sequence.java @@ -41,6 +41,8 @@ public interface Sequence { public abstract int getLength(); + public abstract int getNumberOfGapResidues(); /* Number of gap residues in this sequence; BasicSequence implements it as the count of GAP characters. */ + public abstract char[] getMolecularSequence(); public abstract char getResidueAt( final int position ); -- 1.7.10.2