From 9bb791edf52887de31d1a49ff9606f85724a09a8 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Sat, 17 Sep 2011 03:18:22 +0000 Subject: [PATCH] in progress --- .../analysis/AncestralTaxonomyInference.java | 74 ++++++++++-------- .../archaeopteryx/MainFrameApplication.java | 6 +- forester/java/src/org/forester/test/Test.java | 9 ++- .../org/forester/ws/uniprot/UniProtTaxonomy.java | 81 ++++++++++++++++---- .../org/forester/ws/uniprot/UniProtWsTools.java | 12 ++- 5 files changed, 125 insertions(+), 57 deletions(-) diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java index 7ddfe99..fb34a14 100644 --- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java +++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java @@ -151,27 +151,6 @@ public final class AncestralTaxonomyInference { final SortedSet not_found = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); - // final QUERY_TYPE qt = null; - // Taxonomy tax = null; - // if ( node.getNodeData().isHasTaxonomy() ) { - // tax = node.getNodeData().getTaxonomy(); - // } - // UniProtTaxonomy up_tax = null; - // if ( ( tax != null ) - // && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( - // tax.getScientificName() ) - // || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || - // !ForesterUtil.isEmpty( tax - // .getCommonName() ) ) ) { - // final String query = null; - // up_tax = obtainUniProtTaxonomy( tax, query, qt ); - // if ( up_tax == null ) { - // not_found.add( query ); - // } - // else { - // updateTaxonomy( qt, node, tax, up_tax ); - // } - // } if ( !node.isExternal() ) { inferTaxonomyFromDescendents( node, not_found ); } @@ -200,9 +179,12 @@ public final class AncestralTaxonomyInference { final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), query, qt ); String[] lineage = null; if ( up_tax != null ) { - lineage = obtainLineagePlusOwnScientificName( up_tax ); + //lineage = obtainLineagePlusOwnScientificName( up_tax ); + lineage = up_tax.getLineageAsArray(); } if ( ( lineage == null ) || ( lineage.length < 1 ) ) { + //TODO remove me + System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" ); not_found.add( desc.getNodeData().getTaxonomy().asText().toString() ); return; } @@ -213,9 +195,25 @@ public final class AncestralTaxonomyInference { } else { String msg = "Node(s) with no or inappropriate taxonomic information found"; + String node = ""; if ( !ForesterUtil.isEmpty( desc.getName() ) ) { - msg = "Node " + desc.getName() + " has no or inappropriate taxonomic information"; + node = "\"" + desc.getName() + "\""; } + else { + node = "[" + desc.getId() + "]"; + } + msg = "Node " + node + " has no or inappropriate taxonomic information"; + List e = desc.getAllExternalDescendants(); + //TODO remove me! + System.out.println(); + int x = 0; + for( PhylogenyNode object : e ) { + System.out.println( x + ":" ); + System.out.println( object.getName() + " " ); + x++; + } + System.out.println(); + // throw new IllegalArgumentException( msg ); } } @@ -232,6 +230,16 @@ public final class AncestralTaxonomyInference { } } if ( last_common_lineage == null ) { + System.out.println( "No common lineage for:" ); + int counter = 0; + for( String[] strings : lineages ) { + System.out.print( counter + ": " ); + ++counter; + for( String string : strings ) { + System.out.print( string + " " ); + } + System.out.println(); + } return; } // if ( !n.getNodeData().isHasTaxonomy() ) { @@ -328,16 +336,16 @@ public final class AncestralTaxonomyInference { return not_found; } - synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) { - final String[] lineage = up_tax.getLineage(); - final String[] lin_plus_self = new String[ lineage.length + 1 ]; - for( int i = 0; i < lineage.length; ++i ) { - lin_plus_self[ i ] = lineage[ i ]; - } - lin_plus_self[ lineage.length ] = up_tax.getScientificName(); - return lin_plus_self; - } - + // TODO this might not be needed anymore + // synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) { + // final String[] lineage = up_tax.getLineageAsArray(); + // final String[] lin_plus_self = new String[ lineage.length + 1 ]; + // for( int i = 0; i < lineage.length; ++i ) { + // lin_plus_self[ i ] = lineage[ i ]; + // } + // lin_plus_self[ lineage.length ] = up_tax.getScientificName(); + // return lin_plus_self; + // } synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt ) throws IOException { if ( isHasAppropriateId( tax ) ) { diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index 80c3b5b..97a7a7a 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -876,7 +876,7 @@ public final class MainFrameApplication extends MainFrame { _obtain_detailed_taxonomic_information_jmi .setToolTipText( "To add additional taxonomic information (from UniProt Taxonomy)" ); _tools_menu - .add( _obtain_detailed_taxonomic_information_deleting_jmi = new JMenuItem( "Obtain Detailed Taxonomic Information (Delete Nodes)" ) ); + .add( _obtain_detailed_taxonomic_information_deleting_jmi = new JMenuItem( "Obtain Detailed Taxonomic Information (deletes nodes!)" ) ); customizeJMenuItem( _obtain_detailed_taxonomic_information_deleting_jmi ); _obtain_detailed_taxonomic_information_deleting_jmi .setToolTipText( "To add additional taxonomic information, deletes nodes for which taxonomy cannot found (from UniProt Taxonomy)" ); @@ -1171,10 +1171,10 @@ public final class MainFrameApplication extends MainFrame { JOptionPane.ERROR_MESSAGE ); return; } - final Phylogeny phy = _mainpanel.getCurrentPhylogeny().copy(); final AncestralTaxonomyInferrer inferrer = new AncestralTaxonomyInferrer( this, _mainpanel.getCurrentTreePanel(), - phy ); + _mainpanel.getCurrentPhylogeny() + .copy() ); new Thread( inferrer ).start(); } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index b5a3aeb..78fcc21 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -38,7 +38,6 @@ import java.util.Locale; import java.util.Set; import org.forester.application.support_transfer; -import org.forester.archaeopteryx.Archaeopteryx; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; @@ -7753,13 +7752,15 @@ public final class Test { if ( !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) { return false; } - if ( !results.get( 0 ).getLineage()[ 0 ].equals( "Eukaryota" ) ) { + if ( !results.get( 0 ).getLineage().get( 1 ).equals( "Eukaryota" ) ) { return false; } - if ( !results.get( 0 ).getLineage()[ 1 ].equals( "Metazoa" ) ) { + if ( !results.get( 0 ).getLineage().get( 2 ).equals( "Metazoa" ) ) { return false; } - if ( !results.get( 0 ).getLineage()[ results.get( 0 ).getLineage().length - 1 ].equals( "Nematostella" ) ) { + if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 ) + .equals( "Nematostella vectensis" ) ) { + System.out.println( results.get( 0 ).getLineage() ); return false; } } diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java index 6a1ec5a..5e9053a 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java @@ -32,27 +32,38 @@ import org.forester.util.ForesterUtil; public final class UniProtTaxonomy { - private final String[] _lineage; + private static final String ARCHAEA = "Archaea"; + private static final String BACTERIA = "Bacteria"; + private static final String EUKARYOTA = "Eukaryota"; + private final List _lineage; private final String _code; private final String _scientific_name; private final String _common_name; private final String _synonym; private final String _rank; private final String _id; - public final static UniProtTaxonomy DROSOPHILA_GENUS = new UniProtTaxonomy( new String[] { "Eukaryota", - "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera", "Endopterygota", "Diptera", - "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae" }, + public final static String CELLULAR_ORGANISMS = "cellular organisms"; + public final static UniProtTaxonomy DROSOPHILA_GENUS = new UniProtTaxonomy( new String[] { + CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera", + "Endopterygota", "Diptera", "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae" }, "", "fruit flies", "Drosophila", "", "genus", "7215" ); - public final static UniProtTaxonomy XENOPUS_GENUS = new UniProtTaxonomy( new String[] { "Eukaryota", - "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia", "Batrachia", "Anura", - "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae" }, "", "", "Xenopus", "", "genus", "8353" ); - public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] { "Eukaryota", - "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida", "Capitellidae", "Capitella" }, + public final static UniProtTaxonomy XENOPUS_GENUS = new UniProtTaxonomy( new String[] { + CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia", + "Batrachia", "Anura", "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae" }, + "", + "", + "Xenopus", + "", + "genus", + "8353" ); + public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] { + CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida", + "Capitellidae", "Capitella" }, "", "", "Capitella teleta", @@ -80,6 +91,7 @@ public final class UniProtTaxonomy { if ( items.length > 7 ) { lin = items[ 8 ].split( "; " ); } + _lineage = new ArrayList(); if ( ( lin != null ) && ( lin.length > 0 ) ) { final List temp = new ArrayList(); for( final String t : lin ) { @@ -87,17 +99,28 @@ public final class UniProtTaxonomy { temp.add( t.trim() ); } } - _lineage = new String[ temp.size() ]; for( int i = 0; i < temp.size(); ++i ) { - _lineage[ i ] = temp.get( i ); + if ( i == 0 + && ( temp.get( i ).equalsIgnoreCase( EUKARYOTA ) || temp.get( i ).equalsIgnoreCase( BACTERIA ) || temp + .get( i ).equalsIgnoreCase( ARCHAEA ) ) ) { + _lineage.add( CELLULAR_ORGANISMS ); + } + _lineage.add( temp.get( i ) ); } } - else { - _lineage = new String[ 0 ]; + if ( _lineage.isEmpty() + && ( _scientific_name.equalsIgnoreCase( EUKARYOTA ) || _scientific_name.equalsIgnoreCase( BACTERIA ) || _scientific_name + .equalsIgnoreCase( ARCHAEA ) ) ) { + System.out.println( " >>>>>>>>>>>>>>>>>>>>>>>>> did it!" ); + _lineage.add( CELLULAR_ORGANISMS ); + } + _lineage.add( _scientific_name ); + if ( _lineage.isEmpty() ) { + throw new IllegalArgumentException( "lineage in a UniProt Taxonomy can not be empty\n: " + line ); } } - public UniProtTaxonomy( final String[] lineage, + public UniProtTaxonomy( final List lineage, final String code, final String common_name, final String scientific_name, @@ -113,6 +136,25 @@ public final class UniProtTaxonomy { _id = id; } + public UniProtTaxonomy( final String[] lineage, + final String code, + final String common_name, + final String scientific_name, + final String synonym, + final String rank, + final String id ) { + _lineage = new ArrayList(); + for( String l : lineage ) { + _lineage.add( l ); + } + _code = code; + _scientific_name = scientific_name; + _common_name = common_name; + _synonym = synonym; + _rank = rank; + _id = id; + } + /** * Creates deep copy for all fields, except lineage. * @@ -140,10 +182,19 @@ public final class UniProtTaxonomy { return _id; } - public String[] getLineage() { + public List getLineage() { return _lineage; } + public String[] getLineageAsArray() { + String[] str = new String[ _lineage.size() ]; + int i = 0; + for( String l : _lineage ) { + str[ i++ ] = l; + } + return str; + } + public String getRank() { return _rank; } diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java index d24b171..5906a8c 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java @@ -201,7 +201,13 @@ public final class UniProtWsTools { // Ignore empty lines. } else if ( line.startsWith( "Taxon" ) ) { - //TODO next the check format FIXME + final String[] items = line.split( "\t" ); + if ( !( items[ 1 ].equalsIgnoreCase( "Mnemonic" ) && items[ 2 ].equalsIgnoreCase( "Scientific name" ) + && items[ 3 ].equalsIgnoreCase( "Common name" ) && items[ 4 ].equalsIgnoreCase( "Synonym" ) + && items[ 5 ].equalsIgnoreCase( "Other Names" ) && items[ 6 ].equalsIgnoreCase( "Reviewed" ) + && items[ 7 ].equalsIgnoreCase( "Rank" ) && items[ 8 ].equalsIgnoreCase( "Lineage" ) ) ) { + throw new IOException( "Unreconized UniProt Taxonomy format: " + line ); + } } else { if ( line.split( "\t" ).length > 4 ) { @@ -237,7 +243,9 @@ public final class UniProtWsTools { String line; final List result = new ArrayList(); while ( ( line = in.readLine() ) != null ) { - System.out.println( line ); + if ( DEBUG ) { + System.out.println( line ); + } result.add( line ); if ( result.size() > max_lines_to_return ) { break; -- 1.7.10.2