in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 17 Sep 2011 03:18:22 +0000 (03:18 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 17 Sep 2011 03:18:22 +0000 (03:18 +0000)
forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java
forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java

index 7ddfe99..fb34a14 100644 (file)
@@ -151,27 +151,6 @@ public final class AncestralTaxonomyInference {
         final SortedSet<String> not_found = new TreeSet<String>();
         for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
             final PhylogenyNode node = iter.next();
-            // final QUERY_TYPE qt = null;
-            // Taxonomy tax = null;
-            // if ( node.getNodeData().isHasTaxonomy() ) {
-            // tax = node.getNodeData().getTaxonomy();
-            // }
-            // UniProtTaxonomy up_tax = null;
-            // if ( ( tax != null )
-            // && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty(
-            // tax.getScientificName() )
-            // || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ||
-            // !ForesterUtil.isEmpty( tax
-            // .getCommonName() ) ) ) {
-            // final String query = null;
-            // up_tax = obtainUniProtTaxonomy( tax, query, qt );
-            // if ( up_tax == null ) {
-            // not_found.add( query );
-            // }
-            // else {
-            // updateTaxonomy( qt, node, tax, up_tax );
-            // }
-            // }
             if ( !node.isExternal() ) {
                 inferTaxonomyFromDescendents( node, not_found );
             }
@@ -200,9 +179,12 @@ public final class AncestralTaxonomyInference {
                 final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), query, qt );
                 String[] lineage = null;
                 if ( up_tax != null ) {
-                    lineage = obtainLineagePlusOwnScientificName( up_tax );
+                    //lineage = obtainLineagePlusOwnScientificName( up_tax );
+                    lineage = up_tax.getLineageAsArray();
                 }
                 if ( ( lineage == null ) || ( lineage.length < 1 ) ) {
+                    //TODO remove me
+                    System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" );
                     not_found.add( desc.getNodeData().getTaxonomy().asText().toString() );
                     return;
                 }
@@ -213,9 +195,25 @@ public final class AncestralTaxonomyInference {
             }
             else {
                 String msg = "Node(s) with no or inappropriate taxonomic information found";
+                String node = "";
                 if ( !ForesterUtil.isEmpty( desc.getName() ) ) {
-                    msg = "Node " + desc.getName() + " has no or inappropriate taxonomic information";
+                    node = "\"" + desc.getName() + "\"";
                 }
+                else {
+                    node = "[" + desc.getId() + "]";
+                }
+                msg = "Node " + node + " has no or inappropriate taxonomic information";
+                List<PhylogenyNode> e = desc.getAllExternalDescendants();
+                //TODO remove me!
+                System.out.println();
+                int x = 0;
+                for( PhylogenyNode object : e ) {
+                    System.out.println( x + ":" );
+                    System.out.println( object.getName() + "  " );
+                    x++;
+                }
+                System.out.println();
+                //
                 throw new IllegalArgumentException( msg );
             }
         }
@@ -232,6 +230,16 @@ public final class AncestralTaxonomyInference {
             }
         }
         if ( last_common_lineage == null ) {
+            System.out.println( "No common lineage for:" );
+            int counter = 0;
+            for( String[] strings : lineages ) {
+                System.out.print( counter + ": " );
+                ++counter;
+                for( String string : strings ) {
+                    System.out.print( string + " " );
+                }
+                System.out.println();
+            }
             return;
         }
         // if ( !n.getNodeData().isHasTaxonomy() ) {
@@ -328,16 +336,16 @@ public final class AncestralTaxonomyInference {
         return not_found;
     }
 
-    synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) {
-        final String[] lineage = up_tax.getLineage();
-        final String[] lin_plus_self = new String[ lineage.length + 1 ];
-        for( int i = 0; i < lineage.length; ++i ) {
-            lin_plus_self[ i ] = lineage[ i ];
-        }
-        lin_plus_self[ lineage.length ] = up_tax.getScientificName();
-        return lin_plus_self;
-    }
-
+    // TODO this might not be needed anymore
+    //  synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) {
+    //      final String[] lineage = up_tax.getLineageAsArray();
+    //      final String[] lin_plus_self = new String[ lineage.length + 1 ];
+    //      for( int i = 0; i < lineage.length; ++i ) {
+    //          lin_plus_self[ i ] = lineage[ i ];
+    //      }
+    //      lin_plus_self[ lineage.length ] = up_tax.getScientificName();
+    //      return lin_plus_self;
+    //  }
     synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt )
             throws IOException {
         if ( isHasAppropriateId( tax ) ) {
index 80c3b5b..97a7a7a 100644 (file)
@@ -876,7 +876,7 @@ public final class MainFrameApplication extends MainFrame {
         _obtain_detailed_taxonomic_information_jmi
                 .setToolTipText( "To add additional taxonomic information (from UniProt Taxonomy)" );
         _tools_menu
-                .add( _obtain_detailed_taxonomic_information_deleting_jmi = new JMenuItem( "Obtain Detailed Taxonomic Information (Delete Nodes)" ) );
+                .add( _obtain_detailed_taxonomic_information_deleting_jmi = new JMenuItem( "Obtain Detailed Taxonomic Information (deletes nodes!)" ) );
         customizeJMenuItem( _obtain_detailed_taxonomic_information_deleting_jmi );
         _obtain_detailed_taxonomic_information_deleting_jmi
                 .setToolTipText( "To add additional taxonomic information, deletes nodes for which taxonomy cannot found (from UniProt Taxonomy)" );
@@ -1171,10 +1171,10 @@ public final class MainFrameApplication extends MainFrame {
                                            JOptionPane.ERROR_MESSAGE );
             return;
         }
-        final Phylogeny phy = _mainpanel.getCurrentPhylogeny().copy();
         final AncestralTaxonomyInferrer inferrer = new AncestralTaxonomyInferrer( this,
                                                                                   _mainpanel.getCurrentTreePanel(),
-                                                                                  phy );
+                                                                                  _mainpanel.getCurrentPhylogeny()
+                                                                                          .copy() );
         new Thread( inferrer ).start();
     }
 
index b5a3aeb..78fcc21 100644 (file)
@@ -38,7 +38,6 @@ import java.util.Locale;
 import java.util.Set;
 
 import org.forester.application.support_transfer;
-import org.forester.archaeopteryx.Archaeopteryx;
 import org.forester.development.DevelopmentTools;
 import org.forester.evoinference.TestPhylogenyReconstruction;
 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
@@ -7753,13 +7752,15 @@ public final class Test {
             if ( !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
                 return false;
             }
-            if ( !results.get( 0 ).getLineage()[ 0 ].equals( "Eukaryota" ) ) {
+            if ( !results.get( 0 ).getLineage().get( 1 ).equals( "Eukaryota" ) ) {
                 return false;
             }
-            if ( !results.get( 0 ).getLineage()[ 1 ].equals( "Metazoa" ) ) {
+            if ( !results.get( 0 ).getLineage().get( 2 ).equals( "Metazoa" ) ) {
                 return false;
             }
-            if ( !results.get( 0 ).getLineage()[ results.get( 0 ).getLineage().length - 1 ].equals( "Nematostella" ) ) {
+            if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 )
+                    .equals( "Nematostella vectensis" ) ) {
+                System.out.println( results.get( 0 ).getLineage() );
                 return false;
             }
         }
index 6a1ec5a..5e9053a 100644 (file)
@@ -32,27 +32,38 @@ import org.forester.util.ForesterUtil;
 
 public final class UniProtTaxonomy {
 
-    private final String[]              _lineage;
+    private static final String         ARCHAEA                  = "Archaea";
+    private static final String         BACTERIA                 = "Bacteria";
+    private static final String         EUKARYOTA                = "Eukaryota";
+    private final List<String>          _lineage;
     private final String                _code;
     private final String                _scientific_name;
     private final String                _common_name;
     private final String                _synonym;
     private final String                _rank;
     private final String                _id;
-    public final static UniProtTaxonomy DROSOPHILA_GENUS         = new UniProtTaxonomy( new String[] { "Eukaryota",
-            "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera", "Endopterygota", "Diptera",
-            "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae"                },
+    public final static String          CELLULAR_ORGANISMS       = "cellular organisms";
+    public final static UniProtTaxonomy DROSOPHILA_GENUS         = new UniProtTaxonomy( new String[] {
+            CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera",
+            "Endopterygota", "Diptera", "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae" },
                                                                                         "",
                                                                                         "fruit flies",
                                                                                         "Drosophila",
                                                                                         "",
                                                                                         "genus",
                                                                                         "7215" );
-    public final static UniProtTaxonomy XENOPUS_GENUS            = new UniProtTaxonomy( new String[] { "Eukaryota",
-            "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia", "Batrachia", "Anura",
-            "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae" }, "", "", "Xenopus", "", "genus", "8353" );
-    public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] { "Eukaryota",
-            "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida", "Capitellidae", "Capitella" },
+    public final static UniProtTaxonomy XENOPUS_GENUS            = new UniProtTaxonomy( new String[] {
+            CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia",
+            "Batrachia", "Anura", "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae" },
+                                                                                        "",
+                                                                                        "",
+                                                                                        "Xenopus",
+                                                                                        "",
+                                                                                        "genus",
+                                                                                        "8353" );
+    public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] {
+            CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida",
+            "Capitellidae", "Capitella"                                                },
                                                                                         "",
                                                                                         "",
                                                                                         "Capitella teleta",
@@ -80,6 +91,7 @@ public final class UniProtTaxonomy {
         if ( items.length > 7 ) {
             lin = items[ 8 ].split( "; " );
         }
+        _lineage = new ArrayList<String>();
         if ( ( lin != null ) && ( lin.length > 0 ) ) {
             final List<String> temp = new ArrayList<String>();
             for( final String t : lin ) {
@@ -87,17 +99,28 @@ public final class UniProtTaxonomy {
                     temp.add( t.trim() );
                 }
             }
-            _lineage = new String[ temp.size() ];
             for( int i = 0; i < temp.size(); ++i ) {
-                _lineage[ i ] = temp.get( i );
+                if ( i == 0
+                        && ( temp.get( i ).equalsIgnoreCase( EUKARYOTA ) || temp.get( i ).equalsIgnoreCase( BACTERIA ) || temp
+                                .get( i ).equalsIgnoreCase( ARCHAEA ) ) ) {
+                    _lineage.add( CELLULAR_ORGANISMS );
+                }
+                _lineage.add( temp.get( i ) );
             }
         }
-        else {
-            _lineage = new String[ 0 ];
+        if ( _lineage.isEmpty()
+                && ( _scientific_name.equalsIgnoreCase( EUKARYOTA ) || _scientific_name.equalsIgnoreCase( BACTERIA ) || _scientific_name
+                        .equalsIgnoreCase( ARCHAEA ) ) ) {
+            System.out.println( "  >>>>>>>>>>>>>>>>>>>>>>>>>        did it!" );
+            _lineage.add( CELLULAR_ORGANISMS );
+        }
+        _lineage.add( _scientific_name );
+        if ( _lineage.isEmpty() ) {
+            throw new IllegalArgumentException( "lineage in a UniProt Taxonomy can not be empty\n: " + line );
         }
     }
 
-    public UniProtTaxonomy( final String[] lineage,
+    public UniProtTaxonomy( final List<String> lineage,
                             final String code,
                             final String common_name,
                             final String scientific_name,
@@ -113,6 +136,25 @@ public final class UniProtTaxonomy {
         _id = id;
     }
 
+    public UniProtTaxonomy( final String[] lineage,
+                            final String code,
+                            final String common_name,
+                            final String scientific_name,
+                            final String synonym,
+                            final String rank,
+                            final String id ) {
+        _lineage = new ArrayList<String>();
+        for( String l : lineage ) {
+            _lineage.add( l );
+        }
+        _code = code;
+        _scientific_name = scientific_name;
+        _common_name = common_name;
+        _synonym = synonym;
+        _rank = rank;
+        _id = id;
+    }
+
     /**
      * Creates deep copy for all fields, except lineage.
      * 
@@ -140,10 +182,23 @@ public final class UniProtTaxonomy {
         return _id;
     }
 
-    public String[] getLineage() {
+    /**
+     * Returns the lineage list held by this object (no copy is made).
+     */
+    public List<String> getLineage() {
         return _lineage;
     }
 
+    /**
+     * Returns the lineage as a newly allocated array, with the elements
+     * in the same order as in the list returned by getLineage().
+     *
+     * @return a new array holding the lineage elements
+     */
+    public String[] getLineageAsArray() {
+        return _lineage.toArray( new String[ _lineage.size() ] );
+    }
+
     public String getRank() {
         return _rank;
     }
index d24b171..5906a8c 100644 (file)
@@ -201,7 +201,17 @@ public final class UniProtWsTools {
                 // Ignore empty lines.
             }
             else if ( line.startsWith( "Taxon" ) ) {
-                //TODO next the check format FIXME
+                // Verify that the header row names the expected columns. The length
+                // check makes a truncated header fail with an IOException instead of
+                // an ArrayIndexOutOfBoundsException from the indexed accesses below.
+                final String[] items = line.split( "\t" );
+                if ( ( items.length < 9 ) || !( items[ 1 ].equalsIgnoreCase( "Mnemonic" )
+                        && items[ 2 ].equalsIgnoreCase( "Scientific name" ) && items[ 3 ].equalsIgnoreCase( "Common name" )
+                        && items[ 4 ].equalsIgnoreCase( "Synonym" ) && items[ 5 ].equalsIgnoreCase( "Other Names" )
+                        && items[ 6 ].equalsIgnoreCase( "Reviewed" ) && items[ 7 ].equalsIgnoreCase( "Rank" )
+                        && items[ 8 ].equalsIgnoreCase( "Lineage" ) ) ) {
+                    throw new IOException( "Unreconized UniProt Taxonomy format: " + line );
+                }
             }
             else {
                 if ( line.split( "\t" ).length > 4 ) {
@@ -237,7 +247,9 @@ public final class UniProtWsTools {
         String line;
         final List<String> result = new ArrayList<String>();
         while ( ( line = in.readLine() ) != null ) {
-            System.out.println( line );
+            if ( DEBUG ) {
+                System.out.println( line );
+            }
             result.add( line );
             if ( result.size() > max_lines_to_return ) {
                 break;