cleanup
[jalview.git] / forester / java / src / org / forester / analysis / TaxonomyDataManager.java
index 8f9dcc4..52b5fdf 100644 (file)
@@ -22,7 +22,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.analysis;
 
@@ -47,6 +47,7 @@ import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.ForesterUtil;
+import org.forester.util.TaxonomyUtil;
 import org.forester.ws.seqdb.SequenceDbWsTools;
 import org.forester.ws.seqdb.UniProtTaxonomy;
 
@@ -184,10 +185,21 @@ public final class TaxonomyDataManager extends RunnableProcess {
     }
 
     private final static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String query ) throws IOException {
+        if ( query.equalsIgnoreCase( UniProtTaxonomy.BACTERIA ) || query.equalsIgnoreCase( UniProtTaxonomy.ARCHAEA )
+                || query.equalsIgnoreCase( UniProtTaxonomy.VIRUSES )
+                || query.equalsIgnoreCase( UniProtTaxonomy.EUKARYOTA ) || query.equalsIgnoreCase( UniProtTaxonomy.X ) ) {
+            final List<UniProtTaxonomy> l = new ArrayList<UniProtTaxonomy>();
+            l.add( UniProtTaxonomy.createSpecialFromScientificName( query ) );
+            return l;
+        }
         return SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
     }
 
     private final static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String query ) throws IOException {
+        if ( ( query.indexOf( "XX" ) == 3 ) && TaxonomyUtil.isHasTaxIdFromFakeTaxCode( query ) ) {
+            final int id = TaxonomyUtil.getTaxIdFromFakeTaxCode( query );
+            return SequenceDbWsTools.getTaxonomiesFromId( String.valueOf( id ), MAX_TAXONOMIES_TO_RETURN );
+        }
         return SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN );
     }
 
@@ -252,7 +264,7 @@ public final class TaxonomyDataManager extends RunnableProcess {
                         not_found.add( tax.toString() );
                     }
                     else {
-                        not_found.add(node.getName() );
+                        not_found.add( node.getName() );
                     }
                     if ( delete && node.isExternal() ) {
                         not_found_external_nodes.add( node );
@@ -265,7 +277,7 @@ public final class TaxonomyDataManager extends RunnableProcess {
                 phy.deleteSubtree( node, true );
             }
             phy.externalNodesHaveChanged();
-            phy.hashIDs();
+            phy.clearHashIdToNodeMap();
             phy.recalculateNumberOfExternalDescendants( true );
         }
         return not_found;
@@ -326,56 +338,71 @@ public final class TaxonomyDataManager extends RunnableProcess {
     static final UniProtTaxonomy obtainUniProtTaxonomyFromLineage( final List<String> lineage )
             throws AncestralTaxonomyInferenceException, IOException {
         final String lineage_str = ForesterUtil.stringListToString( lineage, ">" );
-        UniProtTaxonomy up_tax = null;
         if ( TaxonomyDataManager.getLineageTaxCacheMap().containsKey( lineage_str ) ) {
-            up_tax = TaxonomyDataManager.getLineageTaxCacheMap().get( lineage_str ).copy();
+            return TaxonomyDataManager.getLineageTaxCacheMap().get( lineage_str ).copy();
         }
         else {
+            final List<UniProtTaxonomy> matching_taxonomies = new ArrayList<UniProtTaxonomy>();
             final List<UniProtTaxonomy> up_taxonomies = getTaxonomiesFromScientificName( lineage
                     .get( lineage.size() - 1 ) );
             if ( ( up_taxonomies != null ) && ( up_taxonomies.size() > 0 ) ) {
                 for( final UniProtTaxonomy up_taxonomy : up_taxonomies ) {
                     boolean match = true;
                     I: for( int i = 0; i < lineage.size(); ++i ) {
-                        if ( !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) {
+                        if ( ( i == up_taxonomy.getLineage().size() )
+                                || !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) {
                             match = false;
                             break I;
                         }
                     }
                     if ( match ) {
-                        if ( up_tax != null ) {
-                            throw new AncestralTaxonomyInferenceException( "lineage \""
-                                    + ForesterUtil.stringListToString( lineage, " > " ) + "\" is not unique" );
-                        }
-                        up_tax = up_taxonomy;
+                        matching_taxonomies.add( up_taxonomy );
                     }
                 }
-                if ( up_tax == null ) {
+                if ( matching_taxonomies.isEmpty() ) {
                     throw new AncestralTaxonomyInferenceException( "lineage \""
                             + ForesterUtil.stringListToString( lineage, " > " ) + "\" not found" );
                 }
-                TaxonomyDataManager.getLineageTaxCacheMap().put( lineage_str, up_tax );
-                if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) {
-                    TaxonomyDataManager.getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax );
+                // In case of more than one match (e.g. "Xenopus" genus and subgenus), keep the one with the shortest, least specific lineage:
+                int shortest = Integer.MAX_VALUE;
+                UniProtTaxonomy least_specific_up_tax = null;
+                for( final UniProtTaxonomy m : matching_taxonomies ) {
+                    final int s = m.getLineage().size();
+                    if ( s < shortest ) {
+                        shortest = s;
+                        least_specific_up_tax = m;
+                    }
                 }
-                if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) {
-                    TaxonomyDataManager.getCodeTaxCacheMap().put( up_tax.getCode(), up_tax );
+                TaxonomyDataManager.getLineageTaxCacheMap().put( lineage_str, least_specific_up_tax );
+                if ( !ForesterUtil.isEmpty( least_specific_up_tax.getScientificName() ) ) {
+                    TaxonomyDataManager.getSnTaxCacheMap().put( least_specific_up_tax.getScientificName(),
+                                                                least_specific_up_tax );
                 }
-                if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
-                    TaxonomyDataManager.getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax );
+                if ( !ForesterUtil.isEmpty( least_specific_up_tax.getCode() ) ) {
+                    TaxonomyDataManager.getCodeTaxCacheMap().put( least_specific_up_tax.getCode(),
+                                                                  least_specific_up_tax );
                 }
-                if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
-                    TaxonomyDataManager.getIdTaxCacheMap().put( up_tax.getId(), up_tax );
+                if ( !ForesterUtil.isEmpty( least_specific_up_tax.getCommonName() ) ) {
+                    TaxonomyDataManager.getCnTaxCacheMap().put( least_specific_up_tax.getCommonName(),
+                                                                least_specific_up_tax );
                 }
+                if ( !ForesterUtil.isEmpty( least_specific_up_tax.getId() ) ) {
+                    TaxonomyDataManager.getIdTaxCacheMap().put( least_specific_up_tax.getId(), least_specific_up_tax );
+                }
+                return least_specific_up_tax;
+            }
+            else {
+                throw new AncestralTaxonomyInferenceException( "taxonomy \"" + ( lineage.get( lineage.size() - 1 ) )
+                        + "\" not found" );
             }
         }
-        return up_tax;
     }
 
     synchronized final private static void updateTaxonomy( final QUERY_TYPE qt,
                                                            final PhylogenyNode node,
                                                            final Taxonomy tax,
-                                                           final UniProtTaxonomy up_tax ) {
+                                                           final UniProtTaxonomy up_tax )
+            throws PhyloXmlDataFormatException {
         if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() )
                 && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
             tax.setScientificName( up_tax.getScientificName() );