From 4795b04a6c0709d85e47aee7aa59fdbd6d178a7e Mon Sep 17 00:00:00 2001
From: cmzmasek
Date: Mon, 19 Sep 2011 05:02:45 +0000
Subject: [PATCH] needs: testing, proper error messages and dialogs, code cleanup, cache mechanism for lineages!

---
 Note: the line structure and the generic type arguments of this patch were
 reconstructed from a whitespace-collapsed copy. Hunk line counts match the
 @@ headers and the diffstat, but the indentation of context lines is
 approximate; apply with `git apply --ignore-whitespace` if it is rejected.

 .../analysis/AncestralTaxonomyInference.java | 63 ++++++++++++++++++--
 .../org/forester/ws/uniprot/UniProtTaxonomy.java | 2 +-
 2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
index dec0a63..ba0a6df 100644
--- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
+++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
@@ -216,7 +216,8 @@ public final class AncestralTaxonomyInference {
                 throw new IllegalArgumentException( msg );
             }
         }
-        String last_common_lineage = null;
+        List<String> last_common_lineage = new ArrayList<String>();
+        String last_common = null;
         if ( shortest_lin_length > 0 ) {
             I: for( int i = 0; i < shortest_lin_length; ++i ) {
                 final String lineage_0 = lineages.get( 0 )[ i ];
@@ -225,10 +226,13 @@ public final class AncestralTaxonomyInference {
                         break I;
                     }
                 }
-                last_common_lineage = lineage_0;
+                // last_common_lineage = lineage_0;
+                last_common_lineage.add( lineage_0 ) ;
+                last_common =lineage_0;
             }
         }
-        if ( last_common_lineage == null ) {
+        // if ( last_common_lineage == null ) {
+        if ( last_common_lineage.isEmpty() ) {
             System.out.println( "No common lineage for:" );
             int counter = 0;
             for( final String[] strings : lineages ) {
@@ -243,8 +247,8 @@ public final class AncestralTaxonomyInference {
         }
         final Taxonomy tax = new Taxonomy();
         n.getNodeData().setTaxonomy( tax );
-        tax.setScientificName( last_common_lineage );
-        final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage, lineage );
+        tax.setScientificName( last_common );
+        final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromCommonLineage( last_common_lineage );
         if ( up_tax != null ) {
             if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) {
                 try {
@@ -379,7 +383,7 @@ public final class AncestralTaxonomyInference {
         }
     }
 
-    synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn, List<String> lineage ) throws IOException {
+    synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn) throws IOException {
         UniProtTaxonomy up_tax = null;
         if ( getSnTaxCacheMap().containsKey( sn ) ) {
             up_tax = getSnTaxCacheMap().get( sn ).copy();
@@ -403,6 +407,53 @@ public final class AncestralTaxonomyInference {
         }
         return up_tax;
     }
+
+    synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromCommonLineage( List<String> lineage ) throws IOException {
+        UniProtTaxonomy up_tax = null;
+        // -- if ( getSnTaxCacheMap().containsKey( sn ) ) {
+        // -- up_tax = getSnTaxCacheMap().get( sn ).copy();
+        // -- }
+        // else {
+        final List<UniProtTaxonomy> up_taxonomies = getTaxonomiesFromScientificName( lineage.get(lineage.size() -1 ) );
+        //-- if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) {
+
+        if ( ( up_taxonomies != null ) && ( up_taxonomies.size() > 0 ) ) {
+            for( UniProtTaxonomy up_taxonomy : up_taxonomies ) {
+                boolean match = true;
+
+                I: for( int i = 0; i < lineage.size(); ++i ) {
+                    if ( !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) {
+                        match = false;
+                        break I;
+                    }
+                }
+                if ( match ) {
+                    if ( up_tax != null ) {
+                        throw new IOException( "not unique!");
+                    }
+                    up_tax = up_taxonomy;
+                }
+            }
+
+            if ( up_tax == null ) {
+                throw new IOException( "not found!");
+            }
+            //-- up_tax = up_taxonomies.get( 0 );
+            //-- getSnTaxCacheMap().put( sn, up_tax );
+            if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) {
+                getCodeTaxCacheMap().put( up_tax.getCode(), up_tax );
+            }
+            if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
+                getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax );
+            }
+            if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
+                getIdTaxCacheMap().put( up_tax.getId(), up_tax );
+            }
+
+        }
+        // }
+        return up_tax;
+    }
 
     synchronized private static void updateTaxonomy( final QUERY_TYPE qt,
                                                      final PhylogenyNode node,
diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java
index 14fa14a..ac462f9 100644
--- a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java
+++ b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java
@@ -88,7 +88,7 @@ public final class UniProtTaxonomy {
             _rank = "";
         }
         String[] lin = null;
-        if ( items.length > 7 ) {
+        if ( items.length > 8 ) {
             lin = items[ 8 ].split( "; " );
         }
         _lineage = new ArrayList<String>();
-- 
1.7.10.2