X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fanalysis%2FAncestralTaxonomyInference.java;h=dec0a635a22def5b0097f958aaca0c0fc2fe536e;hb=c365c2e336ee79677d9e0f5d5c8d280afb56a3ab;hp=7ddfe99cda099da044e63f09039fc09e3d8518e4;hpb=93b3ffddb203151c92200b3498d5559cc4de7d18;p=jalview.git diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java index 7ddfe99..dec0a63 100644 --- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java +++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java @@ -151,27 +151,6 @@ public final class AncestralTaxonomyInference { final SortedSet not_found = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); - // final QUERY_TYPE qt = null; - // Taxonomy tax = null; - // if ( node.getNodeData().isHasTaxonomy() ) { - // tax = node.getNodeData().getTaxonomy(); - // } - // UniProtTaxonomy up_tax = null; - // if ( ( tax != null ) - // && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( - // tax.getScientificName() ) - // || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || - // !ForesterUtil.isEmpty( tax - // .getCommonName() ) ) ) { - // final String query = null; - // up_tax = obtainUniProtTaxonomy( tax, query, qt ); - // if ( up_tax == null ) { - // not_found.add( query ); - // } - // else { - // updateTaxonomy( qt, node, tax, up_tax ); - // } - // } if ( !node.isExternal() ) { inferTaxonomyFromDescendents( node, not_found ); } @@ -195,14 +174,16 @@ public final class AncestralTaxonomyInference { || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() ) || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() ) || !ForesterUtil .isEmpty( desc.getNodeData().getTaxonomy().getCommonName() ) ) ) { - final QUERY_TYPE qt = null; - final String query = null; - final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), query, qt ); + + final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), null, null ); String[] lineage = null; if ( up_tax != null ) { - lineage = obtainLineagePlusOwnScientificName( up_tax ); + //lineage = obtainLineagePlusOwnScientificName( up_tax ); + lineage = up_tax.getLineageAsArray(); } if ( ( lineage == null ) || ( lineage.length < 1 ) ) { + //TODO remove me + System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" ); not_found.add( desc.getNodeData().getTaxonomy().asText().toString() ); return; } @@ -213,9 +194,25 @@ public final class AncestralTaxonomyInference { } else { String msg = "Node(s) with no or inappropriate taxonomic information found"; + String node = ""; if ( !ForesterUtil.isEmpty( desc.getName() ) ) { - msg = "Node " + desc.getName() + " has no or inappropriate taxonomic information"; + node = "\"" + desc.getName() + "\""; } + else { + node = "[" + desc.getId() + "]"; + } + msg = "Node " + node + " has no or inappropriate taxonomic information"; + // final List e = desc.getAllExternalDescendants(); + //TODO remove me! +// System.out.println(); +// int x = 0; +// for( final PhylogenyNode object : e ) { +// System.out.println( x + ":" ); +// System.out.println( object.getName() + " " ); +// x++; +// } +// System.out.println(); + // throw new IllegalArgumentException( msg ); } } @@ -232,15 +229,22 @@ public final class AncestralTaxonomyInference { } } if ( last_common_lineage == null ) { + System.out.println( "No common lineage for:" ); + int counter = 0; + for( final String[] strings : lineages ) { + System.out.print( counter + ": " ); + ++counter; + for( final String string : strings ) { + System.out.print( string + " " ); + } + System.out.println(); + } return; } - // if ( !n.getNodeData().isHasTaxonomy() ) { - // n.getNodeData().setTaxonomy( new Taxonomy() ); - // } final Taxonomy tax = new Taxonomy(); n.getNodeData().setTaxonomy( tax ); tax.setScientificName( last_common_lineage ); - final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage ); + final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage, lineage ); if ( up_tax != null ) { if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) { try { @@ -259,6 +263,15 @@ public final class AncestralTaxonomyInference { if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) { tax.getSynonyms().add( up_tax.getSynonym() ); } + if ( up_tax.getLineage() != null ) { + tax.setLineage( new ArrayList() ); + for( final String lin : up_tax.getLineage() ) { + if ( !ForesterUtil.isEmpty( lin ) ) { + tax.getLineage().add( lin ); + } + } + } + } for( final PhylogenyNode desc : descs ) { if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy() @@ -302,14 +315,14 @@ public final class AncestralTaxonomyInference { not_found_external_nodes.add( node ); } } - UniProtTaxonomy up_tax = null; + UniProtTaxonomy uniprot_tax = null; if ( ( tax != null ) && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() ) || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax .getCommonName() ) ) ) { - up_tax = obtainUniProtTaxonomy( tax, null, qt ); - if ( up_tax != null ) { - updateTaxonomy( qt, node, tax, up_tax ); + uniprot_tax = obtainUniProtTaxonomy( tax, null, qt ); + if ( uniprot_tax != null ) { + updateTaxonomy( qt, node, tax, uniprot_tax ); } else { not_found.add( tax.toString() ); @@ -320,34 +333,38 @@ public final class AncestralTaxonomyInference { } } if ( delete ) { - for( PhylogenyNode node : not_found_external_nodes ) { - phy.deleteSubtree( node, false ); + for( final PhylogenyNode node : not_found_external_nodes ) { + phy.deleteSubtree( node, true ); } + phy.externalNodesHaveChanged(); + phy.hashIDs(); phy.recalculateNumberOfExternalDescendants( true ); } return not_found; } - synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) { - final String[] lineage = up_tax.getLineage(); - final String[] lin_plus_self = new String[ lineage.length + 1 ]; - for( int i = 0; i < lineage.length; ++i ) { - lin_plus_self[ i ] = lineage[ i ]; - } - lin_plus_self[ lineage.length ] = up_tax.getScientificName(); - return lin_plus_self; - } - + // TODO this might not be needed anymore + // synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) { + // final String[] lineage = up_tax.getLineageAsArray(); + // final String[] lin_plus_self = new String[ lineage.length + 1 ]; + // for( int i = 0; i < lineage.length; ++i ) { + // lin_plus_self[ i ] = lineage[ i ]; + // } + // lin_plus_self[ lineage.length ] = up_tax.getScientificName(); + // return lin_plus_self; + // } synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt ) throws IOException { if ( isHasAppropriateId( tax ) ) { query = tax.getIdentifier().getValue(); qt = QUERY_TYPE.ID; + System.out.println( "query by id: " + query); return getTaxonomies( getIdTaxCacheMap(), query, qt ); } else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { query = tax.getScientificName(); qt = QUERY_TYPE.SN; + System.out.println( "query by sn: " + query); return getTaxonomies( getSnTaxCacheMap(), query, qt ); } else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { @@ -362,7 +379,7 @@ public final class AncestralTaxonomyInference { } } - synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn ) throws IOException { + synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn, List lineage ) throws IOException { UniProtTaxonomy up_tax = null; if ( getSnTaxCacheMap().containsKey( sn ) ) { up_tax = getSnTaxCacheMap().get( sn ).copy(); @@ -381,6 +398,7 @@ public final class AncestralTaxonomyInference { if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { getIdTaxCacheMap().put( up_tax.getId(), up_tax ); } + } } return up_tax; @@ -394,9 +412,9 @@ public final class AncestralTaxonomyInference { && ForesterUtil.isEmpty( tax.getScientificName() ) ) { tax.setScientificName( up_tax.getScientificName() ); } - if ( node.isExternal() - && ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) && ForesterUtil - .isEmpty( tax.getTaxonomyCode() ) ) ) { + // if ( node.isExternal() + if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) + && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { tax.setTaxonomyCode( up_tax.getCode() ); } if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() ) @@ -417,6 +435,15 @@ public final class AncestralTaxonomyInference { if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) && ( tax.getIdentifier() == null ) ) { tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) ); } + if ( up_tax.getLineage() != null ) { + tax.setLineage( new ArrayList() ); + for( final String lin : up_tax.getLineage() ) { + if ( !ForesterUtil.isEmpty( lin ) ) { + tax.getLineage().add( lin ); + } + } + } + } private enum QUERY_TYPE {