final SortedSet<String> not_found = new TreeSet<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
- // final QUERY_TYPE qt = null;
- // Taxonomy tax = null;
- // if ( node.getNodeData().isHasTaxonomy() ) {
- // tax = node.getNodeData().getTaxonomy();
- // }
- // UniProtTaxonomy up_tax = null;
- // if ( ( tax != null )
- // && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty(
- // tax.getScientificName() )
- // || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ||
- // !ForesterUtil.isEmpty( tax
- // .getCommonName() ) ) ) {
- // final String query = null;
- // up_tax = obtainUniProtTaxonomy( tax, query, qt );
- // if ( up_tax == null ) {
- // not_found.add( query );
- // }
- // else {
- // updateTaxonomy( qt, node, tax, up_tax );
- // }
- // }
if ( !node.isExternal() ) {
inferTaxonomyFromDescendents( node, not_found );
}
|| !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() )
|| !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() ) || !ForesterUtil
.isEmpty( desc.getNodeData().getTaxonomy().getCommonName() ) ) ) {
- final QUERY_TYPE qt = null;
- final String query = null;
- final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), query, qt );
+
+ final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), null, null );
String[] lineage = null;
if ( up_tax != null ) {
- lineage = obtainLineagePlusOwnScientificName( up_tax );
+ //lineage = obtainLineagePlusOwnScientificName( up_tax );
+ lineage = up_tax.getLineageAsArray();
}
if ( ( lineage == null ) || ( lineage.length < 1 ) ) {
+ //TODO remove me: temporary debug output (the println below) for nodes without a lineage
+ System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" );
not_found.add( desc.getNodeData().getTaxonomy().asText().toString() );
return;
}
}
else {
String msg = "Node(s) with no or inappropriate taxonomic information found";
+ String node = "";
if ( !ForesterUtil.isEmpty( desc.getName() ) ) {
- msg = "Node " + desc.getName() + " has no or inappropriate taxonomic information";
+ node = "\"" + desc.getName() + "\"";
}
+ else {
+ node = "[" + desc.getId() + "]";
+ }
+ msg = "Node " + node + " has no or inappropriate taxonomic information";
+ // final List<PhylogenyNode> e = desc.getAllExternalDescendants();
+ //TODO remove me! (the commented-out debug dump of external descendants below)
+// System.out.println();
+// int x = 0;
+// for( final PhylogenyNode object : e ) {
+// System.out.println( x + ":" );
+// System.out.println( object.getName() + " " );
+// x++;
+// }
+// System.out.println();
+ //
throw new IllegalArgumentException( msg );
}
}
}
}
if ( last_common_lineage == null ) {
+ System.out.println( "No common lineage for:" );
+ int counter = 0;
+ for( final String[] strings : lineages ) {
+ System.out.print( counter + ": " );
+ ++counter;
+ for( final String string : strings ) {
+ System.out.print( string + " " );
+ }
+ System.out.println();
+ }
return;
}
- // if ( !n.getNodeData().isHasTaxonomy() ) {
- // n.getNodeData().setTaxonomy( new Taxonomy() );
- // }
final Taxonomy tax = new Taxonomy();
n.getNodeData().setTaxonomy( tax );
tax.setScientificName( last_common_lineage );
- final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage );
+ final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage, lineage );
if ( up_tax != null ) {
if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) {
try {
if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
tax.getSynonyms().add( up_tax.getSynonym() );
}
+ if ( up_tax.getLineage() != null ) {
+ tax.setLineage( new ArrayList<String>() );
+ for( final String lin : up_tax.getLineage() ) {
+ if ( !ForesterUtil.isEmpty( lin ) ) {
+ tax.getLineage().add( lin );
+ }
+ }
+ }
+
}
for( final PhylogenyNode desc : descs ) {
if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy()
not_found_external_nodes.add( node );
}
}
- UniProtTaxonomy up_tax = null;
+ UniProtTaxonomy uniprot_tax = null;
if ( ( tax != null )
&& ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() )
|| !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax
.getCommonName() ) ) ) {
- up_tax = obtainUniProtTaxonomy( tax, null, qt );
- if ( up_tax != null ) {
- updateTaxonomy( qt, node, tax, up_tax );
+ uniprot_tax = obtainUniProtTaxonomy( tax, null, qt );
+ if ( uniprot_tax != null ) {
+ updateTaxonomy( qt, node, tax, uniprot_tax );
}
else {
not_found.add( tax.toString() );
}
}
if ( delete ) {
- for( PhylogenyNode node : not_found_external_nodes ) {
- phy.deleteSubtree( node, false );
+ for( final PhylogenyNode node : not_found_external_nodes ) {
+ phy.deleteSubtree( node, true );
}
+ phy.externalNodesHaveChanged();
+ phy.hashIDs();
phy.recalculateNumberOfExternalDescendants( true );
}
return not_found;
}
- synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) {
- final String[] lineage = up_tax.getLineage();
- final String[] lin_plus_self = new String[ lineage.length + 1 ];
- for( int i = 0; i < lineage.length; ++i ) {
- lin_plus_self[ i ] = lineage[ i ];
- }
- lin_plus_self[ lineage.length ] = up_tax.getScientificName();
- return lin_plus_self;
- }
-
+ // TODO this helper might not be needed anymore; its visible call site now uses up_tax.getLineageAsArray() instead
+ // synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) {
+ // final String[] lineage = up_tax.getLineageAsArray();
+ // final String[] lin_plus_self = new String[ lineage.length + 1 ];
+ // for( int i = 0; i < lineage.length; ++i ) {
+ // lin_plus_self[ i ] = lineage[ i ];
+ // }
+ // lin_plus_self[ lineage.length ] = up_tax.getScientificName();
+ // return lin_plus_self;
+ // }
/**
 * Looks up the UniProt taxonomy entry for the given taxonomy, choosing the
 * query key from whichever field of {@code tax} is populated. The visible
 * priority order is: identifier (when {@code isHasAppropriateId( tax )}
 * holds), then scientific name, then taxonomy code.
 * <p>
 * Declared {@code synchronized static}, so calls are serialized on the
 * enclosing class's monitor.
 * <p>
 * NOTE(review): this excerpt is cut after the taxonomy-code test; the body of
 * that branch and any later branches (and the fall-through return) are not
 * visible here.
 *
 * @param tax   the taxonomy whose identifier, scientific name or code is used as the query
 * @param query overwritten in every visible branch before it is used; because Java passes
 *              arguments by value, the reassignment is NOT seen by the caller
 * @param qt    overwritten in every visible branch before it is used; likewise not
 *              propagated back to the caller
 * @return the result of {@code getTaxonomies} for the selected cache map, query and query type
 * @throws IOException declared by this method; presumably raised by the lookup in
 *                     {@code getTaxonomies} -- TODO confirm
 */
synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt )
throws IOException {
// First choice: query by taxonomy identifier, using the id-keyed cache map.
if ( isHasAppropriateId( tax ) ) {
query = tax.getIdentifier().getValue();
qt = QUERY_TYPE.ID;
// NOTE(review): debug output to stdout added by this change; probably should not ship.
+ System.out.println( "query by id: " + query);
return getTaxonomies( getIdTaxCacheMap(), query, qt );
}
// Second choice: query by scientific name, using the scientific-name-keyed cache map.
else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
query = tax.getScientificName();
qt = QUERY_TYPE.SN;
// NOTE(review): debug output to stdout added by this change; probably should not ship.
+ System.out.println( "query by sn: " + query);
return getTaxonomies( getSnTaxCacheMap(), query, qt );
}
// Third choice: taxonomy code (branch body elided in this excerpt).
else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
}
}
/**
 * Obtains a UniProt taxonomy entry for a scientific name.
 * <p>
 * When the scientific-name cache map already contains {@code sn}, the result
 * is a {@code copy()} of the cached entry. The result variable starts out as
 * {@code null} and is returned as-is if nothing assigns it, so callers must
 * handle a {@code null} return.
 * <p>
 * This change widens the signature with a {@code lineage} parameter; no use of
 * it is visible in this excerpt -- TODO confirm how it is consumed.
 * <p>
 * NOTE(review): the braces in the visible body do not balance, so part of the
 * method (presumably the cache-miss path) appears to be elided between hunks.
 *
 * @param sn the scientific name used as the cache key
 * @return a taxonomy entry for {@code sn}, or {@code null} if none was assigned
 * @throws IOException declared by this method; the throwing call is not visible here
 */
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn ) throws IOException {
+ synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn, List<String> lineage ) throws IOException {
UniProtTaxonomy up_tax = null;
// Cache hit: hand out a copy rather than the cached instance itself.
if ( getSnTaxCacheMap().containsKey( sn ) ) {
up_tax = getSnTaxCacheMap().get( sn ).copy();
// Also register the entry in the id-keyed cache when it carries a non-empty id.
if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
getIdTaxCacheMap().put( up_tax.getId(), up_tax );
}
+
}
}
return up_tax;
&& ForesterUtil.isEmpty( tax.getScientificName() ) ) {
tax.setScientificName( up_tax.getScientificName() );
}
- if ( node.isExternal()
- && ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) && ForesterUtil
- .isEmpty( tax.getTaxonomyCode() ) ) ) {
+ // if ( node.isExternal()
+ if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
+ && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
tax.setTaxonomyCode( up_tax.getCode() );
}
if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() )
if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) && ( tax.getIdentifier() == null ) ) {
tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
}
+ if ( up_tax.getLineage() != null ) {
+ tax.setLineage( new ArrayList<String>() );
+ for( final String lin : up_tax.getLineage() ) {
+ if ( !ForesterUtil.isEmpty( lin ) ) {
+ tax.getLineage().add( lin );
+ }
+ }
+ }
+
}
private enum QUERY_TYPE {