return could_extract;\r
}\r
\r
- public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {\r
+ public static DescriptiveStatistics calculateBranchLengthStatistics( final Phylogeny phy ) {\r
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();\r
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {\r
final PhylogenyNode n = iter.next();\r
return stats;\r
}\r
\r
- public static List<DescriptiveStatistics> calculatConfidenceStatistics( final Phylogeny phy ) {\r
+ public static List<DescriptiveStatistics> calculateConfidenceStatistics( final Phylogeny phy ) {\r
final List<DescriptiveStatistics> stats = new ArrayList<DescriptiveStatistics>();\r
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {\r
final PhylogenyNode n = iter.next();\r
if ( !ForesterUtil.isEmpty( stats.get( i ).getDescription() ) ) {\r
if ( !stats.get( i ).getDescription().equalsIgnoreCase( c.getType() ) ) {\r
throw new IllegalArgumentException( "support values in node [" + n.toString()\r
- + "] appear inconsistently ordered" );\r
+ + "] appear inconsistently ordered" );\r
}\r
}\r
stats.get( i ).setDescription( c.getType() );\r
return x;\r
}\r
\r
- public static DescriptiveStatistics calculatNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {\r
+ public static DescriptiveStatistics calculateNumberOfDescendantsPerNodeStatistics( final Phylogeny phy ) {\r
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();\r
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {\r
final PhylogenyNode n = iter.next();\r
}\r
}\r
\r
+ private static enum NDF { // node-data-field selectors: a query of the form "XX:term" restricts searchData to one field
+ NodeName( "NN" ), // node name (node.getName())
+ TaxonomyCode( "TC" ), // taxonomy code
+ TaxonomyCommonName( "CN" ), // taxonomy common name
+ TaxonomyScientificName( "TS" ), // taxonomy scientific name
+ TaxonomyIdentifier( "TI" ), // value of the taxonomy identifier
+ TaxonomySynonym( "SY" ), // any taxonomy synonym
+ SequenceName( "SN" ), // sequence name
+ GeneName( "GN" ), // sequence gene name
+ SequenceSymbol( "SS" ), // sequence symbol
+ SequenceAccession( "SA" ), // value of the sequence accession
+ Domain( "DO" ), // domain names; when given explicitly, searched even if search_domains is false
+ Annotation( "AN" ), // annotation description or reference
+ CrossRef( "XR" ), // cross-reference comment, source or value
+ BinaryCharacter( "BC" ), // present or gained binary characters
+ MolecularSequence( "MS" ); // molecular sequence; only searched when this prefix is given explicitly
+
+ private final String _text; // two-letter code that prefixes a query
+
+ NDF( final String text ) { // stores the two-letter code for this constant
+ _text = text;
+ }
+
+ public static NDF fromString( final String text ) { // returns the first constant whose code is a prefix of text, or null if none; text must be non-null
+ for( final NDF n : NDF.values() ) { // declaration order decides which constant wins; the codes shown are all distinct
+ if ( text.startsWith( n._text ) ) { // prefix test only: callers check separately that ':' sits at index 2
+ return n;
+ }
+ }
+ return null; // unknown prefix: callers then treat the whole query as an unrestricted search term
+ }
+ }
+\r
public static List<PhylogenyNode> searchData( final String query,\r
final Phylogeny phy,\r
final boolean case_sensitive,\r
final boolean partial,\r
final boolean regex,\r
- final boolean search_domains ) {\r
+ final boolean search_domains,\r
+ final double domains_confidence_threshold ) {\r
final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();\r
if ( phy.isEmpty() || ( query == null ) ) {\r
return nodes;\r
if ( ForesterUtil.isEmpty( query ) ) {\r
return nodes;\r
}\r
+ String my_query = query;\r
+ NDF ndf = null;\r
+ if ( ( my_query.length() > 2 ) && ( my_query.indexOf( ":" ) == 2 ) ) {\r
+ ndf = NDF.fromString( my_query );\r
+ if ( ndf != null ) {\r
+ my_query = my_query.substring( 3 );\r
+ }\r
+ }\r
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {\r
final PhylogenyNode node = iter.next();\r
boolean match = false;\r
- if ( match( node.getName(), query, case_sensitive, partial, regex ) ) {\r
+ if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )\r
+ && match( node.getName(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
- && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial, regex ) ) {\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
+ && match( node.getNodeData().getTaxonomy().getTaxonomyCode(),\r
+ my_query,\r
+ case_sensitive,\r
+ partial,\r
+ regex ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
- && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial, regex ) ) {\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
+ && match( node.getNodeData().getTaxonomy().getCommonName(),\r
+ my_query,\r
+ case_sensitive,\r
+ partial,\r
+ regex ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
&& match( node.getNodeData().getTaxonomy().getScientificName(),\r
- query,\r
+ my_query,\r
case_sensitive,\r
partial,\r
regex ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
&& ( node.getNodeData().getTaxonomy().getIdentifier() != null )\r
&& match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),\r
- query,\r
+ my_query,\r
case_sensitive,\r
partial,\r
regex ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()\r
+ && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {\r
final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();\r
I: for( final String syn : syns ) {\r
- if ( match( syn, query, case_sensitive, partial, regex ) ) {\r
+ if ( match( syn, my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break I;\r
}\r
}\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
- && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, regex ) ) {\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()\r
+ && match( node.getNodeData().getSequence().getName(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
- && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, regex ) ) {\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.GeneName ) ) && node.getNodeData().isHasSequence()\r
+ && match( node.getNodeData().getSequence().getGeneName(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
- && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, regex ) ) {\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) ) && node.getNodeData().isHasSequence()\r
+ && match( node.getNodeData().getSequence().getSymbol(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
}\r
if ( !match\r
+ && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )\r
&& node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getAccession() != null )\r
&& match( node.getNodeData().getSequence().getAccession().getValue(),\r
- query,\r
+ my_query,\r
case_sensitive,\r
partial,\r
regex ) ) {\r
match = true;\r
}\r
- if ( search_domains && !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )\r
+ && node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {\r
final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();\r
I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {\r
- if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial, regex ) ) {\r
+ if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )\r
+ && ( match( da.getDomain( i ).getName(), my_query, case_sensitive, partial, regex ) ) ) {\r
match = true;\r
break I;\r
}\r
}\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getAnnotations() != null ) ) {\r
for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {\r
- if ( match( ann.getDesc(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( ann.getDesc(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break;\r
}\r
- if ( match( ann.getRef(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( ann.getRef(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break;\r
}\r
}\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {\r
for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {\r
- if ( match( x.getComment(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( x.getComment(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break;\r
}\r
- if ( match( x.getSource(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( x.getSource(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break;\r
}\r
- if ( match( x.getValue(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( x.getValue(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break;\r
}\r
}\r
}\r
- //\r
- if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )\r
+ && ( node.getNodeData().getBinaryCharacters() != null ) ) {\r
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();\r
I: while ( it.hasNext() ) {\r
- if ( match( it.next(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break I;\r
}\r
}\r
it = node.getNodeData().getBinaryCharacters().getGainedCharacters().iterator();\r
I: while ( it.hasNext() ) {\r
- if ( match( it.next(), query, case_sensitive, partial, regex ) ) {\r
+ if ( match( it.next(), my_query, case_sensitive, partial, regex ) ) {\r
match = true;\r
break I;\r
}\r
}\r
}\r
+ if ( !match\r
+ && ( ndf == NDF.MolecularSequence )\r
+ && node.getNodeData().isHasSequence()\r
+ && match( node.getNodeData().getSequence().getMolecularSequence(),\r
+ my_query,\r
+ case_sensitive,\r
+ true,\r
+ regex ) ) {\r
+ match = true;\r
+ }\r
if ( match ) {\r
nodes.add( node );\r
}\r
final Phylogeny phy,\r
final boolean case_sensitive,\r
final boolean partial,\r
- final boolean search_domains ) {\r
+ final boolean search_domains,\r
+ final double domains_confidence_threshold ) {\r
final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();\r
if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {\r
return nodes;\r
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {\r
final PhylogenyNode node = iter.next();\r
boolean all_matched = true;\r
- for( final String query : queries ) {\r
+ for( String query : queries ) {\r
+ if ( query == null ) {\r
+ continue;\r
+ }\r
+ query = query.trim();\r
+ NDF ndf = null;\r
+ if ( ( query.length() > 2 ) && ( query.indexOf( ":" ) == 2 ) ) {\r
+ ndf = NDF.fromString( query );\r
+ if ( ndf != null ) {\r
+ query = query.substring( 3 );\r
+ }\r
+ }\r
boolean match = false;\r
if ( ForesterUtil.isEmpty( query ) ) {\r
continue;\r
}\r
- if ( match( node.getName(), query, case_sensitive, partial, false ) ) {\r
+ if ( ( ( ndf == null ) || ( ndf == NDF.NodeName ) )\r
+ && match( node.getName(), query, case_sensitive, partial, false ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCode ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
&& match( node.getNodeData().getTaxonomy().getTaxonomyCode(),\r
query,\r
case_sensitive,\r
false ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyCommonName ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
&& match( node.getNodeData().getTaxonomy().getCommonName(),\r
query,\r
case_sensitive,\r
false ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyScientificName ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
&& match( node.getNodeData().getTaxonomy().getScientificName(),\r
query,\r
case_sensitive,\r
false ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomyIdentifier ) )\r
+ && node.getNodeData().isHasTaxonomy()\r
&& ( node.getNodeData().getTaxonomy().getIdentifier() != null )\r
&& match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),\r
query,\r
false ) ) {\r
match = true;\r
}\r
- else if ( node.getNodeData().isHasTaxonomy()\r
+ else if ( ( ( ndf == null ) || ( ndf == NDF.TaxonomySynonym ) ) && node.getNodeData().isHasTaxonomy()\r
&& !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {\r
final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();\r
I: for( final String syn : syns ) {\r
}\r
}\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceName ) ) && node.getNodeData().isHasSequence()\r
&& match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial, false ) ) {\r
match = true;\r
}\r
if ( !match\r
+ && ( ( ndf == null ) || ( ndf == NDF.GeneName ) )\r
&& node.getNodeData().isHasSequence()\r
&& match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial, false ) ) {\r
match = true;\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.SequenceSymbol ) )\r
+ && node.getNodeData().isHasSequence()\r
&& match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial, false ) ) {\r
match = true;\r
}\r
if ( !match\r
+ && ( ( ndf == null ) || ( ndf == NDF.SequenceAccession ) )\r
&& node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getAccession() != null )\r
&& match( node.getNodeData().getSequence().getAccession().getValue(),\r
false ) ) {\r
match = true;\r
}\r
- if ( search_domains && !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ( ndf == null ) && search_domains ) || ( ndf == NDF.Domain ) )\r
+ && node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {\r
final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();\r
I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {\r
- if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {\r
+ if ( ( da.getDomain( i ).getConfidence() <= domains_confidence_threshold )\r
+ && match( da.getDomain( i ).getName(), query, case_sensitive, partial, false ) ) {\r
match = true;\r
break I;\r
}\r
}\r
}\r
- //\r
- if ( !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.Annotation ) ) && node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getAnnotations() != null ) ) {\r
for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {\r
if ( match( ann.getDesc(), query, case_sensitive, partial, false ) ) {\r
}\r
}\r
}\r
- if ( !match && node.getNodeData().isHasSequence()\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.CrossRef ) ) && node.getNodeData().isHasSequence()\r
&& ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {\r
for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {\r
if ( match( x.getComment(), query, case_sensitive, partial, false ) ) {\r
}\r
}\r
}\r
- //\r
- if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {\r
+ if ( !match && ( ( ndf == null ) || ( ndf == NDF.BinaryCharacter ) )\r
+ && ( node.getNodeData().getBinaryCharacters() != null ) ) {\r
Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();\r
I: while ( it.hasNext() ) {\r
if ( match( it.next(), query, case_sensitive, partial, false ) ) {\r
}\r
}\r
}\r
+ if ( !match\r
+ && ( ndf == NDF.MolecularSequence )\r
+ && node.getNodeData().isHasSequence()\r
+ && match( node.getNodeData().getSequence().getMolecularSequence(),\r
+ query,\r
+ case_sensitive,\r
+ true,\r
+ false ) ) {\r
+ match = true;\r
+ }\r
if ( !match ) {\r
all_matched = false;\r
break;\r
else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )\r
&& !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )\r
&& !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n\r
- .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {\r
+ .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {\r
nodes_to_delete.add( n );\r
}\r
}\r
n.setName( "" );\r
}\r
n.getNodeData().getTaxonomy()\r
- .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );\r
+ .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );\r
break;\r
}\r
case TAXONOMY_ID_UNIPROT_2: {\r
n.setName( "" );\r
}\r
n.getNodeData().getTaxonomy()\r
- .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );\r
+ .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) );\r
break;\r
}\r
case TAXONOMY_ID: {\r