2 package org.forester.sdi;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
8 import org.forester.io.parsers.PhylogenyParser;
9 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
10 import org.forester.io.parsers.nhx.NHXParser;
11 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
12 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
13 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
14 import org.forester.io.parsers.util.ParserUtils;
15 import org.forester.phylogeny.Phylogeny;
16 import org.forester.phylogeny.PhylogenyMethods;
17 import org.forester.phylogeny.PhylogenyNode;
18 import org.forester.phylogeny.data.Identifier;
19 import org.forester.phylogeny.data.Taxonomy;
20 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
21 import org.forester.phylogeny.factories.PhylogenyFactory;
22 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
23 import org.forester.util.ForesterUtil;
25 public class SDIutil {
27 public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
29 int with_id_count = 0;
30 int with_code_count = 0;
31 int with_sn_count = 0;
33 for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
34 final PhylogenyNode g = iter.next();
35 if ( g.getNodeData().isHasTaxonomy() ) {
36 final Taxonomy tax = g.getNodeData().getTaxonomy();
37 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
38 if ( ++with_id_count > max ) {
42 if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
43 if ( ++with_code_count > max ) {
44 max = with_code_count;
47 if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
48 if ( ++with_sn_count > max ) {
55 throw new SDIException( "gene tree has no taxonomic data" );
57 else if ( max == 1 ) {
58 throw new SDIException( "gene tree has only one node with taxonomic data" );
60 else if ( max == with_id_count ) {
61 return TaxonomyComparisonBase.ID;
63 else if ( max == with_sn_count ) {
64 return TaxonomyComparisonBase.SCIENTIFIC_NAME;
67 return TaxonomyComparisonBase.CODE;
/**
 * Reads a species tree from a file and prepares it for comparison against
 * the given gene tree.
 * <p>
 * A parser is chosen from the file via
 * ParserUtils.createParserDependingOnFileType. NHX and Nexus parsers are
 * configured with the three "..._in_nhx_trees" arguments before the tree is
 * created. The species tree is marked as rooted, and its node names are
 * copied into the taxonomy field matching the comparison base determined
 * from the gene tree.
 * <p>
 * NOTE(review): this listing is missing several lines of the method
 * (closing braces, the "else" nesting, the last argument of each
 * transferNodeNameToField call, the "case" labels for the second and third
 * call, and the return statement). Confirm details against the complete
 * source before relying on this description. The type File also has no
 * visible import in this listing.
 *
 * @param gene_tree                        gene tree used only to determine the taxonomy comparison base
 * @param species_tree_file                file the species tree is read from
 * @param replace_undescores_in_nhx_trees  passed to setReplaceUnderscores on NHX/Nexus parsers
 * @param ignore_quotes_in_nhx_trees       passed to setIgnoreQuotes on NHX/Nexus parsers
 * @param taxonomy_extraction_in_nhx_trees passed to setTaxonomyExtraction on NHX/Nexus parsers
 * @return presumably the parsed species tree (the return statement is not visible here; TODO confirm)
 * @throws SDIException thrown explicitly when the comparison base matches no handled case;
 *                      also declared because determineTaxonomyComparisonBase is called
 */
71 public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree,
72 final File species_tree_file,
73 final boolean replace_undescores_in_nhx_trees,
74 final boolean ignore_quotes_in_nhx_trees,
75 final TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees )
76 throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException {
77 Phylogeny species_tree;
78 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// The parser implementation is selected from the species tree file itself.
79 final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
// phyloXML input: the first phylogeny in the file is used as-is.
80 if ( p instanceof PhyloXmlParser ) {
81 species_tree = factory.create( species_tree_file, p )[ 0 ];
// NHX input: apply the caller's underscore, quote and taxonomy-extraction settings.
84 if ( p instanceof NHXParser ) {
85 final NHXParser nhx = ( NHXParser ) p;
86 nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees );
87 nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
88 nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
// Nexus input: same three settings as for NHX.
90 else if ( p instanceof NexusPhylogeniesParser ) {
91 final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
92 nex.setReplaceUnderscores( replace_undescores_in_nhx_trees );
93 nex.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
94 nex.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
// Again the first phylogeny in the file is taken; it is then flagged as rooted.
// NOTE(review): which branch encloses the following lines is not visible in this listing.
96 species_tree = factory.create( species_tree_file, p )[ 0 ];
97 species_tree.setRooted( true );
// The gene tree decides which taxonomy field the species tree node names are copied into.
98 final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree );
99 switch ( comp_base ) {
100 case SCIENTIFIC_NAME:
102 .transferNodeNameToField( species_tree,
103 PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
// NOTE(review): the case label for this call is missing here; the target field suggests the CODE base.
107 PhylogenyMethods.transferNodeNameToField( species_tree,
108 PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
// NOTE(review): the case label for this call is missing here; the target field suggests the ID base.
112 PhylogenyMethods.transferNodeNameToField( species_tree,
113 PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
// Reached when comp_base matches none of the handled cases.
117 throw new SDIException( "unable to determine comparison base" );
/**
 * Produces the string that represents a node's taxonomy under the given
 * comparison base.
 * <p>
 * The visible branches return the identifier's getValuePlusProvider(), the
 * taxonomy code, or the scientific name of the node's taxonomy. Any base
 * without a matching branch causes an IllegalArgumentException.
 * <p>
 * NOTE(review): the "switch" header, the labels of the first two branches
 * and the lines between the identifier lookup and its return are missing
 * from this listing. How a taxonomy without an identifier is handled is not
 * visible here — confirm against the complete source.
 *
 * @param n    node whose taxonomy is read via getNodeData().getTaxonomy()
 * @param base comparison base selecting which taxonomy field is returned
 * @return the selected taxonomy field as a string
 * @throws IllegalArgumentException if base is not one of the handled values
 */
123 static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
// Identifier branch (label not visible): value together with its provider.
126 final Identifier id = n.getNodeData().getTaxonomy().getIdentifier();
130 return id.getValuePlusProvider();
// Taxonomy code branch (label not visible).
132 return n.getNodeData().getTaxonomy().getTaxonomyCode();
133 case SCIENTIFIC_NAME:
134 return n.getNodeData().getTaxonomy().getScientificName();
// Fallback for any comparison base not handled above.
136 throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
/**
 * Names of the algorithm variants known to this package: GSDIR, GSDI, SDI
 * and SDIR.
 * NOTE(review): the meaning of each variant is not described in this file;
 * see the classes that consume this enum.
 */
public enum ALGORITHM {
    GSDIR, GSDI, SDI, SDIR
}
/**
 * The taxonomy field on which two taxonomies are compared. Each constant
 * overrides toString() to give a human-readable label for that field.
 */
public enum TaxonomyComparisonBase {
    /** Compare by taxonomy identifier. */
    ID {

        @Override
        public String toString() {
            return "taxonomy id";
        }
    },
    /** Compare by taxonomy code (mnemonic). */
    CODE {

        @Override
        public String toString() {
            return "taxonomy code/mnemonic";
        }
    },
    /** Compare by scientific name. */
    SCIENTIFIC_NAME {

        @Override
        public String toString() {
            return "scientific name";
        }
    }
}