2 package org.forester.sdi;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
23 public class SDIutil {
25 public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
27 int with_id_count = 0;
28 int with_code_count = 0;
29 int with_sn_count = 0;
31 for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
32 final PhylogenyNode g = iter.next();
33 if ( g.getNodeData().isHasTaxonomy() ) {
34 final Taxonomy tax = g.getNodeData().getTaxonomy();
35 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
36 if ( ++with_id_count > max ) {
40 if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
41 if ( ++with_code_count > max ) {
42 max = with_code_count;
45 if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
46 if ( ++with_sn_count > max ) {
53 throw new SDIException( "gene tree has no taxonomic data" );
55 else if ( max == 1 ) {
56 throw new SDIException( "gene tree has only one node with taxonomic data" );
58 else if ( max == with_id_count ) {
59 return TaxonomyComparisonBase.ID;
61 else if ( max == with_sn_count ) {
62 return TaxonomyComparisonBase.SCIENTIFIC_NAME;
65 return TaxonomyComparisonBase.CODE;
69 public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree,
70 final File species_tree_file,
71 final boolean replace_undescores_in_nhx_trees,
72 final boolean ignore_quotes_in_nhx_trees,
73 final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees )
74 throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException {
75 Phylogeny species_tree;
76 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
77 final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
78 if ( p instanceof PhyloXmlParser ) {
79 species_tree = factory.create( species_tree_file, p )[ 0 ];
82 if ( p instanceof NHXParser ) {
83 final NHXParser nhx = ( NHXParser ) p;
84 nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees );
85 nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
86 nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
88 species_tree = factory.create( species_tree_file, p )[ 0 ];
89 species_tree.setRooted( true );
90 final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree );
91 switch ( comp_base ) {
94 .transferNodeNameToField( species_tree,
95 PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
99 PhylogenyMethods.transferNodeNameToField( species_tree,
100 PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
104 PhylogenyMethods.transferNodeNameToField( species_tree,
105 PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
109 throw new SDIException( "unable to determine comparison base" );
115 static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
118 final Identifier id = n.getNodeData().getTaxonomy().getIdentifier();
122 return id.getValuePlusProvider();
124 return n.getNodeData().getTaxonomy().getTaxonomyCode();
125 case SCIENTIFIC_NAME:
126 return n.getNodeData().getTaxonomy().getScientificName();
128 throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
/**
 * Identifiers of the algorithms known to this package.
 * NOTE(review): presumably variants of speciation-duplication inference (SDI);
 * the meaning of each constant is not visible here -- confirm with callers.
 */
public enum ALGORITHM {
    GSDIR, GSDI, SDI, SDIR
}
/**
 * The taxonomy field on which two taxonomies are compared. Each constant
 * overrides {@code toString()} to return a human-readable label.
 */
public enum TaxonomyComparisonBase {
    /** Compare by taxonomy identifier. */
    ID {

        @Override
        public String toString() {
            return "taxonomy id";
        }
    },
    /** Compare by taxonomy code (mnemonic). */
    CODE {

        @Override
        public String toString() {
            return "taxonomy code/mnemonic";
        }
    },
    /** Compare by scientific name. */
    SCIENTIFIC_NAME {

        @Override
        public String toString() {
            return "scientific name";
        }
    }
}