in progress...
[jalview.git] / forester / java / src / org / forester / sdi / SDIutil.java
1
2 package org.forester.sdi;
3
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7
8 import org.forester.io.parsers.PhylogenyParser;
9 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
10 import org.forester.io.parsers.nhx.NHXParser;
11 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
12 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
13 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
14 import org.forester.io.parsers.util.ParserUtils;
15 import org.forester.phylogeny.Phylogeny;
16 import org.forester.phylogeny.PhylogenyMethods;
17 import org.forester.phylogeny.PhylogenyNode;
18 import org.forester.phylogeny.data.Identifier;
19 import org.forester.phylogeny.data.Taxonomy;
20 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
21 import org.forester.phylogeny.factories.PhylogenyFactory;
22 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
23 import org.forester.util.ForesterUtil;
24
25 public class SDIutil {
26
27     public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
28             throws SDIException {
29         int with_id_count = 0;
30         int with_code_count = 0;
31         int with_sn_count = 0;
32         int max = 0;
33         for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
34             final PhylogenyNode g = iter.next();
35             if ( g.getNodeData().isHasTaxonomy() ) {
36                 final Taxonomy tax = g.getNodeData().getTaxonomy();
37                 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
38                     if ( ++with_id_count > max ) {
39                         max = with_id_count;
40                     }
41                 }
42                 if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
43                     if ( ++with_code_count > max ) {
44                         max = with_code_count;
45                     }
46                 }
47                 if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
48                     if ( ++with_sn_count > max ) {
49                         max = with_sn_count;
50                     }
51                 }
52             }
53         }
54         if ( max == 0 ) {
55             throw new SDIException( "gene tree has no taxonomic data" );
56         }
57         else if ( max == 1 ) {
58             throw new SDIException( "gene tree has only one node with taxonomic data" );
59         }
60         else if ( max == with_id_count ) {
61             return TaxonomyComparisonBase.ID;
62         }
63         else if ( max == with_sn_count ) {
64             return TaxonomyComparisonBase.SCIENTIFIC_NAME;
65         }
66         else {
67             return TaxonomyComparisonBase.CODE;
68         }
69     }
70
71     public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree,
72                                                     final File species_tree_file,
73                                                     final boolean replace_undescores_in_nhx_trees,
74                                                     final boolean ignore_quotes_in_nhx_trees,
75                                                     final TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees )
76                                                             throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException {
77         Phylogeny species_tree;
78         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
79         final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
80         if ( p instanceof PhyloXmlParser ) {
81             species_tree = factory.create( species_tree_file, p )[ 0 ];
82         }
83         else {
84             if ( p instanceof NHXParser ) {
85                 final NHXParser nhx = ( NHXParser ) p;
86                 nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees );
87                 nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
88                 nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
89             }
90             else if ( p instanceof NexusPhylogeniesParser ) {
91                 final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
92                 nex.setReplaceUnderscores( replace_undescores_in_nhx_trees );
93                 nex.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
94                 nex.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
95             }
96             species_tree = factory.create( species_tree_file, p )[ 0 ];
97             species_tree.setRooted( true );
98             final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree );
99             switch ( comp_base ) {
100                 case SCIENTIFIC_NAME:
101                     PhylogenyMethods
102                     .transferNodeNameToField( species_tree,
103                                               PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
104                                               true );
105                     break;
106                 case CODE:
107                     PhylogenyMethods.transferNodeNameToField( species_tree,
108                                                               PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
109                                                               true );
110                     break;
111                 case ID:
112                     PhylogenyMethods.transferNodeNameToField( species_tree,
113                                                               PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
114                                                               true );
115                     break;
116                 default:
117                     throw new SDIException( "unable to determine comparison base" );
118             }
119         }
120         return species_tree;
121     }
122
123     static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
124         switch ( base ) {
125             case ID:
126                 final Identifier id = n.getNodeData().getTaxonomy().getIdentifier();
127                 if ( id == null ) {
128                     return null;
129                 }
130                 return id.getValuePlusProvider();
131             case CODE:
132                 return n.getNodeData().getTaxonomy().getTaxonomyCode();
133             case SCIENTIFIC_NAME:
134                 return n.getNodeData().getTaxonomy().getScientificName();
135             default:
136                 throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
137         }
138     }
139
140     public enum ALGORITHM {
141         GSDIR, GSDI, SDI, SDIR
142     }
143
144     public enum TaxonomyComparisonBase {
145         ID {
146
147             @Override
148             public String toString() {
149                 return "taxonomy id";
150             }
151         },
152         CODE {
153
154             @Override
155             public String toString() {
156                 return "taxonomy code/mnemonic";
157             }
158         },
159         SCIENTIFIC_NAME {
160
161             @Override
162             public String toString() {
163                 return "scientific name";
164             }
165         }
166     }
167 }