"rio" work + clean up
[jalview.git] / forester / java / src / org / forester / sdi / SDIutil.java
1
2 package org.forester.sdi;
3
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7
8 import org.forester.io.parsers.PhylogenyParser;
9 import org.forester.io.parsers.nhx.NHXParser;
10 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
11 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
12 import org.forester.io.parsers.util.ParserUtils;
13 import org.forester.phylogeny.Phylogeny;
14 import org.forester.phylogeny.PhylogenyMethods;
15 import org.forester.phylogeny.PhylogenyNode;
16 import org.forester.phylogeny.data.Identifier;
17 import org.forester.phylogeny.data.Taxonomy;
18 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
19 import org.forester.phylogeny.factories.PhylogenyFactory;
20 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
21 import org.forester.util.ForesterUtil;
22
23 public class SDIutil {
24
25     public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree )
26             throws SDIException {
27         int with_id_count = 0;
28         int with_code_count = 0;
29         int with_sn_count = 0;
30         int max = 0;
31         for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
32             final PhylogenyNode g = iter.next();
33             if ( g.getNodeData().isHasTaxonomy() ) {
34                 final Taxonomy tax = g.getNodeData().getTaxonomy();
35                 if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
36                     if ( ++with_id_count > max ) {
37                         max = with_id_count;
38                     }
39                 }
40                 if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
41                     if ( ++with_code_count > max ) {
42                         max = with_code_count;
43                     }
44                 }
45                 if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
46                     if ( ++with_sn_count > max ) {
47                         max = with_sn_count;
48                     }
49                 }
50             }
51         }
52         if ( max == 0 ) {
53             throw new SDIException( "gene tree has no taxonomic data" );
54         }
55         else if ( max == 1 ) {
56             throw new SDIException( "gene tree has only one node with taxonomic data" );
57         }
58         else if ( max == with_id_count ) {
59             return TaxonomyComparisonBase.ID;
60         }
61         else if ( max == with_sn_count ) {
62             return TaxonomyComparisonBase.SCIENTIFIC_NAME;
63         }
64         else {
65             return TaxonomyComparisonBase.CODE;
66         }
67     }
68
69     public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree,
70                                                     final File species_tree_file,
71                                                     final boolean replace_undescores_in_nhx_trees,
72                                                     final boolean ignore_quotes_in_nhx_trees,
73                                                     final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees )
74             throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException {
75         Phylogeny species_tree;
76         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
77         final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true );
78         if ( p instanceof PhyloXmlParser ) {
79             species_tree = factory.create( species_tree_file, p )[ 0 ];
80         }
81         else {
82             if ( p instanceof NHXParser ) {
83                 final NHXParser nhx = ( NHXParser ) p;
84                 nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees );
85                 nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees );
86                 nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees );
87             }
88             species_tree = factory.create( species_tree_file, p )[ 0 ];
89             species_tree.setRooted( true );
90             final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree );
91             switch ( comp_base ) {
92                 case SCIENTIFIC_NAME:
93                     PhylogenyMethods
94                             .transferNodeNameToField( species_tree,
95                                                       PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME,
96                                                       true );
97                     break;
98                 case CODE:
99                     PhylogenyMethods.transferNodeNameToField( species_tree,
100                                                               PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE,
101                                                               true );
102                     break;
103                 case ID:
104                     PhylogenyMethods.transferNodeNameToField( species_tree,
105                                                               PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID,
106                                                               true );
107                     break;
108                 default:
109                     throw new SDIException( "unable to determine comparison base" );
110             }
111         }
112         return species_tree;
113     }
114
115     static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
116         switch ( base ) {
117             case ID:
118                 final Identifier id = n.getNodeData().getTaxonomy().getIdentifier();
119                 if ( id == null ) {
120                     return null;
121                 }
122                 return id.getValuePlusProvider();
123             case CODE:
124                 return n.getNodeData().getTaxonomy().getTaxonomyCode();
125             case SCIENTIFIC_NAME:
126                 return n.getNodeData().getTaxonomy().getScientificName();
127             default:
128                 throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
129         }
130     }
131
132     public enum ALGORITHM {
133         GSDIR, GSDI, SDI, SDIR
134     }
135
136     public enum TaxonomyComparisonBase {
137         ID {
138
139             @Override
140             public String toString() {
141                 return "taxonomy id";
142             }
143         },
144         CODE {
145
146             @Override
147             public String toString() {
148                 return "taxonomy code/mnemonic";
149             }
150         },
151         SCIENTIFIC_NAME {
152
153             @Override
154             public String toString() {
155                 return "scientific name";
156             }
157         }
158     }
159 }