1 // forester -- software libraries and applications
2 // for genomics and evolutionary biology research.
4 // Copyright (C) 2010 Christian M Zmasek
5 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 // Contact: phylosoft @ gmail . com
23 // WWW: www.phylosoft.org/forester
25 package org.forester.analysis;
27 import java.io.IOException;
28 import java.util.ArrayList;
29 import java.util.List;
31 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
32 import org.forester.phylogeny.Phylogeny;
33 import org.forester.phylogeny.PhylogenyNode;
34 import org.forester.phylogeny.data.Identifier;
35 import org.forester.phylogeny.data.Taxonomy;
36 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
37 import org.forester.util.ForesterUtil;
38 import org.forester.ws.seqdb.UniProtTaxonomy;
40 public final class AncestralTaxonomyInference {
42 public static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException,
43 AncestralTaxonomyInferenceException {
44 TaxonomyDataManager.clearCachesIfTooLarge();
45 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
46 final PhylogenyNode node = iter.next();
47 if ( !node.isExternal() ) {
48 inferTaxonomyFromDescendents( node );
/**
 * Infers the taxonomy of the non-external node {@code n} from the
 * taxonomies of the nodes returned by {@code n.getDescendants()}.
 * <p>
 * The taxonomy of {@code n} is first reset to {@code null}. A lineage
 * (array of names) is then collected for every descendant, and the leading
 * elements shared by all collected lineages form the common lineage. The
 * last shared element becomes the scientific name of a new {@code Taxonomy}
 * that is set on {@code n}. If a {@code UniProtTaxonomy} can be obtained for
 * the common lineage, its rank, id, common name, synonym and lineage are
 * copied over as well. Finally, the taxonomy of every non-external
 * descendant whose taxonomy {@code isEqual} to the new one is set to
 * {@code null}.
 *
 * @param n the node to infer a taxonomy for; must not be external
 * @throws IllegalArgumentException if {@code n} is external
 * @throws AncestralTaxonomyInferenceException if a descendant has no usable
 *         taxonomic information, if no taxonomy or lineage can be
 *         established for a descendant, or if the descendants share no
 *         common lineage
 * @throws IOException declared by this method; presumably raised by the
 *         taxonomy lookups -- TODO confirm
 */
53 private static void inferTaxonomyFromDescendents( final PhylogenyNode n ) throws IOException,
54 AncestralTaxonomyInferenceException {
55 if ( n.isExternal() ) {
56 throw new IllegalArgumentException( "attempt to infer taxonomy from descendants of external node" );
// Drop whatever taxonomy n currently carries; a new one is set further down.
58 n.getNodeData().setTaxonomy( null );
59 final List<PhylogenyNode> descs = n.getDescendants();
// One lineage per descendant, plus the length of the shortest lineage seen so far.
60 final List<String[]> lineages = new ArrayList<String[]>();
61 int shortest_lin_length = Integer.MAX_VALUE;
// A descendant is usable if it has a taxonomy with an appropriate id or at
// least one non-empty field among scientific name, lineage, taxonomy code
// and common name.
62 for( final PhylogenyNode desc : descs ) {
63 if ( desc.getNodeData().isHasTaxonomy()
64 && ( TaxonomyDataManager.isHasAppropriateId( desc.getNodeData().getTaxonomy() )
65 || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() )
66 || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getLineage() )
67 || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() ) || !ForesterUtil
68 .isEmpty( desc.getNodeData().getTaxonomy().getCommonName() ) ) ) {
// Look up the UniProt taxonomy that corresponds to the descendant's taxonomy.
69 final UniProtTaxonomy up_tax = TaxonomyDataManager.obtainUniProtTaxonomy( desc.getNodeData()
70 .getTaxonomy(), null, null );
// Neither a UniProt entry nor a lineage stored on the node: print the
// taxonomy and its (empty) lineage to stdout, then fail. The node is
// identified in the message by its quoted name or by its id in brackets.
71 if ( ( up_tax == null ) && ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getLineage() ) ) {
73 if ( !ForesterUtil.isEmpty( desc.getName() ) ) {
74 desc_str = "\"" + desc.getName() + "\"";
77 desc_str = "[" + desc.getId() + "]";
79 System.out.println( desc.getNodeData().getTaxonomy().toString() );
80 System.out.println( ForesterUtil.stringListToString( desc.getNodeData().getTaxonomy().getLineage(),
82 throw new AncestralTaxonomyInferenceException( "a taxonomy for node " + desc_str
83 + " could not be established from the database" );
// Prefer the lineage stored on the node's own taxonomy ...
85 String[] lineage = ForesterUtil.stringListToArray( desc.getNodeData().getTaxonomy().getLineage() );
// ... and use the lineage of the UniProt entry only if that one is missing or empty.
86 if ( ( lineage == null ) || ( lineage.length < 1 ) ) {
87 lineage = ForesterUtil.stringListToArray( up_tax.getLineage() );
// Still no lineage from either source: give up on this node.
89 if ( ( lineage == null ) || ( lineage.length < 1 ) ) {
90 throw new AncestralTaxonomyInferenceException( "a taxonomic lineage for node \""
91 + desc.getNodeData().getTaxonomy().toString() + "\" could not be established" );
// Keep track of the shortest lineage; it bounds the comparison loop below.
93 if ( lineage.length < shortest_lin_length ) {
94 shortest_lin_length = lineage.length;
96 lineages.add( lineage );
// Label the descendant (quoted name, or id in brackets) for the
// "no or inappropriate taxonomic information" error thrown below.
// NOTE(review): presumably the else branch of the taxonomy check above -- confirm.
100 if ( !ForesterUtil.isEmpty( desc.getName() ) ) {
101 node = "\"" + desc.getName() + "\"";
104 node = "[" + desc.getId() + "]";
106 // final List<PhylogenyNode> e = desc.getAllExternalDescendants();
108 // System.out.println();
110 // for( final PhylogenyNode object : e ) {
111 // System.out.println( x + ":" );
112 // System.out.println( object.getName() + " " );
115 // System.out.println();
117 throw new AncestralTaxonomyInferenceException( "node " + node
118 + " has no or inappropriate taxonomic information" );
// Elements shared by all lineages, in order; last_common holds the most
// recently recorded one.
121 final List<String> last_common_lineage = new ArrayList<String>();
122 String last_common = null;
// Compare the lineages position by position, up to the length of the
// shortest one: the element of the first lineage at position i is checked
// against position i of every other lineage.
// NOTE(review): presumably the labeled loop I is exited at the first
// mismatch -- confirm.
123 if ( shortest_lin_length > 0 ) {
124 I: for( int i = 0; i < shortest_lin_length; ++i ) {
125 final String lineage_0 = lineages.get( 0 )[ i ];
126 for( int j = 1; j < lineages.size(); ++j ) {
127 if ( !lineage_0.equals( lineages.get( j )[ i ] ) ) {
// Record the element as part of the common lineage.
131 last_common_lineage.add( lineage_0 );
132 last_common = lineage_0;
// Nothing in common: check whether the first lineage elements include both
// UniProtTaxonomy.VIRUSES and UniProtTaxonomy.CELLULAR_ORGANISMS
// (compared ignoring case).
135 if ( last_common_lineage.isEmpty() ) {
136 boolean saw_viruses = false;
137 boolean saw_cellular_organism = false;
138 for( final String[] lineage : lineages ) {
139 if ( lineage.length > 0 ) {
140 if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.VIRUSES ) ) {
143 else if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.CELLULAR_ORGANISMS ) ) {
144 saw_cellular_organism = true;
146 if ( saw_cellular_organism && saw_viruses ) {
// When both were seen, UniProtTaxonomy.CELLULAR_ORGANISMS is used as the
// common lineage.
151 if ( saw_cellular_organism && saw_viruses ) {
152 last_common_lineage.add( UniProtTaxonomy.CELLULAR_ORGANISMS );
153 last_common = UniProtTaxonomy.CELLULAR_ORGANISMS;
// Build an error message over all collected lineages and fail.
// NOTE(review): presumably reached only when the viruses/cellular-organisms
// case above did not apply -- confirm.
156 String msg = "no common lineage for:\n";
158 for( final String[] strings : lineages ) {
159 msg += counter + ": ";
161 for( final String string : strings ) {
166 throw new AncestralTaxonomyInferenceException( msg );
// Create the taxonomy for n, named after the last common lineage element.
169 final Taxonomy tax = new Taxonomy();
170 n.getNodeData().setTaxonomy( tax );
171 tax.setScientificName( last_common );
// Copy further details from the UniProt entry for the common lineage, when
// one is found: lower-cased rank, id (with provider "uniprot"), common
// name, synonym (unless already present) and the non-empty lineage elements.
172 final UniProtTaxonomy up_tax = TaxonomyDataManager.obtainUniProtTaxonomyFromLineage( last_common_lineage );
173 if ( up_tax != null ) {
174 if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) {
176 tax.setRank( up_tax.getRank().toLowerCase() );
178 catch ( final PhyloXmlDataFormatException ex ) {
182 if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
183 tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
185 if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
186 tax.setCommonName( up_tax.getCommonName() );
188 if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
189 tax.getSynonyms().add( up_tax.getSynonym() );
191 if ( up_tax.getLineage() != null ) {
192 tax.setLineage( new ArrayList<String>() );
193 for( final String lin : up_tax.getLineage() ) {
194 if ( !ForesterUtil.isEmpty( lin ) ) {
195 tax.getLineage().add( lin );
// If the taxonomy still has no lineage, fill it from the common lineage
// computed above (non-empty elements only).
200 if ( ForesterUtil.isEmpty( tax.getLineage() ) ) {
201 tax.setLineage( new ArrayList<String>() );
202 for( final String lin : last_common_lineage ) {
203 if ( !ForesterUtil.isEmpty( lin ) ) {
204 tax.getLineage().add( lin );
// Clear the taxonomy of every non-external descendant whose taxonomy is
// equal to the one just set on n.
208 for( final PhylogenyNode desc : descs ) {
209 if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy()
210 && desc.getNodeData().getTaxonomy().isEqual( tax ) ) {
211 desc.getNodeData().setTaxonomy( null );