X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FSDIutil.java;h=4e12ae6713b9b1d7910b865357d2a656a1fd7aac;hb=0b49b8e750b34d28a5989facdd8a7959870de996;hp=5ddd905045528fbe70086035a5c1b98af1d2adb3;hpb=03001eb3d298d53e97e4194787fa21bcbc43c5d8;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/SDIutil.java b/forester/java/src/org/forester/sdi/SDIutil.java index 5ddd905..4e12ae6 100644 --- a/forester/java/src/org/forester/sdi/SDIutil.java +++ b/forester/java/src/org/forester/sdi/SDIutil.java @@ -1,15 +1,125 @@ package org.forester.sdi; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.nexus.NexusPhylogeniesParser; +import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public class SDIutil { + public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) + throws SDIException { + int with_id_count = 0; + int with_code_count = 0; + int with_sn_count = 0; + int max = 0; + for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode g = iter.next(); + if ( g.getNodeData().isHasTaxonomy() ) { + final Taxonomy tax = g.getNodeData().getTaxonomy(); + if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { + if ( ++with_id_count > max ) { + max = with_id_count; + } + } + if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + if ( ++with_code_count > max ) { + max = with_code_count; + } + } + if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { + if ( ++with_sn_count > max ) { + max = with_sn_count; + } + } + } + } + if ( max == 0 ) { + throw new SDIException( "gene tree has no taxonomic data" ); + } + else if ( max == 1 ) { + throw new SDIException( "gene tree has only one node with taxonomic data" ); + } + else if ( max == with_id_count ) { + return TaxonomyComparisonBase.ID; + } + else if ( max == with_sn_count ) { + return TaxonomyComparisonBase.SCIENTIFIC_NAME; + } + else { + return TaxonomyComparisonBase.CODE; + } + } + + public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree, + final File species_tree_file, + final boolean replace_undescores_in_nhx_trees, + final boolean ignore_quotes_in_nhx_trees, + final TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees ) + throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException { + Phylogeny species_tree; + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); + if ( p instanceof PhyloXmlParser ) { + species_tree = factory.create( species_tree_file, p )[ 0 ]; + } + else { + if ( p instanceof NHXParser ) { + final NHXParser nhx = ( NHXParser ) p; + nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees ); + nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees ); + nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees ); + } + else if ( p instanceof NexusPhylogeniesParser ) { + final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; + nex.setReplaceUnderscores( replace_undescores_in_nhx_trees ); + nex.setIgnoreQuotes( ignore_quotes_in_nhx_trees ); + nex.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees ); + } + species_tree = factory.create( species_tree_file, p )[ 0 ]; + species_tree.setRooted( true ); + final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree ); + switch ( comp_base ) { + case SCIENTIFIC_NAME: + PhylogenyMethods + .transferNodeNameToField( species_tree, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, + true ); + break; + case CODE: + PhylogenyMethods.transferNodeNameToField( species_tree, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE, + true ); + break; + case ID: + PhylogenyMethods.transferNodeNameToField( species_tree, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID, + true ); + break; + default: + throw new SDIException( "unable to determine comparison base" ); + } + } + return species_tree; + } + static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) { switch ( base ) { case ID: @@ -54,48 +164,4 @@ public class SDIutil { } } } - - public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) - throws SDIException { - int with_id_count = 0; - int with_code_count = 0; - int with_sn_count = 0; - int max = 0; - for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode g = iter.next(); - if ( g.getNodeData().isHasTaxonomy() ) { - final Taxonomy tax = g.getNodeData().getTaxonomy(); - if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { - if ( ++with_id_count > max ) { - max = with_id_count; - } - } - if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { - if ( ++with_code_count > max ) { - max = with_code_count; - } - } - if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { - if ( ++with_sn_count > max ) { - max = with_sn_count; - } - } - } - } - if ( max == 0 ) { - throw new SDIException( "gene tree has no taxonomic data" ); - } - else if ( max == 1 ) { - throw new SDIException( "gene tree has only one node with taxonomic data" ); - } - else if ( max == with_id_count ) { - return TaxonomyComparisonBase.ID; - } - else if ( max == with_sn_count ) { - return TaxonomyComparisonBase.SCIENTIFIC_NAME; - } - else { - return TaxonomyComparisonBase.CODE; - } - } }