X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FGSDI.java;h=fa7a52c8baa3db2ab8cbb8fd08d0e88967cd533c;hb=4e1878f8b3c7dbff20ae4cd834b0ffd917f3ad72;hp=fd6497002ae453c38f42eac84d29599fcc880977;hpb=9956a18ae590422c0500a1770ff4cdaf8d787f82;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index fd64970..fa7a52c 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; @@ -41,7 +41,7 @@ import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; -public final class GSDI implements GSDII { +public final class GSDI implements GSDII { private final boolean _most_parsimonious_duplication_model; private final int _speciation_or_duplication_events_sum; @@ -58,10 +58,21 @@ public final class GSDI implements GSDII { final boolean most_parsimonious_duplication_model, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true ); + } + + public GSDI( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean most_parsimonious_duplication_model, + final boolean strip_gene_tree, + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { _most_parsimonious_duplication_model = most_parsimonious_duplication_model; + if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) { + gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) ); + } final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree, species_tree, - null, strip_gene_tree, strip_species_tree ); _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes(); @@ -72,7 +83,8 @@ public final class GSDI implements GSDII { _tax_comp_base = nodes_linking_result.getTaxCompBase(); PhylogenyMethods.preOrderReId( species_tree ); final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree, - _most_parsimonious_duplication_model ); + _most_parsimonious_duplication_model, + transfer_taxonomy ); _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum(); _speciations_sum = gsdi_summary_result.getSpeciationsSum(); _duplications_sum = gsdi_summary_result.getDuplicationsSum(); @@ -82,10 +94,12 @@ public final class GSDI implements GSDII { return _duplications_sum; } + @Override public Set getMappedExternalSpeciesTreeNodes() { return _mapped_species_tree_nodes; } + @Override public final SortedSet getReMappedScientificNamesFromGeneTree() { return _scientific_names_mapped_to_reduced_specificity; } @@ -94,18 +108,22 @@ public final class GSDI implements GSDII { return _speciation_or_duplication_events_sum; } + @Override public final int getSpeciationsSum() { return _speciations_sum; } + @Override public List getStrippedExternalGeneTreeNodes() { return _stripped_gene_tree_nodes; } + @Override public List getStrippedSpeciesTreeNodes() { return _stripped_species_tree_nodes; } + @Override public TaxonomyComparisonBase getTaxCompBase() { return _tax_comp_base; } @@ -134,15 +152,22 @@ public final class GSDI implements GSDII { * Preconditions: Mapping M for external nodes must have been calculated and * the species tree must be labeled in preorder. *

+ * @param transfer_taxonomy * @return + * @throws SDIException * */ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, - final boolean most_parsimonious_duplication_model ) { + final boolean most_parsimonious_duplication_model, + final boolean transfer_taxonomy ) throws SDIException { final GSDIsummaryResult res = new GSDIsummaryResult(); for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode g = it.next(); if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } PhylogenyNode s1 = g.getChildNode1().getLink(); PhylogenyNode s2 = g.getChildNode2().getLink(); while ( s1 != s2 ) { @@ -156,10 +181,55 @@ public final class GSDI implements GSDII { g.setLink( s1 ); determineEvent( s1, g, most_parsimonious_duplication_model, res ); } + if ( transfer_taxonomy ) { + transferTaxonomy( g ); + } } return res; } + final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, + final boolean most_parsimonious_duplication_model, + final int min_duplications ) throws SDIException { + final GSDIsummaryResult res = new GSDIsummaryResult(); + for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode g = it.next(); + if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } + PhylogenyNode s1 = g.getChildNode1().getLink(); + PhylogenyNode s2 = g.getChildNode2().getLink(); + while ( s1 != s2 ) { + if ( s1.getId() > s2.getId() ) { + s1 = s1.getParent(); + } + else { + s2 = s2.getParent(); + } + } + g.setLink( s1 ); + determineEvent( s1, g, most_parsimonious_duplication_model, res ); + if ( res.getDuplicationsSum() > min_duplications ) { + return null; + } + } + } + return res; + } + + final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean strip_gene_tree, + final boolean strip_species_tree ) throws SDIException { + final TaxonomyComparisonBase tax_comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree ); + if ( tax_comp_base == null ) { + throw new RuntimeException( "failed to establish taxonomy linking base (taxonomy linking base is null)" ); + } + return linkNodesOfG( gene_tree, species_tree, tax_comp_base, strip_gene_tree, strip_species_tree ); + } + /** * This allows for linking of internal nodes of the species tree (as opposed * to just external nodes, as in the method it overrides. @@ -172,15 +242,13 @@ public final class GSDI implements GSDII { final TaxonomyComparisonBase tax_comp_base, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + if ( tax_comp_base == null ) { + throw new IllegalArgumentException( "taxonomy linking base is null" ); + } final Map species_to_node_map = new HashMap(); final List species_tree_ext_nodes = new ArrayList(); final NodesLinkingResult res = new NodesLinkingResult(); - if ( tax_comp_base == null ) { - res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) ); - } - else { - res.setTaxCompBase( tax_comp_base ); - } + res.setTaxCompBase( tax_comp_base ); // Stringyfied taxonomy is the key, node is the value. for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode s = iter.next(); @@ -189,7 +257,8 @@ public final class GSDI implements GSDII { final String tax_str = SDIutil.taxonomyToString( s, res.getTaxCompBase() ); if ( !ForesterUtil.isEmpty( tax_str ) ) { if ( species_to_node_map.containsKey( tax_str ) ) { - throw new SDIException( "taxonomy \"" + s + "\" is not unique in species tree" ); + throw new SDIException( "taxonomy \"" + tax_str + "\" is not unique in species tree (using " + + res.getTaxCompBase() + " for linking to gene tree)" ); } species_to_node_map.put( tax_str, s ); } @@ -243,7 +312,8 @@ public final class GSDI implements GSDII { if ( strip_gene_tree ) { stripTree( gene_tree, res.getStrippedGeneTreeNodes() ); if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) { - throw new SDIException( "species could not be mapped between gene tree and species tree" ); + throw new SDIException( "species could not be mapped between gene tree and species tree (based on " + + res.getTaxCompBase() + ")" ); } } if ( strip_species_tree ) { @@ -252,6 +322,40 @@ public final class GSDI implements GSDII { return res; } + static final void transferTaxonomy( final PhylogenyNode g ) { + if ( g == null ) { + throw new IllegalArgumentException( "gene tree node is null" ); + } + final PhylogenyNode s = g.getLink(); + if ( s == null ) { + throw new IllegalArgumentException( "mapped species tree node is null" ); + } + if ( s.getNodeData().isHasTaxonomy() ) { + g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy() + && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode1().getNodeData().setTaxonomy( null ); + } + if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy() + && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode2().getNodeData().setTaxonomy( null ); + } + } + } + else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) { + g.setName( s.getName() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) { + g.getChildNode1().setName( "" ); + } + if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) { + g.getChildNode2().setName( "" ); + } + } + } + } + private final static void addScientificNamesMappedToReducedSpecificity( final String s1, final String s2, final SortedSet scientific_names_mapped_to_reduced_specificity ) { @@ -281,7 +385,7 @@ public final class GSDI implements GSDII { final Set set = new HashSet(); for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; @@ -292,7 +396,7 @@ public final class GSDI implements GSDII { boolean multiple = false; for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break;