X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FGSDI.java;h=4ab6395af79a9f6928d9863cf88723acbf5770e5;hb=10297bd8b8a4b4ab198a17a42fc6ff24ae2ed49b;hp=d6e6fb6f9a13a600983ceb0e5a7abde025133f52;hpb=9bad57b1ba0f75075ab8c6bda1dedb906f8c6280;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index d6e6fb6..4ab6395 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; @@ -41,7 +41,7 @@ import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.ForesterUtil; -public final class GSDI { +public final class GSDI implements GSDII { private final boolean _most_parsimonious_duplication_model; private final int _speciation_or_duplication_events_sum; @@ -58,10 +58,21 @@ public final class GSDI { final boolean most_parsimonious_duplication_model, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true ); + } + + public GSDI( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean most_parsimonious_duplication_model, + final boolean strip_gene_tree, + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { _most_parsimonious_duplication_model = most_parsimonious_duplication_model; + if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) { + gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) ); + } final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree, species_tree, - null, strip_gene_tree, strip_species_tree ); _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes(); @@ -71,8 +82,9 @@ public final class GSDI { .getScientificNamesMappedToReducedSpecificity(); _tax_comp_base = nodes_linking_result.getTaxCompBase(); PhylogenyMethods.preOrderReId( species_tree ); - final GSDIsummaryResult gsdi_summary_result = new GSDIsummaryResult(); - geneTreePostOrderTraversal( gene_tree, _most_parsimonious_duplication_model, gsdi_summary_result ); + final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree, + _most_parsimonious_duplication_model, + transfer_taxonomy ); _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum(); _speciations_sum = gsdi_summary_result.getSpeciationsSum(); _duplications_sum = gsdi_summary_result.getDuplicationsSum(); @@ -82,10 +94,12 @@ public final class GSDI { return _duplications_sum; } + @Override public Set getMappedExternalSpeciesTreeNodes() { return _mapped_species_tree_nodes; } + @Override public final SortedSet getReMappedScientificNamesFromGeneTree() { return _scientific_names_mapped_to_reduced_specificity; } @@ -94,18 +108,22 @@ public final class GSDI { return _speciation_or_duplication_events_sum; } + @Override public final int getSpeciationsSum() { return _speciations_sum; } + @Override public List getStrippedExternalGeneTreeNodes() { return _stripped_gene_tree_nodes; } + @Override public List getStrippedSpeciesTreeNodes() { return _stripped_species_tree_nodes; } + @Override public TaxonomyComparisonBase getTaxCompBase() { return _tax_comp_base; } @@ -134,14 +152,22 @@ public final class GSDI { * Preconditions: Mapping M for external nodes must have been calculated and * the species tree must be labeled in preorder. *

- * + * @param transfer_taxonomy + * @return + * @throws SDIException + * */ - final static void geneTreePostOrderTraversal( final Phylogeny gene_tree, - final boolean most_parsimonious_duplication_model, - final GSDIsummaryResult res ) { + final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, + final boolean most_parsimonious_duplication_model, + final boolean transfer_taxonomy ) throws SDIException { + final GSDIsummaryResult res = new GSDIsummaryResult(); for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode g = it.next(); if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } PhylogenyNode s1 = g.getChildNode1().getLink(); PhylogenyNode s2 = g.getChildNode2().getLink(); while ( s1 != s2 ) { @@ -155,30 +181,74 @@ public final class GSDI { g.setLink( s1 ); determineEvent( s1, g, most_parsimonious_duplication_model, res ); } + if ( transfer_taxonomy ) { + transferTaxonomy( g ); + } } + return res; + } + + final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, + final boolean most_parsimonious_duplication_model, + final int min_duplications ) throws SDIException { + final GSDIsummaryResult res = new GSDIsummaryResult(); + for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode g = it.next(); + if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } + PhylogenyNode s1 = g.getChildNode1().getLink(); + PhylogenyNode s2 = g.getChildNode2().getLink(); + while ( s1 != s2 ) { + if ( s1.getId() > s2.getId() ) { + s1 = s1.getParent(); + } + else { + s2 = s2.getParent(); + } + } + g.setLink( s1 ); + determineEvent( s1, g, most_parsimonious_duplication_model, res ); + if ( res.getDuplicationsSum() > min_duplications ) { + return null; + } + } + } + return res; + } + + final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean strip_gene_tree, + final boolean strip_species_tree ) throws SDIException { + final TaxonomyComparisonBase tax_comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree ); + if ( tax_comp_base == null ) { + throw new RuntimeException( "failed to establish taxonomy linking base (taxonomy linking base is null)" ); + } + return linkNodesOfG( gene_tree, species_tree, tax_comp_base, strip_gene_tree, strip_species_tree ); } /** * This allows for linking of internal nodes of the species tree (as opposed * to just external nodes, as in the method it overrides. * If TaxonomyComparisonBase is null, it will try to determine it. - * @throws SDIException - * + * @throws SDIException + * */ final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, final Phylogeny species_tree, final TaxonomyComparisonBase tax_comp_base, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + if ( tax_comp_base == null ) { + throw new IllegalArgumentException( "taxonomy linking base is null" ); + } final Map species_to_node_map = new HashMap(); final List species_tree_ext_nodes = new ArrayList(); final NodesLinkingResult res = new NodesLinkingResult(); - if ( tax_comp_base == null ) { - res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) ); - } - else { - res.setTaxCompBase( tax_comp_base ); - } + res.setTaxCompBase( tax_comp_base ); // Stringyfied taxonomy is the key, node is the value. for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode s = iter.next(); @@ -187,7 +257,8 @@ public final class GSDI { final String tax_str = SDIutil.taxonomyToString( s, res.getTaxCompBase() ); if ( !ForesterUtil.isEmpty( tax_str ) ) { if ( species_to_node_map.containsKey( tax_str ) ) { - throw new SDIException( "taxonomy \"" + s + "\" is not unique in species tree" ); + throw new SDIException( "taxonomy \"" + tax_str + "\" is not unique in species tree (using " + + res.getTaxCompBase() + " for linking to gene tree)" ); } species_to_node_map.put( tax_str, s ); } @@ -228,7 +299,7 @@ public final class GSDI { } else { throw new SDIException( "taxonomy \"" + g.getNodeData().getTaxonomy() - + "\" not present in species tree" ); + + "\" not present in species tree" ); } } else { @@ -241,35 +312,56 @@ public final class GSDI { if ( strip_gene_tree ) { stripTree( gene_tree, res.getStrippedGeneTreeNodes() ); if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) { - throw new SDIException( "species could not be mapped between gene tree and species tree" ); + throw new SDIException( "species could not be mapped between gene tree and species tree (based on " + + res.getTaxCompBase() + ")" ); } } if ( strip_species_tree ) { - stripSpeciesTree( species_tree, species_tree_ext_nodes, res.getMappedSpeciesTreeNodes() ); + stripSpeciesTree( species_tree, species_tree_ext_nodes, res ); } return res; } + static final void transferTaxonomy( final PhylogenyNode g ) { + if ( g == null ) { + throw new IllegalArgumentException( "gene tree node is null" ); + } + final PhylogenyNode s = g.getLink(); + if ( s == null ) { + throw new IllegalArgumentException( "mapped species tree node is null" ); + } + if ( s.getNodeData().isHasTaxonomy() ) { + g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy() + && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode1().getNodeData().setTaxonomy( null ); + } + if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy() + && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode2().getNodeData().setTaxonomy( null ); + } + } + } + else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) { + g.setName( s.getName() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) { + g.getChildNode1().setName( "" ); + } + if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) { + g.getChildNode2().setName( "" ); + } + } + } + } + private final static void addScientificNamesMappedToReducedSpecificity( final String s1, final String s2, final SortedSet scientific_names_mapped_to_reduced_specificity ) { scientific_names_mapped_to_reduced_specificity.add( s1 + " -> " + s2 ); } - // Used by GSDIR - // protected GSDI( final Phylogeny gene_tree, final Phylogeny species_tree, final boolean strip_gene_tree ) - // throws SDIException { - // super( gene_tree, species_tree ); - // _speciation_or_duplication_events_sum = -1; - // _speciations_sum = 0; - // _most_parsimonious_duplication_model = true; - // _duplications_sum = 0; - // _stripped_gene_tree_nodes = new ArrayList(); - // _stripped_species_tree_nodes = new ArrayList(); - // _mapped_species_tree_nodes = new HashSet(); - // _scientific_names_mapped_to_reduced_specificity = new TreeSet(); - // } - // s is the node on the species tree g maps to. private final static void determineEvent( final PhylogenyNode s, final PhylogenyNode g, final boolean most_parsimonious_duplication_model, @@ -293,7 +385,7 @@ public final class GSDI { final Set set = new HashSet(); for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; @@ -304,7 +396,7 @@ public final class GSDI { boolean multiple = false; for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; @@ -337,19 +429,17 @@ public final class GSDI { } } - private final static List stripSpeciesTree( final Phylogeny species_tree, - final List species_tree_ext_nodes, - final Set keep ) { - final List stripped_species_tree_nodes = new ArrayList(); + private final static void stripSpeciesTree( final Phylogeny species_tree, + final List species_tree_ext_nodes, + final NodesLinkingResult res ) { for( final PhylogenyNode s : species_tree_ext_nodes ) { - if ( !keep.contains( s ) ) { + if ( !res.getMappedSpeciesTreeNodes().contains( s ) ) { species_tree.deleteSubtree( s, true ); - stripped_species_tree_nodes.add( s ); + res.getStrippedSpeciesTreeNodes().add( s ); } } species_tree.clearHashIdToNodeMap(); species_tree.externalNodesHaveChanged(); - return stripped_species_tree_nodes; } private final static void stripTree( final Phylogeny phy, final List strip_nodes ) {