X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FGSDI.java;h=4ab6395af79a9f6928d9863cf88723acbf5770e5;hb=c73326f29a91a733a71bfe36192dbd30e4f49af8;hp=7c41e0cb9981b33248c2cddbafe4eb4842c9a975;hpb=99c1b5211c817dbe35a646589d9c4dbf508b8d50;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index 7c41e0c..4ab6395 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; @@ -58,13 +58,21 @@ public final class GSDI implements GSDII { final boolean most_parsimonious_duplication_model, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true ); + } + + public GSDI( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean most_parsimonious_duplication_model, + final boolean strip_gene_tree, + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { _most_parsimonious_duplication_model = most_parsimonious_duplication_model; if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) { gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) ); } final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree, species_tree, - null, strip_gene_tree, strip_species_tree ); _stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes(); @@ -75,7 +83,8 @@ public final class GSDI implements GSDII { _tax_comp_base = nodes_linking_result.getTaxCompBase(); PhylogenyMethods.preOrderReId( species_tree ); final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree, - _most_parsimonious_duplication_model ); + _most_parsimonious_duplication_model, + transfer_taxonomy ); _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum(); _speciations_sum = gsdi_summary_result.getSpeciationsSum(); _duplications_sum = gsdi_summary_result.getDuplicationsSum(); @@ -143,20 +152,52 @@ public final class GSDI implements GSDII { * Preconditions: Mapping M for external nodes must have been calculated and * the species tree must be labeled in preorder. *

- * @return - * @throws SDIException - * + * @param transfer_taxonomy + * @return + * @throws SDIException + * */ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, - final boolean most_parsimonious_duplication_model ) - throws SDIException { + final boolean most_parsimonious_duplication_model, + final boolean transfer_taxonomy ) throws SDIException { + final GSDIsummaryResult res = new GSDIsummaryResult(); + for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode g = it.next(); + if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } + PhylogenyNode s1 = g.getChildNode1().getLink(); + PhylogenyNode s2 = g.getChildNode2().getLink(); + while ( s1 != s2 ) { + if ( s1.getId() > s2.getId() ) { + s1 = s1.getParent(); + } + else { + s2 = s2.getParent(); + } + } + g.setLink( s1 ); + determineEvent( s1, g, most_parsimonious_duplication_model, res ); + } + if ( transfer_taxonomy ) { + transferTaxonomy( g ); + } + } + return res; + } + + final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, + final boolean most_parsimonious_duplication_model, + final int min_duplications ) throws SDIException { final GSDIsummaryResult res = new GSDIsummaryResult(); for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode g = it.next(); if ( g.isInternal() ) { if ( g.getNumberOfDescendants() != 2 ) { throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() - + " descendents" ); + + " descendents" ); } PhylogenyNode s1 = g.getChildNode1().getLink(); PhylogenyNode s2 = g.getChildNode2().getLink(); @@ -170,32 +211,44 @@ public final class GSDI implements GSDII { } g.setLink( s1 ); determineEvent( s1, g, most_parsimonious_duplication_model, res ); + if ( res.getDuplicationsSum() > min_duplications ) { + return null; + } } } return res; } + final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean strip_gene_tree, + final boolean strip_species_tree ) throws SDIException { + final TaxonomyComparisonBase tax_comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree ); + if ( tax_comp_base == null ) { + throw new RuntimeException( "failed to establish taxonomy linking base (taxonomy linking base is null)" ); + } + return linkNodesOfG( gene_tree, species_tree, tax_comp_base, strip_gene_tree, strip_species_tree ); + } + /** * This allows for linking of internal nodes of the species tree (as opposed * to just external nodes, as in the method it overrides. * If TaxonomyComparisonBase is null, it will try to determine it. - * @throws SDIException - * + * @throws SDIException + * */ final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, final Phylogeny species_tree, final TaxonomyComparisonBase tax_comp_base, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + if ( tax_comp_base == null ) { + throw new IllegalArgumentException( "taxonomy linking base is null" ); + } final Map species_to_node_map = new HashMap(); final List species_tree_ext_nodes = new ArrayList(); final NodesLinkingResult res = new NodesLinkingResult(); - if ( tax_comp_base == null ) { - res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) ); - } - else { - res.setTaxCompBase( tax_comp_base ); - } + res.setTaxCompBase( tax_comp_base ); // Stringyfied taxonomy is the key, node is the value. for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode s = iter.next(); @@ -246,7 +299,7 @@ public final class GSDI implements GSDII { } else { throw new SDIException( "taxonomy \"" + g.getNodeData().getTaxonomy() - + "\" not present in species tree" ); + + "\" not present in species tree" ); } } else { @@ -259,7 +312,8 @@ public final class GSDI implements GSDII { if ( strip_gene_tree ) { stripTree( gene_tree, res.getStrippedGeneTreeNodes() ); if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) { - throw new SDIException( "species could not be mapped between gene tree and species tree" ); + throw new SDIException( "species could not be mapped between gene tree and species tree (based on " + + res.getTaxCompBase() + ")" ); } } if ( strip_species_tree ) { @@ -268,6 +322,40 @@ public final class GSDI implements GSDII { return res; } + static final void transferTaxonomy( final PhylogenyNode g ) { + if ( g == null ) { + throw new IllegalArgumentException( "gene tree node is null" ); + } + final PhylogenyNode s = g.getLink(); + if ( s == null ) { + throw new IllegalArgumentException( "mapped species tree node is null" ); + } + if ( s.getNodeData().isHasTaxonomy() ) { + g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy() + && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode1().getNodeData().setTaxonomy( null ); + } + if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy() + && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode2().getNodeData().setTaxonomy( null ); + } + } + } + else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) { + g.setName( s.getName() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) { + g.getChildNode1().setName( "" ); + } + if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) { + g.getChildNode2().setName( "" ); + } + } + } + } + private final static void addScientificNamesMappedToReducedSpecificity( final String s1, final String s2, final SortedSet scientific_names_mapped_to_reduced_specificity ) { @@ -297,7 +385,7 @@ public final class GSDI implements GSDII { final Set set = new HashSet(); for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; @@ -308,7 +396,7 @@ public final class GSDI implements GSDII { boolean multiple = false; for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break;