X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsdi%2FGSDI.java;h=4ab6395af79a9f6928d9863cf88723acbf5770e5;hb=39e8a64f2c7b3b250566387f0c2f675d42da04b4;hp=63c6215253b89bc58f5ef013d91167a220504bbf;hpb=cec0663378230521f24a851cb1c1c9491026b70a;p=jalview.git diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index 63c6215..4ab6395 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sdi; @@ -58,6 +58,15 @@ public final class GSDI implements GSDII { final boolean most_parsimonious_duplication_model, final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { + this( gene_tree, species_tree, most_parsimonious_duplication_model, strip_gene_tree, strip_species_tree, true ); + } + + public GSDI( final Phylogeny gene_tree, + final Phylogeny species_tree, + final boolean most_parsimonious_duplication_model, + final boolean strip_gene_tree, + final boolean strip_species_tree, + final boolean transfer_taxonomy ) throws SDIException { _most_parsimonious_duplication_model = most_parsimonious_duplication_model; if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) { gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) ); @@ -74,7 +83,8 @@ public final class GSDI implements GSDII { _tax_comp_base = nodes_linking_result.getTaxCompBase(); PhylogenyMethods.preOrderReId( species_tree ); final GSDIsummaryResult gsdi_summary_result = geneTreePostOrderTraversal( gene_tree, - _most_parsimonious_duplication_model ); + _most_parsimonious_duplication_model, + transfer_taxonomy ); _speciation_or_duplication_events_sum = gsdi_summary_result.getSpeciationOrDuplicationEventsSum(); _speciations_sum = gsdi_summary_result.getSpeciationsSum(); _duplications_sum = gsdi_summary_result.getDuplicationsSum(); @@ -142,20 +152,52 @@ public final class GSDI implements GSDII { * Preconditions: Mapping M for external nodes must have been calculated and * the species tree must be labeled in preorder. *

- * @return - * @throws SDIException - * + * @param transfer_taxonomy + * @return + * @throws SDIException + * */ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, - final boolean most_parsimonious_duplication_model ) - throws SDIException { + final boolean most_parsimonious_duplication_model, + final boolean transfer_taxonomy ) throws SDIException { + final GSDIsummaryResult res = new GSDIsummaryResult(); + for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode g = it.next(); + if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } + PhylogenyNode s1 = g.getChildNode1().getLink(); + PhylogenyNode s2 = g.getChildNode2().getLink(); + while ( s1 != s2 ) { + if ( s1.getId() > s2.getId() ) { + s1 = s1.getParent(); + } + else { + s2 = s2.getParent(); + } + } + g.setLink( s1 ); + determineEvent( s1, g, most_parsimonious_duplication_model, res ); + } + if ( transfer_taxonomy ) { + transferTaxonomy( g ); + } + } + return res; + } + + final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, + final boolean most_parsimonious_duplication_model, + final int min_duplications ) throws SDIException { final GSDIsummaryResult res = new GSDIsummaryResult(); for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode g = it.next(); if ( g.isInternal() ) { if ( g.getNumberOfDescendants() != 2 ) { throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() - + " descendents" ); + + " descendents" ); } PhylogenyNode s1 = g.getChildNode1().getLink(); PhylogenyNode s2 = g.getChildNode2().getLink(); @@ -169,6 +211,9 @@ public final class GSDI implements GSDII { } g.setLink( s1 ); determineEvent( s1, g, most_parsimonious_duplication_model, res ); + if ( res.getDuplicationsSum() > min_duplications ) { + return null; + } } } return res; @@ -189,8 +234,8 @@ public final class GSDI implements GSDII { * This allows for linking of internal nodes of the species tree (as opposed * to just external nodes, as in the method it overrides. * If TaxonomyComparisonBase is null, it will try to determine it. - * @throws SDIException - * + * @throws SDIException + * */ final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree, final Phylogeny species_tree, @@ -254,7 +299,7 @@ public final class GSDI implements GSDII { } else { throw new SDIException( "taxonomy \"" + g.getNodeData().getTaxonomy() - + "\" not present in species tree" ); + + "\" not present in species tree" ); } } else { @@ -277,6 +322,40 @@ public final class GSDI implements GSDII { return res; } + static final void transferTaxonomy( final PhylogenyNode g ) { + if ( g == null ) { + throw new IllegalArgumentException( "gene tree node is null" ); + } + final PhylogenyNode s = g.getLink(); + if ( s == null ) { + throw new IllegalArgumentException( "mapped species tree node is null" ); + } + if ( s.getNodeData().isHasTaxonomy() ) { + g.getNodeData().setTaxonomy( s.getNodeData().getTaxonomy() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && g.getChildNode1().getNodeData().isHasTaxonomy() + && ( g.getChildNode1().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode1().getNodeData().setTaxonomy( null ); + } + if ( g.getChildNode2().isInternal() && g.getChildNode2().getNodeData().isHasTaxonomy() + && ( g.getChildNode2().getNodeData().getTaxonomy() == s.getNodeData().getTaxonomy() ) ) { + g.getChildNode2().getNodeData().setTaxonomy( null ); + } + } + } + else if ( ForesterUtil.isEmpty( g.getName() ) && !ForesterUtil.isEmpty( s.getName() ) ) { + g.setName( s.getName() ); + if ( g.isInternal() ) { + if ( g.getChildNode1().isInternal() && ( g.getChildNode1().getName() == s.getName() ) ) { + g.getChildNode1().setName( "" ); + } + if ( g.getChildNode2().isInternal() && ( g.getChildNode2().getName() == s.getName() ) ) { + g.getChildNode2().setName( "" ); + } + } + } + } + private final static void addScientificNamesMappedToReducedSpecificity( final String s1, final String s2, final SortedSet scientific_names_mapped_to_reduced_specificity ) { @@ -306,7 +385,7 @@ public final class GSDI implements GSDII { final Set set = new HashSet(); for( PhylogenyNode n : g.getChildNode1().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break; @@ -317,7 +396,7 @@ public final class GSDI implements GSDII { boolean multiple = false; for( PhylogenyNode n : g.getChildNode2().getAllExternalDescendants() ) { n = n.getLink(); - while ( n.getParent() != s ) { + while ( ( n.getParent() != s ) && ( n.getParent() != null ) ) { n = n.getParent(); if ( n.isRoot() ) { break;