From: gmungoc Date: Mon, 6 Mar 2017 09:45:54 +0000 (+0000) Subject: JAL-2428 separate TreeModel from TreeBuilder/NJTree/AverageDistanceTree X-Git-Tag: Release_2_10_2~3^2~105^2~2^2~71 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=be27d7b3cadbfb5c488066034c48b1735799b429;p=jalview.git JAL-2428 separate TreeModel from TreeBuilder/NJTree/AverageDistanceTree (first pass) --- diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java index 59cdccf..a3810da 100755 --- a/src/jalview/analysis/AlignmentSorter.java +++ b/src/jalview/analysis/AlignmentSorter.java @@ -66,7 +66,7 @@ public class AlignmentSorter static boolean sortOrderAscending = true; - static NJTree lastTree = null; + static TreeModel lastTree = null; static boolean sortTreeAscending = true; @@ -447,7 +447,7 @@ public class AlignmentSorter * @return DOCUMENT ME! */ private static List getOrderByTree(AlignmentI align, - NJTree tree) + TreeModel tree) { int nSeq = align.getHeight(); @@ -487,7 +487,7 @@ public class AlignmentSorter * @param tree * tree which has */ - public static void sortByTree(AlignmentI align, NJTree tree) + public static void sortByTree(AlignmentI align, TreeModel tree) { List tmp = getOrderByTree(align, tree); diff --git a/src/jalview/analysis/AverageDistanceTree.java b/src/jalview/analysis/AverageDistanceTree.java new file mode 100644 index 0000000..7e3cfe1 --- /dev/null +++ b/src/jalview/analysis/AverageDistanceTree.java @@ -0,0 +1,134 @@ +package jalview.analysis; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.SequenceNode; +import jalview.viewmodel.AlignmentViewport; + +public class AverageDistanceTree extends TreeBuilder +{ + /** + * Constructor + * + * @param av + * @param sm + * @param scoreParameters + */ + public AverageDistanceTree(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI scoreParameters) + { + super(av, sm, scoreParameters); + } + + /** + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. An average of the distances from + * cluster(i) and cluster(j) is calculated, weighted by the sizes of each + * cluster. + * + * @param i + * @param j + */ + @Override + protected void findClusterDistance(int i, int j) + { + int noi = clusters.elementAt(i).cardinality(); + int noj = clusters.elementAt(j).cardinality(); + + // New distances from cluster i to others + double[] newdist = new double[noseqs]; + + for (int l = 0; l < noseqs; l++) + { + if ((l != i) && (l != j)) + { + newdist[l] = ((distances.getValue(i, l) * noi) + (distances + .getValue(j, l) * noj)) / (noi + noj); + } + else + { + newdist[l] = 0; + } + } + + for (int ii = 0; ii < noseqs; ii++) + { + distances.setValue(i, ii, newdist[ii]); + distances.setValue(ii, i, newdist[ii]); + System.out.println(String.format( + "findClusterDistance(%d, %d) newdist to %d is %f", i, j, ii, + newdist[ii])); + } + } + + /** + * Returns the minimum distance between two clusters, and also sets the + * indices of the clusters in fields mini and minj + * + * @return DOCUMENT ME! + */ + @Override + protected double findMinDistance() + { + double min = Double.MAX_VALUE; + + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i + 1; j < noseqs; j++) + { + if (!done.get(i) && !done.get(j)) + { + if (distances.getValue(i, j) < min) + { + mini = i; + minj = j; + + min = distances.getValue(i, j); + } + } + } + } + System.out.println("findMinDistance found " + min + " at " + mini + "," + + minj); + return min; + } + + /** + * DOCUMENT ME! + * + * @param tmpi + * DOCUMENT ME! + * @param tmpj + * DOCUMENT ME! + * @param dist + * DOCUMENT ME! + */ + @Override + protected void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, + double dist) + { + double ih = 0; + double jh = 0; + + SequenceNode sni = tmpi; + SequenceNode snj = tmpj; + + while (sni != null) + { + ih = ih + sni.dist; + sni = (SequenceNode) sni.left(); + } + + while (snj != null) + { + jh = jh + snj.dist; + snj = (SequenceNode) snj.left(); + } + + tmpi.dist = ((dist / 2) - ih); + tmpj.dist = ((dist / 2) - jh); + System.out.println("findNewDistances set tmpi to " + tmpi.dist + + ", tmpj to " + tmpj.dist); + } + +} diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index 50c1795..82e0284 100644 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -20,523 +20,67 @@ */ package jalview.analysis; -import jalview.api.analysis.DistanceScoreModelI; import jalview.api.analysis.ScoreModelI; import jalview.api.analysis.SimilarityParamsI; -import jalview.api.analysis.SimilarityScoreModelI; -import jalview.datamodel.AlignmentView; -import jalview.datamodel.BinaryNode; -import jalview.datamodel.CigarArray; -import jalview.datamodel.NodeTransformI; -import jalview.datamodel.SeqCigar; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; -import jalview.io.NewickFile; -import jalview.math.MatrixI; import jalview.viewmodel.AlignmentViewport; -import java.util.BitSet; -import java.util.Enumeration; -import java.util.List; -import java.util.Vector; - /** * DOCUMENT ME! * * @author $author$ * @version $Revision$ */ -public class NJTree +public class NJTree extends TreeBuilder { - public static final String AVERAGE_DISTANCE = "AV"; - - public static final String NEIGHBOUR_JOINING = "NJ"; - - public static final String FROM_FILE = "FromFile"; - - /* - * Bit j in each BitSet is set if the cluster includes the j'th sequence. - * Clusters are grouped as the tree is built, from an initial state - * where each cluster is a single sequence, until only two clusters are left. - * These are the children of the root of the tree. - */ - Vector clusters; - - SequenceI[] sequences; - - /* - * SequenceData is a string representation of what the user - * sees. The display may contain hidden columns. - */ - public AlignmentView seqData = null; - - /* - * Bit j is set when cluster j has been combined to another cluster. - * The last two bits left unset are the indices of the clusters which - * are the children of the root node. - */ - BitSet done; - - int noseqs; - - int noClus; - - /* - * Value [i, j] is the distance between cluster[i] and cluster[j]. - * Initially these are the pairwise distances of all sequences. - * As the tree is built, these are updated to be the distances - * between the clusters as they are assembled. - */ - MatrixI distances; - - int mini; - - int minj; - - double ri; - - double rj; - - Vector groups = new Vector(); - - SequenceNode maxdist; - - SequenceNode top; - - double maxDistValue; - - double maxheight; - - int ycount; - - Vector node; - - String type; - - String pwtype; - - boolean hasDistances = true; // normal case for jalview trees - - boolean hasBootstrap = false; // normal case for jalview trees - - private boolean hasRootDistance = true; - - /** - * Create a new NJTree object with leaves associated with sequences in seqs, - * and original alignment data represented by Cigar strings. - * - * @param seqs - * SequenceI[] - * @param odata - * Cigar[] - * @param treefile - * NewickFile - */ - public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) - { - this(seqs, treefile); - if (odata != null) - { - seqData = odata; - } - /* - * sequenceString = new String[odata.length]; char gapChar = - * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i < - * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar); - * sequenceString[i] = oseq_aligned.getSequence(); } - */ - } - - /** - * Creates a new NJTree object from a tree from an external source - * - * @param seqs - * SequenceI which should be associated with leafs of treefile - * @param treefile - * A parsed tree - */ - public NJTree(SequenceI[] seqs, NewickFile treefile) - { - this.sequences = seqs; - top = treefile.getTree(); - - /** - * There is no dependent alignment to be recovered from an imported tree. - * - * if (sequenceString == null) { sequenceString = new String[seqs.length]; - * for (int i = 0; i < seqs.length; i++) { sequenceString[i] = - * seqs[i].getSequence(); } } - */ - - hasDistances = treefile.HasDistances(); - hasBootstrap = treefile.HasBootstrap(); - hasRootDistance = treefile.HasRootDistance(); - - maxheight = findHeight(top); - - SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - - Vector leaves = findLeaves(top); - - int i = 0; - int namesleft = seqs.length; - - SequenceNode j; - SequenceI nam; - String realnam; - Vector one2many = new Vector(); - int countOne2Many = 0; - while (i < leaves.size()) - { - j = leaves.elementAt(i++); - realnam = j.getName(); - nam = null; - - if (namesleft > -1) - { - nam = algnIds.findIdMatch(realnam); - } - - if (nam != null) - { - j.setElement(nam); - if (one2many.contains(nam)) - { - countOne2Many++; - // if (jalview.bin.Cache.log.isDebugEnabled()) - // jalview.bin.Cache.log.debug("One 2 many relationship for - // "+nam.getName()); - } - else - { - one2many.addElement(nam); - namesleft--; - } - } - else - { - j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); - j.setPlaceholder(true); - } - } - // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { - // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment - // sequence ids (out of "+one2many.size()+" unique ids) linked to two or - // more leaves."); - // } - // one2many.clear(); - } - /** * Constructor given a viewport, tree type and score model * * @param av * the current alignment viewport - * @param treeType - * NJ or AV * @param sm * a distance or similarity score model to use to compute the tree - * @param scoreParameters TODO - */ - public NJTree(AlignmentViewport av, String treeType, ScoreModelI sm, SimilarityParamsI scoreParameters) - { - // TODO handle type "FromFile" more elegantly - if (!(treeType.equals(NEIGHBOUR_JOINING))) - { - treeType = AVERAGE_DISTANCE; - } - this.type = treeType; - int start, end; - boolean selview = av.getSelectionGroup() != null - && av.getSelectionGroup().getSize() > 1; - AlignmentView seqStrings = av.getAlignmentView(selview); - if (!selview) - { - start = 0; - end = av.getAlignment().getWidth(); - this.sequences = av.getAlignment().getSequencesArray(); - } - else - { - start = av.getSelectionGroup().getStartRes(); - end = av.getSelectionGroup().getEndRes() + 1; - this.sequences = av.getSelectionGroup().getSequencesInOrder( - av.getAlignment()); - } - - init(seqStrings, start, end); - - computeTree(sm, scoreParameters); - } - - void init(AlignmentView seqView, int start, int end) - { - this.node = new Vector(); - if (seqView != null) - { - this.seqData = seqView; - } - else - { - SeqCigar[] seqs = new SeqCigar[sequences.length]; - for (int i = 0; i < sequences.length; i++) - { - seqs[i] = new SeqCigar(sequences[i], start, end); - } - CigarArray sdata = new CigarArray(seqs); - sdata.addOperation(CigarArray.M, end - start + 1); - this.seqData = new AlignmentView(sdata, start); - } - - /* - * count the non-null sequences - */ - noseqs = 0; - - done = new BitSet(); - - for (SequenceI seq : sequences) - { - if (seq != null) - { - noseqs++; - } - } - } - - /** - * Calculates the tree using the given score model and parameters, and the - * configured tree type - *

- * If the score model computes pairwise distance scores, then these are used - * directly to derive the tree - *

- * If the score model computes similarity scores, then the range of the scores - * is reversed to give a distance measure, and this is used to derive the tree - * - * @param sm - * @param scoreOptions + * @param scoreParameters */ - protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions) + public NJTree(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI scoreParameters) { - if (sm instanceof DistanceScoreModelI) - { - distances = ((DistanceScoreModelI) sm).findDistances(seqData, - scoreOptions); - } - else if (sm instanceof SimilarityScoreModelI) - { - /* - * compute similarity and invert it to give a distance measure - */ - MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities( - seqData, scoreOptions); - result.reverseRange(true); - distances = result; - } - - makeLeaves(); - - noClus = clusters.size(); - - cluster(); + super(av, sm, scoreParameters); } + // private long _lycount = 0, _lylimit = 0; /** - * Generate a string representation of the Tree + * DOCUMENT ME! * - * @return Newick File with all tree data available + * @return DOCUMENT ME! */ @Override - public String toString() - { - jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode()); - - return fout.print(hasBootstrap(), hasDistances(), - hasRootDistance()); // output all data available for tree - } - - /** - * - * used when the alignment associated to a tree has changed. - * - * @param list - * Sequence set to be associated with tree nodes - */ - public void updatePlaceHolders(List list) + protected + double findMinDistance() { - Vector leaves = findLeaves(top); - - int sz = leaves.size(); - SequenceIdMatcher seqmatcher = null; - int i = 0; + double min = Double.MAX_VALUE; - while (i < sz) + for (int i = 0; i < (noseqs - 1); i++) { - SequenceNode leaf = leaves.elementAt(i++); - - if (list.contains(leaf.element())) - { - leaf.setPlaceholder(false); - } - else + for (int j = i + 1; j < noseqs; j++) { - if (seqmatcher == null) + if (!done.get(i) && !done.get(j)) { - // Only create this the first time we need it - SequenceI[] seqs = new SequenceI[list.size()]; + double tmp = distances.getValue(i, j) + - (findr(i, j) + findr(j, i)); - for (int j = 0; j < seqs.length; j++) + if (tmp < min) { - seqs[j] = list.get(j); - } - - seqmatcher = new SequenceIdMatcher(seqs); - } - - SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); + mini = i; + minj = j; - if (nam != null) - { - if (!leaf.isPlaceholder()) - { - // remapping the node to a new sequenceI - should remove any refs to - // old one. - // TODO - make many sequenceI to one leaf mappings possible! - // (JBPNote) - } - leaf.setPlaceholder(false); - leaf.setElement(nam); - } - else - { - if (!leaf.isPlaceholder()) - { - // Construct a new placeholder sequence object for this leaf - leaf.setElement(new Sequence(leaf.getName(), - "THISISAPLACEHLDER")); + min = tmp; } - leaf.setPlaceholder(true); - - } - } - } - } - - /** - * rename any nodes according to their associated sequence. This will modify - * the tree's metadata! (ie the original NewickFile or newly generated - * BinaryTree's label data) - */ - public void renameAssociatedNodes() - { - applyToNodes(new NodeTransformI() - { - - @Override - public void transform(BinaryNode nd) - { - Object el = nd.element(); - if (el != null && el instanceof SequenceI) - { - nd.setName(((SequenceI) el).getName()); } } - }); - } - - /** - * Form clusters by grouping sub-clusters, starting from one sequence per - * cluster, and finishing when only two clusters remain - */ - void cluster() - { - while (noClus > 2) - { - if (type.equals(NEIGHBOUR_JOINING)) - { - findMinNJDistance(); - } - else - { - findMinDistance(); - } - - BitSet combined = joinClusters(mini, minj); - - done.set(minj); - - clusters.setElementAt(null, minj); - clusters.setElementAt(combined, mini); - - noClus--; - } - - int rightChild = done.nextClearBit(0); - int leftChild = done.nextClearBit(rightChild + 1); - - joinClusters(leftChild, rightChild); - top = (node.elementAt(leftChild)); - - reCount(top); - findHeight(top); - findMaxDist(top); - } - - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * @param j - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - BitSet joinClusters(int i, int j) - { - double dist = distances.getValue(i, j); - - BitSet combined = new BitSet(); - combined.or(clusters.get(i)); - combined.or(clusters.get(j)); - - ri = findr(i, j); - rj = findr(j, i); - - if (type.equals(NEIGHBOUR_JOINING)) - { - findClusterNJDistance(i, j); - } - else - { - findClusterDistance(i, j); - } - - SequenceNode sn = new SequenceNode(); - - sn.setLeft((node.elementAt(i))); - sn.setRight((node.elementAt(j))); - - SequenceNode tmpi = (node.elementAt(i)); - SequenceNode tmpj = (node.elementAt(j)); - - if (type.equals(NEIGHBOUR_JOINING)) - { - findNewNJDistances(tmpi, tmpj, dist); - } - else - { - findNewDistances(tmpi, tmpj, dist); } - tmpi.setParent(sn); - tmpj.setParent(sn); - - node.setElementAt(sn, i); - - return combined; + return min; } /** @@ -549,8 +93,9 @@ public class NJTree * @param dist * DOCUMENT ME! */ - void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, - double dist) + @Override + protected + void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, double dist) { tmpi.dist = ((dist + ri) - rj) / 2; @@ -567,719 +112,41 @@ public class NJTree } } - /** - * DOCUMENT ME! - * - * @param tmpi - * DOCUMENT ME! - * @param tmpj - * DOCUMENT ME! - * @param dist - * DOCUMENT ME! - */ - void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, - double dist) - { - double ih = 0; - double jh = 0; - - SequenceNode sni = tmpi; - SequenceNode snj = tmpj; - - while (sni != null) - { - ih = ih + sni.dist; - sni = (SequenceNode) sni.left(); - } - - while (snj != null) - { - jh = jh + snj.dist; - snj = (SequenceNode) snj.left(); - } - tmpi.dist = ((dist / 2) - ih); - tmpj.dist = ((dist / 2) - jh); - } /** - * DOCUMENT ME! + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. The new distance to cluster k is + * calculated as the average of the distances from i to k and from j to k, + * less half the distance from i to j. * * @param i - * DOCUMENT ME! * @param j - * DOCUMENT ME! */ + @Override + protected void findClusterDistance(int i, int j) { - int noi = clusters.elementAt(i).cardinality(); - int noj = clusters.elementAt(j).cardinality(); - - // New distances from cluster to others + // New distances from cluster i to others double[] newdist = new double[noseqs]; - + + double ijDistance = distances.getValue(i, j); for (int l = 0; l < noseqs; l++) { if ((l != i) && (l != j)) { - newdist[l] = ((distances.getValue(i, l) * noi) + (distances.getValue( - j, l) * noj)) - / (noi + noj); + newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - ijDistance) / 2; } else { newdist[l] = 0; } } - + for (int ii = 0; ii < noseqs; ii++) { distances.setValue(i, ii, newdist[ii]); distances.setValue(ii, i, newdist[ii]); } } - - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * @param j - * DOCUMENT ME! - */ - void findClusterNJDistance(int i, int j) - { - - // New distances from cluster to others - double[] newdist = new double[noseqs]; - - for (int l = 0; l < noseqs; l++) - { - if ((l != i) && (l != j)) - { - newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - distances - .getValue(i, j)) / 2; - } - else - { - newdist[l] = 0; - } - } - - for (int ii = 0; ii < noseqs; ii++) - { - distances.setValue(i, ii, newdist[ii]); - distances.setValue(ii, i, newdist[ii]); - } - } - - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * @param j - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - double findr(int i, int j) - { - double tmp = 1; - - for (int k = 0; k < noseqs; k++) - { - if ((k != i) && (k != j) && (!done.get(k))) - { - tmp = tmp + distances.getValue(i, k); - } - } - - if (noClus > 2) - { - tmp = tmp / (noClus - 2); - } - - return tmp; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - double findMinNJDistance() - { - double min = Double.MAX_VALUE; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if (!done.get(i) && !done.get(j)) - { - double tmp = distances.getValue(i, j) - - (findr(i, j) + findr(j, i)); - - if (tmp < min) - { - mini = i; - minj = j; - - min = tmp; - } - } - } - } - - return min; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - double findMinDistance() - { - double min = Double.MAX_VALUE; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if (!done.get(i) && !done.get(j)) - { - if (distances.getValue(i, j) < min) - { - mini = i; - minj = j; - - min = distances.getValue(i, j); - } - } - } - } - - return min; - } - - /** - * Start by making a cluster for each individual sequence - */ - void makeLeaves() - { - clusters = new Vector(); - - for (int i = 0; i < noseqs; i++) - { - SequenceNode sn = new SequenceNode(); - - sn.setElement(sequences[i]); - sn.setName(sequences[i].getName()); - node.addElement(sn); - BitSet bs = new BitSet(); - bs.set(i); - clusters.addElement(bs); - } - } - - /** - * Search for leaf nodes below (or at) the given node - * - * @param nd - * root node to search from - * - * @return - */ - public Vector findLeaves(SequenceNode nd) - { - Vector leaves = new Vector(); - findLeaves(nd, leaves); - return leaves; - } - - /** - * Search for leaf nodes. - * - * @param nd - * root node to search from - * @param leaves - * Vector of leaves to add leaf node objects too. - * - * @return Vector of leaf nodes on binary tree - */ - Vector findLeaves(SequenceNode nd, - Vector leaves) - { - if (nd == null) - { - return leaves; - } - - if ((nd.left() == null) && (nd.right() == null)) // Interior node - // detection - { - leaves.addElement(nd); - - return leaves; - } - else - { - /* - * TODO: Identify internal nodes... if (node.isSequenceLabel()) { - * leaves.addElement(node); } - */ - findLeaves((SequenceNode) nd.left(), leaves); - findLeaves((SequenceNode) nd.right(), leaves); - } - - return leaves; - } - - /** - * printNode is mainly for debugging purposes. - * - * @param nd - * SequenceNode - */ - void printNode(SequenceNode nd) - { - if (nd == null) - { - return; - } - - if ((nd.left() == null) && (nd.right() == null)) - { - System.out.println("Leaf = " + ((SequenceI) nd.element()).getName()); - System.out.println("Dist " + nd.dist); - System.out.println("Boot " + nd.getBootstrap()); - } - else - { - System.out.println("Dist " + nd.dist); - printNode((SequenceNode) nd.left()); - printNode((SequenceNode) nd.right()); - } - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - void findMaxDist(SequenceNode nd) - { - if (nd == null) - { - return; - } - - if ((nd.left() == null) && (nd.right() == null)) - { - double dist = nd.dist; - - if (dist > maxDistValue) - { - maxdist = nd; - maxDistValue = dist; - } - } - else - { - findMaxDist((SequenceNode) nd.left()); - findMaxDist((SequenceNode) nd.right()); - } - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getGroups() - { - return groups; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public double getMaxHeight() - { - return maxheight; - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - * @param threshold - * DOCUMENT ME! - */ - public void groupNodes(SequenceNode nd, float threshold) - { - if (nd == null) - { - return; - } - - if ((nd.height / maxheight) > threshold) - { - groups.addElement(nd); - } - else - { - groupNodes((SequenceNode) nd.left(), threshold); - groupNodes((SequenceNode) nd.right(), threshold); - } - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public double findHeight(SequenceNode nd) - { - if (nd == null) - { - return maxheight; - } - - if ((nd.left() == null) && (nd.right() == null)) - { - nd.height = ((SequenceNode) nd.parent()).height + nd.dist; - - if (nd.height > maxheight) - { - return nd.height; - } - else - { - return maxheight; - } - } - else - { - if (nd.parent() != null) - { - nd.height = ((SequenceNode) nd.parent()).height + nd.dist; - } - else - { - maxheight = 0; - nd.height = (float) 0.0; - } - - maxheight = findHeight((SequenceNode) (nd.left())); - maxheight = findHeight((SequenceNode) (nd.right())); - } - - return maxheight; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - SequenceNode reRoot() - { - // TODO not used - remove? - if (maxdist != null) - { - ycount = 0; - - double tmpdist = maxdist.dist; - - // New top - SequenceNode sn = new SequenceNode(); - sn.setParent(null); - - // New right hand of top - SequenceNode snr = (SequenceNode) maxdist.parent(); - changeDirection(snr, maxdist); - System.out.println("Printing reversed tree"); - printN(snr); - snr.dist = tmpdist / 2; - maxdist.dist = tmpdist / 2; - - snr.setParent(sn); - maxdist.setParent(sn); - - sn.setRight(snr); - sn.setLeft(maxdist); - - top = sn; - - ycount = 0; - reCount(top); - findHeight(top); - } - - return top; - } - - /** - * - * @return true if original sequence data can be recovered - */ - public boolean hasOriginalSequenceData() - { - return seqData != null; - } - - /** - * Returns original alignment data used for calculation - or null where not - * available. - * - * @return null or cut'n'pasteable alignment - */ - public String printOriginalSequenceData(char gapChar) - { - if (seqData == null) - { - return null; - } - - StringBuffer sb = new StringBuffer(); - String[] seqdatas = seqData.getSequenceStrings(gapChar); - for (int i = 0; i < seqdatas.length; i++) - { - sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequences[i] - .getName())); - sb.append(" " + seqdatas[i] + "\n"); - } - return sb.toString(); - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - void printN(SequenceNode nd) - { - if (nd == null) - { - return; - } - - if ((nd.left() != null) && (nd.right() != null)) - { - printN((SequenceNode) nd.left()); - printN((SequenceNode) nd.right()); - } - else - { - System.out.println(" name = " + ((SequenceI) nd.element()).getName()); - } - - System.out.println(" dist = " + nd.dist + " " + nd.count + " " - + nd.height); - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - public void reCount(SequenceNode nd) - { - ycount = 0; - // _lycount = 0; - // _lylimit = this.node.size(); - _reCount(nd); - } - - // private long _lycount = 0, _lylimit = 0; - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - void _reCount(SequenceNode nd) - { - // if (_lycount<_lylimit) - // { - // System.err.println("Warning: depth of _recount greater than number of nodes."); - // } - if (nd == null) - { - return; - } - // _lycount++; - - if ((nd.left() != null) && (nd.right() != null)) - { - - _reCount((SequenceNode) nd.left()); - _reCount((SequenceNode) nd.right()); - - SequenceNode l = (SequenceNode) nd.left(); - SequenceNode r = (SequenceNode) nd.right(); - - nd.count = l.count + r.count; - nd.ycount = (l.ycount + r.ycount) / 2; - } - else - { - nd.count = 1; - nd.ycount = ycount++; - } - // _lycount--; - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - public void swapNodes(SequenceNode nd) - { - if (nd == null) - { - return; - } - - SequenceNode tmp = (SequenceNode) nd.left(); - - nd.setLeft(nd.right()); - nd.setRight(tmp); - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - * @param dir - * DOCUMENT ME! - */ - void changeDirection(SequenceNode nd, SequenceNode dir) - { - if (nd == null) - { - return; - } - - if (nd.parent() != top) - { - changeDirection((SequenceNode) nd.parent(), nd); - - SequenceNode tmp = (SequenceNode) nd.parent(); - - if (dir == nd.left()) - { - nd.setParent(dir); - nd.setLeft(tmp); - } - else if (dir == nd.right()) - { - nd.setParent(dir); - nd.setRight(tmp); - } - } - else - { - if (dir == nd.left()) - { - nd.setParent(nd.left()); - - if (top.left() == nd) - { - nd.setRight(top.right()); - } - else - { - nd.setRight(top.left()); - } - } - else - { - nd.setParent(nd.right()); - - if (top.left() == nd) - { - nd.setLeft(top.right()); - } - else - { - nd.setLeft(top.left()); - } - } - } - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceNode getMaxDist() - { - return maxdist; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceNode getTopNode() - { - return top; - } - - /** - * - * @return true if tree has real distances - */ - public boolean hasDistances() - { - return hasDistances; - } - - /** - * - * @return true if tree has real bootstrap values - */ - public boolean hasBootstrap() - { - return hasBootstrap; - } - - public boolean hasRootDistance() - { - return hasRootDistance; - } - - /** - * apply the given transform to all the nodes in the tree. - * - * @param nodeTransformI - */ - public void applyToNodes(NodeTransformI nodeTransformI) - { - for (Enumeration nodes = node.elements(); nodes - .hasMoreElements(); nodeTransformI.transform(nodes - .nextElement())) - { - ; - } - } } diff --git a/src/jalview/analysis/TreeBuilder.java b/src/jalview/analysis/TreeBuilder.java new file mode 100644 index 0000000..49c56c9 --- /dev/null +++ b/src/jalview/analysis/TreeBuilder.java @@ -0,0 +1,464 @@ +package jalview.analysis; + +import jalview.api.analysis.DistanceScoreModelI; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.api.analysis.SimilarityScoreModelI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.CigarArray; +import jalview.datamodel.SeqCigar; +import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceNode; +import jalview.math.MatrixI; +import jalview.viewmodel.AlignmentViewport; + +import java.util.BitSet; +import java.util.Vector; + +public abstract class TreeBuilder +{ + public static final String AVERAGE_DISTANCE = "AV"; + + public static final String NEIGHBOUR_JOINING = "NJ"; + + protected Vector clusters; + + protected SequenceI[] sequences; + + public AlignmentView seqData; + + protected BitSet done; + + protected int noseqs; + + int noClus; + + protected MatrixI distances; + + protected int mini; + + protected int minj; + + protected double ri; + + protected double rj; + + SequenceNode maxdist; + + SequenceNode top; + + double maxDistValue; + + double maxheight; + + int ycount; + + Vector node; + + /** + * Constructor + * + * @param av + * @param sm + * @param scoreParameters + */ + public TreeBuilder(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI scoreParameters) + { + int start, end; + boolean selview = av.getSelectionGroup() != null + && av.getSelectionGroup().getSize() > 1; + AlignmentView seqStrings = av.getAlignmentView(selview); + if (!selview) + { + start = 0; + end = av.getAlignment().getWidth(); + this.sequences = av.getAlignment().getSequencesArray(); + } + else + { + start = av.getSelectionGroup().getStartRes(); + end = av.getSelectionGroup().getEndRes() + 1; + this.sequences = av.getSelectionGroup().getSequencesInOrder( + av.getAlignment()); + } + + init(seqStrings, start, end); + + computeTree(sm, scoreParameters); + } + + public SequenceI[] getSequences() + { + return sequences; + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + double findHeight(SequenceNode nd) + { + if (nd == null) + { + return maxheight; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + + if (nd.height > maxheight) + { + return nd.height; + } + else + { + return maxheight; + } + } + else + { + if (nd.parent() != null) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + } + else + { + maxheight = 0; + nd.height = (float) 0.0; + } + + maxheight = findHeight((SequenceNode) (nd.left())); + maxheight = findHeight((SequenceNode) (nd.right())); + } + + return maxheight; + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void reCount(SequenceNode nd) + { + ycount = 0; + // _lycount = 0; + // _lylimit = this.node.size(); + _reCount(nd); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void _reCount(SequenceNode nd) + { + // if (_lycount<_lylimit) + // { + // System.err.println("Warning: depth of _recount greater than number of nodes."); + // } + if (nd == null) + { + return; + } + // _lycount++; + + if ((nd.left() != null) && (nd.right() != null)) + { + + _reCount((SequenceNode) nd.left()); + _reCount((SequenceNode) nd.right()); + + SequenceNode l = (SequenceNode) nd.left(); + SequenceNode r = (SequenceNode) nd.right(); + + nd.count = l.count + r.count; + nd.ycount = (l.ycount + r.ycount) / 2; + } + else + { + nd.count = 1; + nd.ycount = ycount++; + } + // _lycount--; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceNode getTopNode() + { + return top; + } + + /** + * + * @return true if tree has real distances + */ + public boolean hasDistances() + { + return true; + } + + /** + * + * @return true if tree has real bootstrap values + */ + public boolean hasBootstrap() + { + return false; + } + + public boolean hasRootDistance() + { + return true; + } + + /** + * Form clusters by grouping sub-clusters, starting from one sequence per + * cluster, and finishing when only two clusters remain + */ + void cluster() + { + while (noClus > 2) + { + findMinDistance(); + + joinClusters(mini, minj); + + noClus--; + } + + int rightChild = done.nextClearBit(0); + int leftChild = done.nextClearBit(rightChild + 1); + + joinClusters(leftChild, rightChild); + top = (node.elementAt(leftChild)); + + reCount(top); + findHeight(top); + findMaxDist(top); + } + + protected abstract double findMinDistance(); + + /** + * Calculates the tree using the given score model and parameters, and the + * configured tree type + *

+ * If the score model computes pairwise distance scores, then these are used + * directly to derive the tree + *

+ * If the score model computes similarity scores, then the range of the scores + * is reversed to give a distance measure, and this is used to derive the tree + * + * @param sm + * @param scoreOptions + */ + protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions) + { + if (sm instanceof DistanceScoreModelI) + { + distances = ((DistanceScoreModelI) sm).findDistances(seqData, + scoreOptions); + } + else if (sm instanceof SimilarityScoreModelI) + { + /* + * compute similarity and invert it to give a distance measure + */ + MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities( + seqData, scoreOptions); + result.reverseRange(true); + distances = result; + } + + makeLeaves(); + + noClus = clusters.size(); + + cluster(); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void findMaxDist(SequenceNode nd) + { + if (nd == null) + { + return; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + double dist = nd.dist; + + if (dist > maxDistValue) + { + maxdist = nd; + maxDistValue = dist; + } + } + else + { + findMaxDist((SequenceNode) nd.left()); + findMaxDist((SequenceNode) nd.right()); + } + } + + /** + * DOCUMENT ME! + * + * @param i + * DOCUMENT ME! + * @param j + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + protected double findr(int i, int j) + { + double tmp = 1; + + for (int k = 0; k < noseqs; k++) + { + if ((k != i) && (k != j) && (!done.get(k))) + { + tmp = tmp + distances.getValue(i, k); + } + } + + if (noClus > 2) + { + tmp = tmp / (noClus - 2); + } + + return tmp; + } + + protected void init(AlignmentView seqView, int start, int end) + { + this.node = new Vector(); + if (seqView != null) + { + this.seqData = seqView; + } + else + { + SeqCigar[] seqs = new SeqCigar[sequences.length]; + for (int i = 0; i < sequences.length; i++) + { + seqs[i] = new SeqCigar(sequences[i], start, end); + } + CigarArray sdata = new CigarArray(seqs); + sdata.addOperation(CigarArray.M, end - start + 1); + this.seqData = new AlignmentView(sdata, start); + } + + /* + * count the non-null sequences + */ + noseqs = 0; + + done = new BitSet(); + + for (SequenceI seq : sequences) + { + if (seq != null) + { + noseqs++; + } + } + } + + /** + * Merges cluster(j) to cluster(i) and recalculates cluster and node distances + * + * @param i + * @param j + */ + void joinClusters(final int i, final int j) + { + double dist = distances.getValue(i, j); + + /* + * add the members of cluster(j) to cluster(i) + */ + clusters.get(i).or(clusters.get(j)); + + ri = findr(i, j); + rj = findr(j, i); + + findClusterDistance(i, j); + + SequenceNode sn = new SequenceNode(); + + sn.setLeft((node.elementAt(i))); + sn.setRight((node.elementAt(j))); + + SequenceNode tmpi = (node.elementAt(i)); + SequenceNode tmpj = (node.elementAt(j)); + + findNewDistances(tmpi, tmpj, dist); + + tmpi.setParent(sn); + tmpj.setParent(sn); + + node.setElementAt(sn, i); + + /* + * mark cluster j as disposed of + */ + done.set(j); + clusters.setElementAt(null, j); + } + + protected abstract void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, + double dist); + + /** + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. The form of the calculation depends on + * the tree clustering method being used. + * + * @param i + * @param j + */ + protected abstract void findClusterDistance(int i, int j); + + /** + * Start by making a cluster for each individual sequence + */ + void makeLeaves() + { + clusters = new Vector(); + + for (int i = 0; i < noseqs; i++) + { + SequenceNode sn = new SequenceNode(); + + sn.setElement(sequences[i]); + sn.setName(sequences[i].getName()); + node.addElement(sn); + BitSet bs = new BitSet(); + bs.set(i); + clusters.addElement(bs); + } + } + +} diff --git a/src/jalview/analysis/TreeModel.java b/src/jalview/analysis/TreeModel.java new file mode 100644 index 0000000..2f8d788 --- /dev/null +++ b/src/jalview/analysis/TreeModel.java @@ -0,0 +1,713 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +import jalview.datamodel.AlignmentView; +import jalview.datamodel.BinaryNode; +import jalview.datamodel.NodeTransformI; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceNode; +import jalview.io.NewickFile; + +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.Vector; + +/** + * A model of a tree, either computed by Jalview or loaded from a file or other + * resource or service + */ +public class TreeModel +{ + + SequenceI[] sequences; + + /* + * SequenceData is a string representation of what the user + * sees. The display may contain hidden columns. + */ + public AlignmentView seqData; + + int noseqs; + + SequenceNode top; + + double maxDistValue; + + double maxheight; + + int ycount; + + Vector node; + + boolean hasDistances = true; // normal case for jalview trees + + boolean hasBootstrap = false; // normal case for jalview trees + + private boolean hasRootDistance = true; + + /** + * Create a new TreeModel object with leaves associated with sequences in + * seqs, and original alignment data represented by Cigar strings + * + * @param seqs + * SequenceI[] + * @param odata + * Cigar[] + * @param treefile + * NewickFile + */ + public TreeModel(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) + { + this(seqs, treefile); + seqData = odata; + } + + /** + * Creates a new TreeModel object from a tree from an external source + * + * @param seqs + * SequenceI which should be associated with leafs of treefile + * @param treefile + * A parsed tree + */ + public TreeModel(SequenceI[] seqs, NewickFile treefile) + { + this(seqs, treefile.getTree(), treefile.HasDistances(), treefile + .HasBootstrap(), treefile.HasRootDistance()); + + associateLeavesToSequences(seqs); + } + + /** + * Constructor given a calculated tree + * + * @param tree + */ + public TreeModel(TreeBuilder tree) + { + this(tree.getSequences(), tree.getTopNode(), tree.hasDistances(), tree + .hasBootstrap(), tree.hasRootDistance()); + } + + /** + * Constructor given sequences, root node and tree property flags + * + * @param seqs + * @param root + * @param hasDist + * @param hasBoot + * @param hasRootDist + */ + public TreeModel(SequenceI[] seqs, SequenceNode root, boolean hasDist, + boolean hasBoot, boolean hasRootDist) + { + this.sequences = seqs; + top = root; + + hasDistances = hasDist; + hasBootstrap = hasBoot; + hasRootDistance = hasRootDist; + + maxheight = findHeight(top); + } + + /** + * @param seqs + */ + public void associateLeavesToSequences(SequenceI[] seqs) + { + SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); + + Vector leaves = findLeaves(top); + + int i = 0; + int namesleft = seqs.length; + + SequenceNode j; + SequenceI nam; + String realnam; + Vector one2many = new Vector(); + // int countOne2Many = 0; + while (i < leaves.size()) + { + j = leaves.elementAt(i++); + realnam = j.getName(); + nam = null; + + if (namesleft > -1) + { + nam = algnIds.findIdMatch(realnam); + } + + if (nam != null) + { + j.setElement(nam); + if (one2many.contains(nam)) + { + // countOne2Many++; + // if (jalview.bin.Cache.log.isDebugEnabled()) + // jalview.bin.Cache.log.debug("One 2 many relationship for + // "+nam.getName()); + } + else + { + one2many.addElement(nam); + namesleft--; + } + } + else + { + j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); + j.setPlaceholder(true); + } + } + // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { + // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment + // sequence ids (out of "+one2many.size()+" unique ids) linked to two or + // more leaves."); + // } + // one2many.clear(); + } + + /** + * Generate a string representation of the Tree + * + * @return Newick File with all tree data available + */ + public String print() + { + NewickFile fout = new NewickFile(getTopNode()); + + return fout.print(hasBootstrap(), hasDistances(), + hasRootDistance()); // output all data available for tree + } + + /** + * + * used when the alignment associated to a tree has changed. + * + * @param list + * Sequence set to be associated with tree nodes + */ + public void updatePlaceHolders(List list) + { + Vector leaves = findLeaves(top); + + int sz = leaves.size(); + SequenceIdMatcher seqmatcher = null; + int i = 0; + + while (i < sz) + { + SequenceNode leaf = leaves.elementAt(i++); + + if (list.contains(leaf.element())) + { + leaf.setPlaceholder(false); + } + else + { + if (seqmatcher == null) + { + // Only create this the first time we need it + SequenceI[] seqs = new SequenceI[list.size()]; + + for (int j = 0; j < seqs.length; j++) + { + seqs[j] = list.get(j); + } + + seqmatcher = new SequenceIdMatcher(seqs); + } + + SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); + + if (nam != null) + { + if (!leaf.isPlaceholder()) + { + // remapping the node to a new sequenceI - should remove any refs to + // old one. + // TODO - make many sequenceI to one leaf mappings possible! + // (JBPNote) + } + leaf.setPlaceholder(false); + leaf.setElement(nam); + } + else + { + if (!leaf.isPlaceholder()) + { + // Construct a new placeholder sequence object for this leaf + leaf.setElement(new Sequence(leaf.getName(), + "THISISAPLACEHLDER")); + } + leaf.setPlaceholder(true); + + } + } + } + } + + /** + * rename any nodes according to their associated sequence. This will modify + * the tree's metadata! (ie the original NewickFile or newly generated + * BinaryTree's label data) + */ + public void renameAssociatedNodes() + { + applyToNodes(new NodeTransformI() + { + + @Override + public void transform(BinaryNode nd) + { + Object el = nd.element(); + if (el != null && el instanceof SequenceI) + { + nd.setName(((SequenceI) el).getName()); + } + } + }); + } + + /** + * Search for leaf nodes below (or at) the given node + * + * @param nd + * root node to search from + * + * @return + */ + public Vector findLeaves(SequenceNode nd) + { + Vector leaves = new Vector(); + findLeaves(nd, leaves); + return leaves; + } + + /** + * Search for leaf nodes. + * + * @param nd + * root node to search from + * @param leaves + * Vector of leaves to add leaf node objects too. + * + * @return Vector of leaf nodes on binary tree + */ + Vector findLeaves(SequenceNode nd, + Vector leaves) + { + if (nd == null) + { + return leaves; + } + + if ((nd.left() == null) && (nd.right() == null)) // Interior node + // detection + { + leaves.addElement(nd); + + return leaves; + } + else + { + /* + * TODO: Identify internal nodes... if (node.isSequenceLabel()) { + * leaves.addElement(node); } + */ + findLeaves((SequenceNode) nd.left(), leaves); + findLeaves((SequenceNode) nd.right(), leaves); + } + + return leaves; + } + + /** + * printNode is mainly for debugging purposes. + * + * @param nd + * SequenceNode + */ + void printNode(SequenceNode nd) + { + if (nd == null) + { + return; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + System.out.println("Leaf = " + ((SequenceI) nd.element()).getName()); + System.out.println("Dist " + nd.dist); + System.out.println("Boot " + nd.getBootstrap()); + } + else + { + System.out.println("Dist " + nd.dist); + printNode((SequenceNode) nd.left()); + printNode((SequenceNode) nd.right()); + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double getMaxHeight() + { + return maxheight; + } + + /** + * Makes a list of groups, where each group is represented by a node whose + * height (distance from the root node), as a fraction of the height of the + * whole tree, is greater than the given threshold. This corresponds to + * selecting the nodes immediately to the right of a vertical line + * partitioning the tree (if the tree is drawn with root to the left). Each + * such node represents a group that contains all of the sequences linked to + * the child leaf nodes. + * + * @param threshold + * @see #getGroups() + */ + public List groupNodes(float threshold) + { + List groups = new ArrayList(); + _groupNodes(groups, getTopNode(), threshold); + return groups; + } + + protected void _groupNodes(List groups, SequenceNode nd, + float threshold) + { + if (nd == null) + { + return; + } + + if ((nd.height / maxheight) > threshold) + { + groups.add(nd); + } + else + { + _groupNodes(groups, (SequenceNode) nd.left(), threshold); + _groupNodes(groups, (SequenceNode) nd.right(), threshold); + } + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double findHeight(SequenceNode nd) + { + if (nd == null) + { + return maxheight; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + + if (nd.height > maxheight) + { + return nd.height; + } + else + { + return maxheight; + } + } + else + { + if (nd.parent() != null) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + } + else + { + maxheight = 0; + nd.height = (float) 0.0; + } + + maxheight = findHeight((SequenceNode) (nd.left())); + maxheight = findHeight((SequenceNode) (nd.right())); + } + + return maxheight; + } + + /** + * + * @return true if original sequence data can be recovered + */ + public boolean hasOriginalSequenceData() + { + return seqData != null; + } + + /** + * Returns original alignment data used for calculation - or null where not + * available. + * + * @return null or cut'n'pasteable alignment + */ + public String printOriginalSequenceData(char gapChar) + { + if (seqData == null) + { + return null; + } + + StringBuffer sb = new StringBuffer(); + String[] seqdatas = seqData.getSequenceStrings(gapChar); + for (int i = 0; i < seqdatas.length; i++) + { + sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequences[i] + .getName())); + sb.append(" " + seqdatas[i] + "\n"); + } + return sb.toString(); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void printN(SequenceNode nd) + { + if (nd == null) + { + return; + } + + if ((nd.left() != null) && (nd.right() != null)) + { + printN((SequenceNode) nd.left()); + printN((SequenceNode) nd.right()); + } + else + { + System.out.println(" name = " + ((SequenceI) nd.element()).getName()); + } + + System.out.println(" dist = " + nd.dist + " " + nd.count + " " + + nd.height); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + public void reCount(SequenceNode nd) + { + ycount = 0; + // _lycount = 0; + // _lylimit = this.node.size(); + _reCount(nd); + } + + // private long _lycount = 0, _lylimit = 0; + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void _reCount(SequenceNode nd) + { + // if (_lycount<_lylimit) + // { + // System.err.println("Warning: depth of _recount greater than number of nodes."); + // } + if (nd == null) + { + return; + } + // _lycount++; + + if ((nd.left() != null) && (nd.right() != null)) + { + + _reCount((SequenceNode) nd.left()); + _reCount((SequenceNode) nd.right()); + + SequenceNode l = (SequenceNode) nd.left(); + SequenceNode r = (SequenceNode) nd.right(); + + nd.count = l.count + r.count; + nd.ycount = (l.ycount + r.ycount) / 2; + } + else + { + nd.count = 1; + nd.ycount = ycount++; + } + // _lycount--; + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + public void swapNodes(SequenceNode nd) + { + if (nd == null) + { + return; + } + + SequenceNode tmp = (SequenceNode) nd.left(); + + nd.setLeft(nd.right()); + nd.setRight(tmp); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + * @param dir + * DOCUMENT ME! + */ + void changeDirection(SequenceNode nd, SequenceNode dir) + { + if (nd == null) + { + return; + } + + if (nd.parent() != top) + { + changeDirection((SequenceNode) nd.parent(), nd); + + SequenceNode tmp = (SequenceNode) nd.parent(); + + if (dir == nd.left()) + { + nd.setParent(dir); + nd.setLeft(tmp); + } + else if (dir == nd.right()) + { + nd.setParent(dir); + nd.setRight(tmp); + } + } + else + { + if (dir == nd.left()) + { + nd.setParent(nd.left()); + + if (top.left() == nd) + { + nd.setRight(top.right()); + } + else + { + nd.setRight(top.left()); + } + } + else + { + nd.setParent(nd.right()); + + if (top.left() == nd) + { + nd.setLeft(top.right()); + } + else + { + nd.setLeft(top.left()); + } + } + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceNode getTopNode() + { + return top; + } + + /** + * + * @return true if tree has real distances + */ + public boolean hasDistances() + { + return hasDistances; + } + + /** + * + * @return true if tree has real bootstrap values + */ + public boolean hasBootstrap() + { + return hasBootstrap; + } + + public boolean hasRootDistance() + { + return hasRootDistance; + } + + /** + * apply the given transform to all the nodes in the tree. + * + * @param nodeTransformI + */ + public void applyToNodes(NodeTransformI nodeTransformI) + { + for (Enumeration nodes = node.elements(); nodes + .hasMoreElements(); nodeTransformI.transform(nodes + .nextElement())) + { + ; + } + } +} diff --git a/src/jalview/appletgui/AlignViewport.java b/src/jalview/appletgui/AlignViewport.java index fc087c6..f204462 100644 --- a/src/jalview/appletgui/AlignViewport.java +++ b/src/jalview/appletgui/AlignViewport.java @@ -20,7 +20,7 @@ */ package jalview.appletgui; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.api.FeatureSettingsModelI; import jalview.bin.JalviewLite; @@ -52,7 +52,7 @@ public class AlignViewport extends AlignmentViewport implements boolean validCharWidth = true; - NJTree currentTree = null; + TreeModel currentTree = null; public jalview.bin.JalviewLite applet; @@ -302,12 +302,12 @@ public class AlignViewport extends AlignmentViewport implements setEndSeq(height / getCharHeight()); } - public void setCurrentTree(NJTree tree) + public void setCurrentTree(TreeModel tree) { currentTree = tree; } - public NJTree getCurrentTree() + public TreeModel getCurrentTree() { return currentTree; } diff --git a/src/jalview/appletgui/TreeCanvas.java b/src/jalview/appletgui/TreeCanvas.java index 7040c42..48e9d64 100755 --- a/src/jalview/appletgui/TreeCanvas.java +++ b/src/jalview/appletgui/TreeCanvas.java @@ -21,7 +21,7 @@ package jalview.appletgui; import jalview.analysis.Conservation; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; @@ -48,12 +48,13 @@ import java.awt.event.MouseListener; import java.awt.event.MouseMotionListener; import java.util.Enumeration; import java.util.Hashtable; +import java.util.List; import java.util.Vector; public class TreeCanvas extends Panel implements MouseListener, MouseMotionListener { - NJTree tree; + TreeModel tree; ScrollPane scrollPane; @@ -115,13 +116,13 @@ public class TreeCanvas extends Panel implements MouseListener, selected.addOrRemove(sequence, true); } - public void setTree(NJTree tree) + public void setTree(TreeModel tree2) { - this.tree = tree; - tree.findHeight(tree.getTopNode()); + this.tree = tree2; + tree2.findHeight(tree2.getTopNode()); // Now have to calculate longest name based on the leaves - Vector leaves = tree.findLeaves(tree.getTopNode()); + Vector leaves = tree2.findLeaves(tree2.getTopNode()); boolean has_placeholders = false; longestName = ""; @@ -593,8 +594,7 @@ public class TreeCanvas extends Panel implements MouseListener, threshold = (float) (x - offx) / (float) (getSize().width - labelLength - 2 * offx); - tree.getGroups().removeAllElements(); - tree.groupNodes(tree.getTopNode(), threshold); + List groups = tree.groupNodes(threshold); setColor(tree.getTopNode(), Color.black); av.setSelectionGroup(null); @@ -608,7 +608,7 @@ public class TreeCanvas extends Panel implements MouseListener, codingComplement.clearSequenceColours(); } - colourGroups(); + colourGroups(groups); } } @@ -618,17 +618,16 @@ public class TreeCanvas extends Panel implements MouseListener, } - void colourGroups() + void colourGroups(List groups) { - for (int i = 0; i < tree.getGroups().size(); i++) + for (int i = 0; i < groups.size(); i++) { Color col = new Color((int) (Math.random() * 255), (int) (Math.random() * 255), (int) (Math.random() * 255)); - setColor(tree.getGroups().elementAt(i), col.brighter()); + setColor(groups.get(i), col.brighter()); - Vector l = tree.findLeaves(tree.getGroups() - .elementAt(i)); + Vector l = tree.findLeaves(groups.get(i)); Vector sequences = new Vector(); for (int j = 0; j < l.size(); j++) diff --git a/src/jalview/appletgui/TreePanel.java b/src/jalview/appletgui/TreePanel.java index 7266665..a9e11a7 100644 --- a/src/jalview/appletgui/TreePanel.java +++ b/src/jalview/appletgui/TreePanel.java @@ -20,7 +20,10 @@ */ package jalview.appletgui; +import jalview.analysis.AverageDistanceTree; import jalview.analysis.NJTree; +import jalview.analysis.TreeBuilder; +import jalview.analysis.TreeModel; import jalview.analysis.scoremodels.ScoreModels; import jalview.analysis.scoremodels.SimilarityParams; import jalview.api.analysis.ScoreModelI; @@ -59,13 +62,13 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, TreeCanvas treeCanvas; - NJTree tree; + TreeModel tree; AlignmentPanel ap; AlignViewport av; - public NJTree getTree() + public TreeModel getTree() { return tree; } @@ -209,19 +212,23 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, { if (odata == null) { - tree = new NJTree(av.getAlignment().getSequencesArray(), newtree); + tree = new TreeModel(av.getAlignment().getSequencesArray(), + newtree); } else { - tree = new NJTree(av.getAlignment().getSequencesArray(), odata, - newtree); + tree = new TreeModel(av.getAlignment().getSequencesArray(), + odata, newtree); } } else { ScoreModelI sm = configureScoreModel(pwtype); - tree = new NJTree(av, type, sm, SimilarityParams.Jalview); + TreeBuilder njtree = type.equals(TreeBuilder.NEIGHBOUR_JOINING) ? new NJTree( + av, sm, SimilarityParams.Jalview) + : new AverageDistanceTree(av, sm, SimilarityParams.Jalview); + tree = new TreeModel(njtree); } tree.reCount(tree.getTopNode()); diff --git a/src/jalview/gui/AlignViewport.java b/src/jalview/gui/AlignViewport.java index e0efa7c..4036eba 100644 --- a/src/jalview/gui/AlignViewport.java +++ b/src/jalview/gui/AlignViewport.java @@ -22,7 +22,7 @@ package jalview.gui; import jalview.analysis.AlignmentUtils; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; import jalview.api.FeatureColourI; @@ -76,7 +76,7 @@ public class AlignViewport extends AlignmentViewport implements { Font font; - NJTree currentTree = null; + TreeModel currentTree = null; boolean cursorMode = false; @@ -494,7 +494,7 @@ public class AlignViewport extends AlignmentViewport implements * @param tree * DOCUMENT ME! */ - public void setCurrentTree(NJTree tree) + public void setCurrentTree(TreeModel tree) { currentTree = tree; } @@ -504,7 +504,7 @@ public class AlignViewport extends AlignmentViewport implements * * @return DOCUMENT ME! */ - public NJTree getCurrentTree() + public TreeModel getCurrentTree() { return currentTree; } diff --git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index 68184e9..ecd1f38 100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -1105,7 +1105,7 @@ public class Jalview2XML Tree tree = new Tree(); tree.setTitle(tp.getTitle()); tree.setCurrentTree((av.currentTree == tp.getTree())); - tree.setNewick(tp.getTree().toString()); + tree.setNewick(tp.getTree().print()); tree.setThreshold(tp.treeCanvas.threshold); tree.setFitToWindow(tp.fitToWindow.getState()); diff --git a/src/jalview/gui/TreeCanvas.java b/src/jalview/gui/TreeCanvas.java index eb4253f..3730b56 100755 --- a/src/jalview/gui/TreeCanvas.java +++ b/src/jalview/gui/TreeCanvas.java @@ -21,7 +21,7 @@ package jalview.gui; import jalview.analysis.Conservation; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; @@ -53,6 +53,7 @@ import java.awt.print.PrinterException; import java.awt.print.PrinterJob; import java.util.Enumeration; import java.util.Hashtable; +import java.util.List; import java.util.Vector; import javax.swing.JColorChooser; @@ -73,7 +74,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, /** DOCUMENT ME!! */ public static final String PLACEHOLDER = " * "; - NJTree tree; + TreeModel tree; JScrollPane scrollPane; @@ -168,7 +169,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, * @param tree * DOCUMENT ME! */ - public void setTree(NJTree tree) + public void setTree(TreeModel tree) { this.tree = tree; tree.findHeight(tree.getTopNode()); @@ -929,8 +930,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, threshold = (float) (x - offx) / (float) (getWidth() - labelLength - (2 * offx)); - tree.getGroups().removeAllElements(); - tree.groupNodes(tree.getTopNode(), threshold); + List groups = tree.groupNodes(threshold); setColor(tree.getTopNode(), Color.black); AlignmentPanel[] aps = getAssociatedPanels(); @@ -950,7 +950,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, aps[a].av.getCodingComplement().clearSequenceColours(); } } - colourGroups(); + colourGroups(groups); } PaintRefresher.Refresh(tp, ap.av.getSequenceSetId()); @@ -959,17 +959,16 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, } - void colourGroups() + void colourGroups(List groups) { AlignmentPanel[] aps = getAssociatedPanels(); - for (int i = 0; i < tree.getGroups().size(); i++) + for (int i = 0; i < groups.size(); i++) { Color col = new Color((int) (Math.random() * 255), (int) (Math.random() * 255), (int) (Math.random() * 255)); - setColor(tree.getGroups().elementAt(i), col.brighter()); + setColor(groups.get(i), col.brighter()); - Vector l = tree.findLeaves(tree.getGroups() - .elementAt(i)); + Vector l = tree.findLeaves(groups.get(i)); Vector sequences = new Vector(); diff --git a/src/jalview/gui/TreeChooser.java b/src/jalview/gui/TreeChooser.java index 9cbb47a..41b7d48 100644 --- a/src/jalview/gui/TreeChooser.java +++ b/src/jalview/gui/TreeChooser.java @@ -260,6 +260,17 @@ public class TreeChooser extends JPanel { matchGaps.setEnabled(false); } + if (tree.isSelected()) + { + // ?? tree requires specific parameter settings?? + includeGaps.setSelected(true); + includeGaps.setEnabled(false); + matchGaps.setSelected(true); + includeGappedColumns.setSelected(true); + includeGappedColumns.setEnabled(false); + shorterSequence.setSelected(false); + shorterSequence.setEnabled(false); + } } /** diff --git a/src/jalview/gui/TreePanel.java b/src/jalview/gui/TreePanel.java index 9ac2872..1e98535 100755 --- a/src/jalview/gui/TreePanel.java +++ b/src/jalview/gui/TreePanel.java @@ -21,7 +21,10 @@ package jalview.gui; import jalview.analysis.AlignmentSorter; +import jalview.analysis.AverageDistanceTree; import jalview.analysis.NJTree; +import jalview.analysis.TreeBuilder; +import jalview.analysis.TreeModel; import jalview.api.analysis.ScoreModelI; import jalview.api.analysis.SimilarityParamsI; import jalview.api.analysis.ViewBasedAnalysisI; @@ -81,7 +84,7 @@ public class TreePanel extends GTreePanel TreeCanvas treeCanvas; - NJTree tree; + TreeModel tree; AlignViewport av; @@ -267,12 +270,13 @@ public class TreePanel extends GTreePanel { if (odata == null) { - tree = new NJTree(av.getAlignment().getSequencesArray(), newtree); + tree = new TreeModel(av.getAlignment().getSequencesArray(), + newtree); } else { - tree = new NJTree(av.getAlignment().getSequencesArray(), odata, - newtree); + tree = new TreeModel(av.getAlignment().getSequencesArray(), + odata, newtree); } if (!tree.hasOriginalSequenceData()) { @@ -282,7 +286,10 @@ public class TreePanel extends GTreePanel else { ScoreModelI sm = configureScoreModel(); - tree = new NJTree(av, treeType, sm, similarityParams); + TreeBuilder njtree = treeType.equals(TreeBuilder.NEIGHBOUR_JOINING) ? new NJTree( + av, sm, similarityParams) : new AverageDistanceTree(av, sm, + similarityParams); + tree = new TreeModel(njtree); showDistances(true); } @@ -326,7 +333,7 @@ public class TreePanel extends GTreePanel * * @return DOCUMENT ME! */ - public NJTree getTree() + public TreeModel getTree() { return tree; } diff --git a/src/jalview/io/packed/JalviewDataset.java b/src/jalview/io/packed/JalviewDataset.java index c1ca1b7..736591b 100644 --- a/src/jalview/io/packed/JalviewDataset.java +++ b/src/jalview/io/packed/JalviewDataset.java @@ -151,7 +151,7 @@ public class JalviewDataset { // the following works because all trees are already had node/SequenceI // associations created. - jalview.analysis.NJTree njt = new jalview.analysis.NJTree( + jalview.analysis.TreeModel njt = new jalview.analysis.TreeModel( al.getSequencesArray(), nf); // this just updates the displayed leaf name on the tree according to // the SequenceIs. diff --git a/src/jalview/io/vamsas/Tree.java b/src/jalview/io/vamsas/Tree.java index 89877e2..f784fb0 100644 --- a/src/jalview/io/vamsas/Tree.java +++ b/src/jalview/io/vamsas/Tree.java @@ -21,6 +21,7 @@ package jalview.io.vamsas; import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.bin.Cache; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; @@ -370,13 +371,14 @@ public class Tree extends DatastoreItem /** * construct treenode mappings for mapped sequences * - * @param ntree + * @param treeModel * @param newick * @return */ - public Treenode[] makeTreeNodes(NJTree ntree, Newick newick) + public Treenode[] makeTreeNodes(TreeModel treeModel, Newick newick) { - Vector leaves = ntree.findLeaves(ntree.getTopNode()); + Vector leaves = treeModel.findLeaves(treeModel + .getTopNode()); Vector tnv = new Vector(); Enumeration l = leaves.elements(); Hashtable nodespecs = new Hashtable(); @@ -496,7 +498,7 @@ public class Tree extends DatastoreItem bindjvvobj(tp, tree); tree.setTitle(tp.getTitle()); Newick newick = new Newick(); - newick.setContent(tp.getTree().toString()); + newick.setContent(tp.getTree().print()); newick.setTitle(tp.getTitle()); tree.addNewick(newick); tree.setProvenance(makeTreeProvenance(jal, tp)); diff --git a/test/jalview/io/NewickFileTests.java b/test/jalview/io/NewickFileTests.java index d198f0f..4a32790 100644 --- a/test/jalview/io/NewickFileTests.java +++ b/test/jalview/io/NewickFileTests.java @@ -22,8 +22,8 @@ package jalview.io; import static org.testng.ConversionUtils.wrapDataProvider; -import jalview.analysis.NJTree; import jalview.analysis.SequenceIdMatcher; +import jalview.analysis.TreeModel; import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; import jalview.gui.JvOptionPane; @@ -125,7 +125,7 @@ public class NewickFileTests stage = "Compare original and generated tree" + treename; Vector oseqs, nseqs; - oseqs = new NJTree(new SequenceI[0], nf).findLeaves(nf.getTree()); + oseqs = new TreeModel(new SequenceI[0], nf).findLeaves(nf.getTree()); AssertJUnit.assertTrue(stage + "No nodes in original tree.", oseqs.size() > 0); SequenceI[] olsqs = new SequenceI[oseqs.size()]; @@ -133,7 +133,7 @@ public class NewickFileTests { olsqs[i] = (SequenceI) oseqs.get(i).element(); } - nseqs = new NJTree(new SequenceI[0], nf_regen).findLeaves(nf_regen + nseqs = new TreeModel(new SequenceI[0], nf_regen).findLeaves(nf_regen .getTree()); AssertJUnit.assertTrue(stage + "No nodes in regerated tree.", nseqs.size() > 0);