2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import java.util.ArrayList;
24 import java.util.BitSet;
25 import java.util.List;
26 import java.util.Vector;
28 import jalview.datamodel.AlignmentAnnotation;
29 import jalview.datamodel.BinaryNode;
30 import jalview.datamodel.ContactListI;
31 import jalview.datamodel.ContactMatrixI;
32 import jalview.math.Matrix;
33 import jalview.viewmodel.AlignmentViewport;
36 * This class implements distance calculations used in constructing a Average
37 * Distance tree (also known as UPGMA)
39 public class AverageDistanceEngine extends TreeEngine
45 AlignmentAnnotation aa;
47 // 0 - normalised dot product
48 // 1 - L1 - ie (abs(v_1-v_2)/dim(v))
49 // L1 is more rational - since can reason about value of difference,
50 // normalised dot product might give cleaner clusters, but more difficult to
56 * compute cosine distance matrix for a given contact matrix and create a
60 * @param cosineOrDifference
61 * false - dot product : true - L1
63 public AverageDistanceEngine(AlignmentViewport av, AlignmentAnnotation aa,
64 ContactMatrixI cm, boolean cosineOrDifference)
69 mode = (cosineOrDifference) ? 1 : 0;
74 public void calculate(ContactMatrixI cm)
77 node = new Vector<BinaryNode>();
78 clusters = new Vector<BitSet>();
79 distances = new Matrix(new double[cm.getWidth()][cm.getWidth()]);
80 noseqs = cm.getWidth();
82 double moduli[] = new double[cm.getWidth()];
90 max = cm.getMax() * cm.getMax();
93 for (int i = 0; i < cm.getWidth(); i++)
95 // init the tree engine node for this column
96 BinaryNode cnode = new BinaryNode();
97 cnode.setElement(Integer.valueOf(i));
98 cnode.setName("c" + i);
99 node.addElement(cnode);
100 BitSet bs = new BitSet();
102 clusters.addElement(bs);
104 // compute distance matrix element
105 ContactListI ith = cm.getContactList(i);
106 distances.setValue(i, i, 0);
111 for (int j = 0; j < i; j++)
113 ContactListI jth = cm.getContactList(j);
119 for (int indx = 0; indx < cm.getHeight(); indx++)
125 moduli[i] += ith.getContactAt(indx) * ith.getContactAt(indx);
127 prd += ith.getContactAt(indx) * jth.getContactAt(indx);
132 .abs(ith.getContactAt(indx) - jth.getContactAt(indx));
139 moduli[i] = Math.sqrt(moduli[i]);
141 prd = (moduli[i] != 0 && moduli[j] != 0)
142 ? prd / (moduli[i] * moduli[j])
148 prd /= cm.getHeight();
150 distances.setValue(i, j, prd);
151 distances.setValue(j, i, prd);
155 noClus = clusters.size();
160 * Calculates and saves the distance between the combination of cluster(i) and
161 * cluster(j) and all other clusters. An average of the distances from
162 * cluster(i) and cluster(j) is calculated, weighted by the sizes of each
169 protected void findClusterDistance(int i, int j)
171 int noi = clusters.elementAt(i).cardinality();
172 int noj = clusters.elementAt(j).cardinality();
174 // New distances from cluster i to others
175 double[] newdist = new double[noseqs];
177 for (int l = 0; l < noseqs; l++)
179 if ((l != i) && (l != j))
181 newdist[l] = ((distances.getValue(i, l) * noi)
182 + (distances.getValue(j, l) * noj)) / (noi + noj);
190 for (int ii = 0; ii < noseqs; ii++)
192 distances.setValue(i, ii, newdist[ii]);
193 distances.setValue(ii, i, newdist[ii]);
201 protected double findMinDistance()
203 double min = Double.MAX_VALUE;
205 for (int i = 0; i < (noseqs - 1); i++)
207 for (int j = i + 1; j < noseqs; j++)
209 if (!done.get(i) && !done.get(j))
211 if (distances.getValue(i, j) < min)
216 min = distances.getValue(i, j);
228 protected void findNewDistances(BinaryNode nodei, BinaryNode nodej,
234 BinaryNode sni = nodei;
235 BinaryNode snj = nodej;
240 sni = (BinaryNode) sni.left();
246 snj = (BinaryNode) snj.left();
249 nodei.dist = ((dist / 2) - ih);
250 nodej.dist = ((dist / 2) - jh);
254 * not the right place - OH WELL!
258 * Makes a list of groups, where each group is represented by a node whose
259 * height (distance from the root node), as a fraction of the height of the
260 * whole tree, is greater than the given threshold. This corresponds to
261 * selecting the nodes immediately to the right of a vertical line
262 * partitioning the tree (if the tree is drawn with root to the left). Each
263 * such node represents a group that contains all of the sequences linked to
264 * the child leaf nodes.
269 public List<BinaryNode> groupNodes(float threshold)
271 List<BinaryNode> groups = new ArrayList<BinaryNode>();
272 _groupNodes(groups, getTopNode(), threshold);
276 protected void _groupNodes(List<BinaryNode> groups, BinaryNode nd,
284 if ((nd.height / maxheight) > threshold)
290 _groupNodes(groups, nd.left(), threshold);
291 _groupNodes(groups, nd.right(), threshold);
301 * @return DOCUMENT ME!
303 public double findHeight(BinaryNode nd)
310 if ((nd.left() == null) && (nd.right() == null))
312 nd.height = ((BinaryNode) nd.parent()).height + nd.dist;
314 if (nd.height > maxheight)
325 if (nd.parent() != null)
327 nd.height = ((BinaryNode) nd.parent()).height + nd.dist;
332 nd.height = (float) 0.0;
335 maxheight = findHeight((BinaryNode) (nd.left()));
336 maxheight = findHeight((BinaryNode) (nd.right()));
343 * Search for leaf nodes below (or at) the given node
346 * root node to search from
350 public Vector<BinaryNode> findLeaves(BinaryNode top2)
352 Vector<BinaryNode> leaves = new Vector<BinaryNode>();
353 findLeaves(top2, leaves);
358 * Search for leaf nodes.
361 * root node to search from
363 * Vector of leaves to add leaf node objects too.
365 * @return Vector of leaf nodes on binary tree
367 Vector<BinaryNode> findLeaves(BinaryNode nd, Vector<BinaryNode> leaves)
374 if ((nd.left() == null) && (nd.right() == null)) // Interior node
377 leaves.addElement(nd);
384 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
385 * leaves.addElement(node); }
387 findLeaves(nd.left(), leaves);
388 findLeaves(nd.right(), leaves);