1 package jalview.analysis;
3 import jalview.api.analysis.DistanceScoreModelI;
4 import jalview.api.analysis.ScoreModelI;
5 import jalview.api.analysis.SimilarityParamsI;
6 import jalview.api.analysis.SimilarityScoreModelI;
7 import jalview.datamodel.AlignmentView;
8 import jalview.datamodel.CigarArray;
9 import jalview.datamodel.SeqCigar;
10 import jalview.datamodel.SequenceI;
11 import jalview.datamodel.SequenceNode;
12 import jalview.math.MatrixI;
13 import jalview.viewmodel.AlignmentViewport;
15 import java.util.BitSet;
16 import java.util.Vector;
18 public abstract class TreeBuilder
20 public static final String AVERAGE_DISTANCE = "AV";
22 public static final String NEIGHBOUR_JOINING = "NJ";
24 protected Vector<BitSet> clusters;
26 protected SequenceI[] sequences;
28 public AlignmentView seqData;
30 protected BitSet done;
36 protected MatrixI distances;
56 Vector<SequenceNode> node;
58 private AlignmentView seqStrings;
65 * @param scoreParameters
67 public TreeBuilder(AlignmentViewport av, ScoreModelI sm,
68 SimilarityParamsI scoreParameters)
71 boolean selview = av.getSelectionGroup() != null
72 && av.getSelectionGroup().getSize() > 1;
73 seqStrings = av.getAlignmentView(selview);
77 end = av.getAlignment().getWidth();
78 this.sequences = av.getAlignment().getSequencesArray();
82 start = av.getSelectionGroup().getStartRes();
83 end = av.getSelectionGroup().getEndRes() + 1;
84 this.sequences = av.getSelectionGroup().getSequencesInOrder(
88 init(seqStrings, start, end);
90 computeTree(sm, scoreParameters);
93 public SequenceI[] getSequences()
104 * @return DOCUMENT ME!
106 double findHeight(SequenceNode nd)
113 if ((nd.left() == null) && (nd.right() == null))
115 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
117 if (nd.height > maxheight)
128 if (nd.parent() != null)
130 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
135 nd.height = (float) 0.0;
138 maxheight = findHeight((SequenceNode) (nd.left()));
139 maxheight = findHeight((SequenceNode) (nd.right()));
151 void reCount(SequenceNode nd)
155 // _lylimit = this.node.size();
165 void _reCount(SequenceNode nd)
167 // if (_lycount<_lylimit)
169 // System.err.println("Warning: depth of _recount greater than number of nodes.");
177 if ((nd.left() != null) && (nd.right() != null))
180 _reCount((SequenceNode) nd.left());
181 _reCount((SequenceNode) nd.right());
183 SequenceNode l = (SequenceNode) nd.left();
184 SequenceNode r = (SequenceNode) nd.right();
186 nd.count = l.count + r.count;
187 nd.ycount = (l.ycount + r.ycount) / 2;
192 nd.ycount = ycount++;
200 * @return DOCUMENT ME!
202 public SequenceNode getTopNode()
209 * @return true if tree has real distances
211 public boolean hasDistances()
218 * @return true if tree has real bootstrap values
220 public boolean hasBootstrap()
225 public boolean hasRootDistance()
231 * Form clusters by grouping sub-clusters, starting from one sequence per
232 * cluster, and finishing when only two clusters remain
240 joinClusters(mini, minj);
245 int rightChild = done.nextClearBit(0);
246 int leftChild = done.nextClearBit(rightChild + 1);
248 joinClusters(leftChild, rightChild);
249 top = (node.elementAt(leftChild));
257 * Returns the minimum distance between two clusters, and also sets the
258 * indices of the clusters in fields mini and minj
262 protected abstract double findMinDistance();
265 * Calculates the tree using the given score model and parameters, and the
266 * configured tree type
268 * If the score model computes pairwise distance scores, then these are used
269 * directly to derive the tree
271 * If the score model computes similarity scores, then the range of the scores
272 * is reversed to give a distance measure, and this is used to derive the tree
275 * @param scoreOptions
277 protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
279 if (sm instanceof DistanceScoreModelI)
281 distances = ((DistanceScoreModelI) sm).findDistances(seqData,
284 else if (sm instanceof SimilarityScoreModelI)
287 * compute similarity and invert it to give a distance measure
288 * reverseRange(true) converts maximum similarity to zero distance
290 MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities(
291 seqData, scoreOptions);
292 result.reverseRange(true);
298 noClus = clusters.size();
304 * Finds the node, at or below the given node, with the maximum distance, and
305 * saves the node and the distance value
309 void findMaxDist(SequenceNode nd)
316 if ((nd.left() == null) && (nd.right() == null))
318 double dist = nd.dist;
320 if (dist > maxDistValue)
328 findMaxDist((SequenceNode) nd.left());
329 findMaxDist((SequenceNode) nd.right());
334 * Calculates and returns r, whatever that is
341 protected double findr(int i, int j)
345 for (int k = 0; k < noseqs; k++)
347 if ((k != i) && (k != j) && (!done.get(k)))
349 tmp = tmp + distances.getValue(i, k);
355 tmp = tmp / (noClus - 2);
361 protected void init(AlignmentView seqView, int start, int end)
363 this.node = new Vector<SequenceNode>();
366 this.seqData = seqView;
370 SeqCigar[] seqs = new SeqCigar[sequences.length];
371 for (int i = 0; i < sequences.length; i++)
373 seqs[i] = new SeqCigar(sequences[i], start, end);
375 CigarArray sdata = new CigarArray(seqs);
376 sdata.addOperation(CigarArray.M, end - start + 1);
377 this.seqData = new AlignmentView(sdata, start);
381 * count the non-null sequences
387 for (SequenceI seq : sequences)
397 * Merges cluster(j) to cluster(i) and recalculates cluster and node distances
402 void joinClusters(final int i, final int j)
404 double dist = distances.getValue(i, j);
409 findClusterDistance(i, j);
411 SequenceNode sn = new SequenceNode();
413 sn.setLeft((node.elementAt(i)));
414 sn.setRight((node.elementAt(j)));
416 SequenceNode tmpi = (node.elementAt(i));
417 SequenceNode tmpj = (node.elementAt(j));
419 findNewDistances(tmpi, tmpj, dist);
424 node.setElementAt(sn, i);
427 * move the members of cluster(j) to cluster(i)
428 * and mark cluster j as out of the game
430 clusters.get(i).or(clusters.get(j));
431 clusters.get(j).clear();
436 * Computes and stores new distances for nodei and nodej, given the previous
437 * distance between them
439 protected abstract void findNewDistances(SequenceNode nodei,
440 SequenceNode nodej, double previousDistance);
443 * Calculates and saves the distance between the combination of cluster(i) and
444 * cluster(j) and all other clusters. The form of the calculation depends on
445 * the tree clustering method being used.
450 protected abstract void findClusterDistance(int i, int j);
453 * Start by making a cluster for each individual sequence
457 clusters = new Vector<BitSet>();
459 for (int i = 0; i < noseqs; i++)
461 SequenceNode sn = new SequenceNode();
463 sn.setElement(sequences[i]);
464 sn.setName(sequences[i].getName());
466 BitSet bs = new BitSet();
468 clusters.addElement(bs);
472 public AlignmentView getOriginalData()