1 package jalview.analysis;
3 import jalview.api.analysis.ScoreModelI;
4 import jalview.api.analysis.SimilarityParamsI;
5 import jalview.datamodel.AlignmentView;
6 import jalview.datamodel.CigarArray;
7 import jalview.datamodel.SeqCigar;
8 import jalview.datamodel.SequenceI;
9 import jalview.datamodel.SequenceNode;
10 import jalview.math.MatrixI;
11 import jalview.viewmodel.AlignmentViewport;
13 import java.util.BitSet;
14 import java.util.Vector;
16 public abstract class TreeBuilder
18 public static final String AVERAGE_DISTANCE = "AV";
20 public static final String NEIGHBOUR_JOINING = "NJ";
22 protected Vector<BitSet> clusters;
24 protected SequenceI[] sequences;
26 public AlignmentView seqData;
28 protected BitSet done;
34 protected MatrixI distances;
54 Vector<SequenceNode> node;
56 private AlignmentView seqStrings;
63 * @param scoreParameters
65 public TreeBuilder(AlignmentViewport av, ScoreModelI sm,
66 SimilarityParamsI scoreParameters)
69 boolean selview = av.getSelectionGroup() != null
70 && av.getSelectionGroup().getSize() > 1;
71 seqStrings = av.getAlignmentView(selview);
75 end = av.getAlignment().getWidth();
76 this.sequences = av.getAlignment().getSequencesArray();
80 start = av.getSelectionGroup().getStartRes();
81 end = av.getSelectionGroup().getEndRes() + 1;
82 this.sequences = av.getSelectionGroup()
83 .getSequencesInOrder(av.getAlignment());
86 init(seqStrings, start, end);
88 computeTree(sm, scoreParameters);
91 public SequenceI[] getSequences()
102 * @return DOCUMENT ME!
104 double findHeight(SequenceNode nd)
111 if ((nd.left() == null) && (nd.right() == null))
113 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
115 if (nd.height > maxheight)
126 if (nd.parent() != null)
128 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
133 nd.height = (float) 0.0;
136 maxheight = findHeight((SequenceNode) (nd.left()));
137 maxheight = findHeight((SequenceNode) (nd.right()));
149 void reCount(SequenceNode nd)
153 // _lylimit = this.node.size();
163 void _reCount(SequenceNode nd)
165 // if (_lycount<_lylimit)
167 // System.err.println("Warning: depth of _recount greater than number of
176 if ((nd.left() != null) && (nd.right() != null))
179 _reCount((SequenceNode) nd.left());
180 _reCount((SequenceNode) nd.right());
182 SequenceNode l = (SequenceNode) nd.left();
183 SequenceNode r = (SequenceNode) nd.right();
185 nd.count = l.count + r.count;
186 nd.ycount = (l.ycount + r.ycount) / 2;
191 nd.ycount = ycount++;
199 * @return DOCUMENT ME!
201 public SequenceNode getTopNode()
208 * @return true if tree has real distances
210 public boolean hasDistances()
217 * @return true if tree has real bootstrap values
219 public boolean hasBootstrap()
224 public boolean hasRootDistance()
230 * Form clusters by grouping sub-clusters, starting from one sequence per
231 * cluster, and finishing when only two clusters remain
239 joinClusters(mini, minj);
244 int rightChild = done.nextClearBit(0);
245 int leftChild = done.nextClearBit(rightChild + 1);
247 joinClusters(leftChild, rightChild);
248 top = (node.elementAt(leftChild));
256 * Returns the minimum distance between two clusters, and also sets the
257 * indices of the clusters in fields mini and minj
261 protected abstract double findMinDistance();
264 * Calculates the tree using the given score model and parameters, and the
265 * configured tree type
267 * If the score model computes pairwise distance scores, then these are used
268 * directly to derive the tree
270 * If the score model computes similarity scores, then the range of the scores
271 * is reversed to give a distance measure, and this is used to derive the tree
274 * @param scoreOptions
276 protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
278 distances = sm.findDistances(seqData, scoreOptions);
282 noClus = clusters.size();
288 * Finds the node, at or below the given node, with the maximum distance, and
289 * saves the node and the distance value
293 void findMaxDist(SequenceNode nd)
300 if ((nd.left() == null) && (nd.right() == null))
302 double dist = nd.dist;
304 if (dist > maxDistValue)
312 findMaxDist((SequenceNode) nd.left());
313 findMaxDist((SequenceNode) nd.right());
318 * Calculates and returns r, whatever that is
325 protected double findr(int i, int j)
329 for (int k = 0; k < noseqs; k++)
331 if ((k != i) && (k != j) && (!done.get(k)))
333 tmp = tmp + distances.getValue(i, k);
339 tmp = tmp / (noClus - 2);
345 protected void init(AlignmentView seqView, int start, int end)
347 this.node = new Vector<SequenceNode>();
350 this.seqData = seqView;
354 SeqCigar[] seqs = new SeqCigar[sequences.length];
355 for (int i = 0; i < sequences.length; i++)
357 seqs[i] = new SeqCigar(sequences[i], start, end);
359 CigarArray sdata = new CigarArray(seqs);
360 sdata.addOperation(CigarArray.M, end - start + 1);
361 this.seqData = new AlignmentView(sdata, start);
365 * count the non-null sequences
371 for (SequenceI seq : sequences)
381 * Merges cluster(j) to cluster(i) and recalculates cluster and node distances
386 void joinClusters(final int i, final int j)
388 double dist = distances.getValue(i, j);
393 findClusterDistance(i, j);
395 SequenceNode sn = new SequenceNode();
397 sn.setLeft((node.elementAt(i)));
398 sn.setRight((node.elementAt(j)));
400 SequenceNode tmpi = (node.elementAt(i));
401 SequenceNode tmpj = (node.elementAt(j));
403 findNewDistances(tmpi, tmpj, dist);
408 node.setElementAt(sn, i);
411 * move the members of cluster(j) to cluster(i)
412 * and mark cluster j as out of the game
414 clusters.get(i).or(clusters.get(j));
415 clusters.get(j).clear();
420 * Computes and stores new distances for nodei and nodej, given the previous
421 * distance between them
423 protected abstract void findNewDistances(SequenceNode nodei,
424 SequenceNode nodej, double previousDistance);
427 * Calculates and saves the distance between the combination of cluster(i) and
428 * cluster(j) and all other clusters. The form of the calculation depends on
429 * the tree clustering method being used.
434 protected abstract void findClusterDistance(int i, int j);
437 * Start by making a cluster for each individual sequence
441 clusters = new Vector<BitSet>();
443 for (int i = 0; i < noseqs; i++)
445 SequenceNode sn = new SequenceNode();
447 sn.setElement(sequences[i]);
448 sn.setName(sequences[i].getName());
450 BitSet bs = new BitSet();
452 clusters.addElement(bs);
456 public AlignmentView getOriginalData()