2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.analysis.ScoreModelI;
24 import jalview.api.analysis.SimilarityParamsI;
25 import jalview.datamodel.AlignmentView;
26 import jalview.datamodel.CigarArray;
27 import jalview.datamodel.SeqCigar;
28 import jalview.datamodel.SequenceI;
29 import jalview.datamodel.SequenceNode;
30 import jalview.math.MatrixI;
31 import jalview.viewmodel.AlignmentViewport;
33 import java.util.BitSet;
34 import java.util.Vector;
36 public abstract class TreeBuilder
38 public static final String AVERAGE_DISTANCE = "AV";
40 public static final String NEIGHBOUR_JOINING = "NJ";
42 protected Vector<BitSet> clusters;
44 protected SequenceI[] sequences;
46 public AlignmentView seqData;
48 protected BitSet done;
54 protected MatrixI distances;
56 public MatrixI testDistances;
76 Vector<SequenceNode> node;
78 protected ScoreModelI scoreModel;
80 protected SimilarityParamsI scoreParams;
82 private AlignmentView seqStrings; // redundant? (see seqData)
89 * @param scoreParameters
91 public TreeBuilder(AlignmentViewport av, ScoreModelI sm,
92 SimilarityParamsI scoreParameters)
95 boolean selview = av.getSelectionGroup() != null
96 && av.getSelectionGroup().getSize() > 1;
97 seqStrings = av.getAlignmentView(selview);
101 end = av.getAlignment().getWidth();
102 this.sequences = av.getAlignment().getSequencesArray();
106 start = av.getSelectionGroup().getStartRes();
107 end = av.getSelectionGroup().getEndRes() + 1;
108 this.sequences = av.getSelectionGroup()
109 .getSequencesInOrder(av.getAlignment());
112 init(seqStrings, start, end);
114 computeTree(sm, scoreParameters);
117 public SequenceI[] getSequences()
128 * @return DOCUMENT ME!
130 double findHeight(SequenceNode nd)
137 if ((nd.left() == null) && (nd.right() == null))
139 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
141 if (nd.height > maxHeight)
152 if (nd.parent() != null)
154 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
159 nd.height = (float) 0.0;
162 maxHeight = findHeight((SequenceNode) (nd.left()));
163 maxHeight = findHeight((SequenceNode) (nd.right()));
175 void reCount(SequenceNode nd)
179 // _lylimit = this.node.size();
189 void _reCount(SequenceNode nd)
191 // if (_lycount<_lylimit)
193 // System.err.println("Warning: depth of _recount greater than number of
202 if ((nd.left() != null) && (nd.right() != null))
205 _reCount((SequenceNode) nd.left());
206 _reCount((SequenceNode) nd.right());
208 SequenceNode l = (SequenceNode) nd.left();
209 SequenceNode r = (SequenceNode) nd.right();
211 nd.count = l.count + r.count;
212 nd.ycount = (l.ycount + r.ycount) / 2;
217 nd.ycount = ycount++;
225 * @return DOCUMENT ME!
227 public SequenceNode getTopNode()
234 * @return true if tree has real distances
236 public boolean hasDistances()
243 * @return true if tree has real bootstrap values
245 public boolean hasBootstrap()
250 public boolean hasRootDistance()
256 * Form clusters by grouping sub-clusters, starting from one sequence per
257 * cluster, and finishing when only two clusters remain
265 joinClusters(mini, minj);
270 int rightChild = done.nextClearBit(0);
271 int leftChild = done.nextClearBit(rightChild + 1);
273 joinClusters(leftChild, rightChild);
274 top = (node.elementAt(leftChild));
282 * Returns the minimum distance between two clusters, and also sets the
283 * indices of the clusters in fields mini and minj
287 protected abstract double findMinDistance();
290 * Calculates the tree using the given score model and parameters, and the
291 * configured tree type
293 * If the score model computes pairwise distance scores, then these are used
294 * directly to derive the tree
296 * If the score model computes similarity scores, then the range of the scores
297 * is reversed to give a distance measure, and this is used to derive the tree
300 * @param scoreOptions
302 protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
305 this.scoreModel = sm;
306 this.scoreParams = scoreOptions;
308 testDistances = scoreModel.findDistances(seqData, scoreParams);
309 distances = scoreModel.findDistances(seqData, scoreParams);
312 noClus = clusters.size();
318 * Finds the node, at or below the given node, with the maximum distance, and
319 * saves the node and the distance value
323 void findMaxDist(SequenceNode nd)
330 if ((nd.left() == null) && (nd.right() == null))
332 double dist = nd.dist;
334 if (dist > maxDistValue)
342 findMaxDist((SequenceNode) nd.left());
343 findMaxDist((SequenceNode) nd.right());
348 * Calculates and returns r, whatever that is
355 protected double findr(int i, int j)
359 for (int k = 0; k < noseqs; k++)
361 if ((k != i) && (k != j) && (!done.get(k)))
363 tmp = tmp + distances.getValue(i, k);
369 tmp = tmp / (noClus - 2);
375 protected void init(AlignmentView seqView, int start, int end)
377 this.node = new Vector<>();
380 this.seqData = seqView;
384 SeqCigar[] seqs = new SeqCigar[sequences.length];
385 for (int i = 0; i < sequences.length; i++)
387 seqs[i] = new SeqCigar(sequences[i], start, end);
389 CigarArray sdata = new CigarArray(seqs);
390 sdata.addOperation(CigarArray.M, end - start + 1);
391 this.seqData = new AlignmentView(sdata, start);
395 * count the non-null sequences
401 for (SequenceI seq : sequences)
411 * Merges cluster(j) to cluster(i) and recalculates cluster and node distances
416 void joinClusters(final int i, final int j)
418 double dist = distances.getValue(i, j);
423 findClusterDistance(i, j);
425 SequenceNode sn = new SequenceNode();
427 sn.setLeft((node.elementAt(i)));
428 sn.setRight((node.elementAt(j)));
430 SequenceNode tmpi = (node.elementAt(i));
431 SequenceNode tmpj = (node.elementAt(j));
433 findNewDistances(tmpi, tmpj, dist);
438 node.setElementAt(sn, i);
441 * move the members of cluster(j) to cluster(i)
442 * and mark cluster j as out of the game
444 clusters.get(i).or(clusters.get(j));
445 clusters.get(j).clear();
450 * Computes and stores new distances for nodei and nodej, given the previous
451 * distance between them
453 protected abstract void findNewDistances(SequenceNode nodei,
454 SequenceNode nodej, double previousDistance);
457 * Calculates and saves the distance between the combination of cluster(i) and
458 * cluster(j) and all other clusters. The form of the calculation depends on
459 * the tree clustering method being used.
464 protected abstract void findClusterDistance(int i, int j);
467 * Start by making a cluster for each individual sequence
471 clusters = new Vector<>();
473 for (int i = 0; i < noseqs; i++)
475 SequenceNode sn = new SequenceNode();
477 sn.setElement(sequences[i]);
478 sn.setName(sequences[i].getName());
481 BitSet bs = new BitSet();
483 clusters.addElement(bs);
487 public AlignmentView getOriginalData()
492 public MatrixI getDistances()
497 public AlignmentView getSeqData()
502 public ScoreModelI getScoreModel()
507 public SimilarityParamsI getScoreParams()