2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.SimilarityParams;
24 import jalview.api.analysis.DistanceScoreModelI;
25 import jalview.api.analysis.ScoreModelI;
26 import jalview.api.analysis.SimilarityParamsI;
27 import jalview.api.analysis.SimilarityScoreModelI;
28 import jalview.datamodel.AlignmentView;
29 import jalview.datamodel.BinaryNode;
30 import jalview.datamodel.CigarArray;
31 import jalview.datamodel.NodeTransformI;
32 import jalview.datamodel.SeqCigar;
33 import jalview.datamodel.Sequence;
34 import jalview.datamodel.SequenceI;
35 import jalview.datamodel.SequenceNode;
36 import jalview.io.NewickFile;
37 import jalview.math.MatrixI;
38 import jalview.viewmodel.AlignmentViewport;
40 import java.util.Enumeration;
41 import java.util.List;
42 import java.util.Vector;
52 public static final String AVERAGE_DISTANCE = "AV";
54 public static final String NEIGHBOUR_JOINING = "NJ";
56 public static final String FROM_FILE = "FromFile";
58 Vector<Cluster> cluster;
60 SequenceI[] sequences;
63 * SequenceData is a string representation of what the user
64 * sees. The display may contain hidden columns.
66 public AlignmentView seqData = null;
84 Vector<SequenceNode> groups = new Vector<SequenceNode>();
96 Vector<SequenceNode> node;
104 boolean hasDistances = true; // normal case for jalview trees
106 boolean hasBootstrap = false; // normal case for jalview trees
108 private boolean hasRootDistance = true;
111 * Create a new NJTree object with leaves associated with sequences in seqs,
112 * and original alignment data represented by Cigar strings.
121 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
123 this(seqs, treefile);
129 * sequenceString = new String[odata.length]; char gapChar =
130 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
131 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
132 * sequenceString[i] = oseq_aligned.getSequence(); }
137 * Creates a new NJTree object from a tree from an external source
140 * SequenceI which should be associated with leafs of treefile
144 public NJTree(SequenceI[] seqs, NewickFile treefile)
146 this.sequences = seqs;
147 top = treefile.getTree();
150 * There is no dependent alignment to be recovered from an imported tree.
152 * if (sequenceString == null) { sequenceString = new String[seqs.length];
153 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
154 * seqs[i].getSequence(); } }
157 hasDistances = treefile.HasDistances();
158 hasBootstrap = treefile.HasBootstrap();
159 hasRootDistance = treefile.HasRootDistance();
161 maxheight = findHeight(top);
163 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
165 Vector<SequenceNode> leaves = findLeaves(top);
168 int namesleft = seqs.length;
173 Vector<SequenceI> one2many = new Vector<SequenceI>();
174 int countOne2Many = 0;
175 while (i < leaves.size())
177 j = leaves.elementAt(i++);
178 realnam = j.getName();
183 nam = algnIds.findIdMatch(realnam);
189 if (one2many.contains(nam))
192 // if (jalview.bin.Cache.log.isDebugEnabled())
193 // jalview.bin.Cache.log.debug("One 2 many relationship for
198 one2many.addElement(nam);
204 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
205 j.setPlaceholder(true);
208 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
209 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
210 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
217 * Constructor given a viewport, tree type and score model
220 * the current alignment viewport
224 * a distance or similarity score model to use to compute the tree
225 * @param scoreParameters TODO
227 public NJTree(AlignmentViewport av, String treeType, ScoreModelI sm, SimilarityParamsI scoreParameters)
229 // TODO handle type "FromFile" more elegantly
230 if (!(treeType.equals(NEIGHBOUR_JOINING)))
232 treeType = AVERAGE_DISTANCE;
234 this.type = treeType;
236 boolean selview = av.getSelectionGroup() != null
237 && av.getSelectionGroup().getSize() > 1;
238 AlignmentView seqStrings = av.getAlignmentView(selview);
242 end = av.getAlignment().getWidth();
243 this.sequences = av.getAlignment().getSequencesArray();
247 start = av.getSelectionGroup().getStartRes();
248 end = av.getSelectionGroup().getEndRes() + 1;
249 this.sequences = av.getSelectionGroup().getSequencesInOrder(
253 init(seqStrings, start, end);
255 computeTree(sm, scoreParameters);
258 void init(AlignmentView seqView, int start, int end)
260 this.node = new Vector<SequenceNode>();
263 this.seqData = seqView;
267 SeqCigar[] seqs = new SeqCigar[sequences.length];
268 for (int i = 0; i < sequences.length; i++)
270 seqs[i] = new SeqCigar(sequences[i], start, end);
272 CigarArray sdata = new CigarArray(seqs);
273 sdata.addOperation(CigarArray.M, end - start + 1);
274 this.seqData = new AlignmentView(sdata, start);
278 * count the non-null sequences
282 done = new int[sequences.length];
284 for (SequenceI seq : sequences)
294 * Calculates the tree using the given score model and parameters, and the
295 * configured tree type
297 * If the score model computes pairwise distance scores, then these are used
298 * directly to derive the tree
300 * If the score model computes similarity scores, then the range of the scores
301 * is reversed to give a distance measure, and this is used to derive the tree
304 * @param scoreOptions
306 protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
308 if (sm instanceof DistanceScoreModelI)
310 distances = ((DistanceScoreModelI) sm).findDistances(seqData,
313 else if (sm instanceof SimilarityScoreModelI)
316 * compute similarity and invert it to give a distance measure
318 MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities(
319 seqData, SimilarityParams.Jalview);
320 result.reverseRange(true);
326 noClus = cluster.size();
332 * Generate a string representation of the Tree
334 * @return Newick File with all tree data available
337 public String toString()
339 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
341 return fout.print(hasBootstrap(), hasDistances(),
342 hasRootDistance()); // output all data available for tree
347 * used when the alignment associated to a tree has changed.
350 * Sequence set to be associated with tree nodes
352 public void updatePlaceHolders(List<SequenceI> list)
354 Vector<SequenceNode> leaves = findLeaves(top);
356 int sz = leaves.size();
357 SequenceIdMatcher seqmatcher = null;
362 SequenceNode leaf = leaves.elementAt(i++);
364 if (list.contains(leaf.element()))
366 leaf.setPlaceholder(false);
370 if (seqmatcher == null)
372 // Only create this the first time we need it
373 SequenceI[] seqs = new SequenceI[list.size()];
375 for (int j = 0; j < seqs.length; j++)
377 seqs[j] = list.get(j);
380 seqmatcher = new SequenceIdMatcher(seqs);
383 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
387 if (!leaf.isPlaceholder())
389 // remapping the node to a new sequenceI - should remove any refs to
391 // TODO - make many sequenceI to one leaf mappings possible!
394 leaf.setPlaceholder(false);
395 leaf.setElement(nam);
399 if (!leaf.isPlaceholder())
401 // Construct a new placeholder sequence object for this leaf
402 leaf.setElement(new Sequence(leaf.getName(),
403 "THISISAPLACEHLDER"));
405 leaf.setPlaceholder(true);
413 * rename any nodes according to their associated sequence. This will modify
414 * the tree's metadata! (ie the original NewickFile or newly generated
415 * BinaryTree's label data)
417 public void renameAssociatedNodes()
419 applyToNodes(new NodeTransformI()
423 public void transform(BinaryNode nd)
425 Object el = nd.element();
426 if (el != null && el instanceof SequenceI)
428 nd.setName(((SequenceI) el).getName());
441 if (type.equals(NEIGHBOUR_JOINING))
450 Cluster c = joinClusters(mini, minj);
454 cluster.setElementAt(null, minj);
455 cluster.setElementAt(c, mini);
460 boolean onefound = false;
465 for (int i = 0; i < noseqs; i++)
469 if (onefound == false)
481 joinClusters(one, two);
482 top = (node.elementAt(one));
497 * @return DOCUMENT ME!
499 Cluster joinClusters(int i, int j)
501 double dist = distances.getValue(i, j);
503 int noi = cluster.elementAt(i).value.length;
504 int noj = cluster.elementAt(j).value.length;
506 int[] value = new int[noi + noj];
508 for (int ii = 0; ii < noi; ii++)
510 value[ii] = cluster.elementAt(i).value[ii];
513 for (int ii = noi; ii < (noi + noj); ii++)
515 value[ii] = cluster.elementAt(j).value[ii - noi];
518 Cluster c = new Cluster(value);
523 if (type.equals(NEIGHBOUR_JOINING))
525 findClusterNJDistance(i, j);
529 findClusterDistance(i, j);
532 SequenceNode sn = new SequenceNode();
534 sn.setLeft((node.elementAt(i)));
535 sn.setRight((node.elementAt(j)));
537 SequenceNode tmpi = (node.elementAt(i));
538 SequenceNode tmpj = (node.elementAt(j));
540 if (type.equals(NEIGHBOUR_JOINING))
542 findNewNJDistances(tmpi, tmpj, dist);
546 findNewDistances(tmpi, tmpj, dist);
552 node.setElementAt(sn, i);
567 void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
571 tmpi.dist = ((dist + ri) - rj) / 2;
572 tmpj.dist = (dist - tmpi.dist);
595 void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
601 SequenceNode sni = tmpi;
602 SequenceNode snj = tmpj;
607 sni = (SequenceNode) sni.left();
613 snj = (SequenceNode) snj.left();
616 tmpi.dist = ((dist / 2) - ih);
617 tmpj.dist = ((dist / 2) - jh);
628 void findClusterDistance(int i, int j)
630 int noi = cluster.elementAt(i).value.length;
631 int noj = cluster.elementAt(j).value.length;
633 // New distances from cluster to others
634 double[] newdist = new double[noseqs];
636 for (int l = 0; l < noseqs; l++)
638 if ((l != i) && (l != j))
640 // newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
642 newdist[l] = ((distances.getValue(i, l) * noi) + (distances.getValue(
652 for (int ii = 0; ii < noseqs; ii++)
654 // distance[i][ii] = newdist[ii];
655 // distance[ii][i] = newdist[ii];
656 distances.setValue(i, ii, newdist[ii]);
657 distances.setValue(ii, i, newdist[ii]);
669 void findClusterNJDistance(int i, int j)
672 // New distances from cluster to others
673 double[] newdist = new double[noseqs];
675 for (int l = 0; l < noseqs; l++)
677 if ((l != i) && (l != j))
679 // newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) /
681 newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - distances
682 .getValue(i, j)) / 2;
690 for (int ii = 0; ii < noseqs; ii++)
692 // distance[i][ii] = newdist[ii];
693 // distance[ii][i] = newdist[ii];
694 distances.setValue(i, ii, newdist[ii]);
695 distances.setValue(ii, i, newdist[ii]);
707 * @return DOCUMENT ME!
709 double findr(int i, int j)
713 for (int k = 0; k < noseqs; k++)
715 if ((k != i) && (k != j) && (done[k] != 1))
717 // tmp = tmp + distance[i][k];
718 tmp = tmp + distances.getValue(i, k);
724 tmp = tmp / (noClus - 2);
733 * @return DOCUMENT ME!
735 double findMinNJDistance()
737 double min = Double.MAX_VALUE;
739 for (int i = 0; i < (noseqs - 1); i++)
741 for (int j = i + 1; j < noseqs; j++)
743 if ((done[i] != 1) && (done[j] != 1))
745 // float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
746 double tmp = distances.getValue(i, j)
747 - (findr(i, j) + findr(j, i));
766 * @return DOCUMENT ME!
768 double findMinDistance()
770 double min = Double.MAX_VALUE;
772 for (int i = 0; i < (noseqs - 1); i++)
774 for (int j = i + 1; j < noseqs; j++)
776 if ((done[i] != 1) && (done[j] != 1))
778 // if (distance[i][j] < min)
779 if (distances.getValue(i, j) < min)
784 // min = distance[i][j];
785 min = distances.getValue(i, j);
799 cluster = new Vector<Cluster>();
801 for (int i = 0; i < noseqs; i++)
803 SequenceNode sn = new SequenceNode();
805 sn.setElement(sequences[i]);
806 sn.setName(sequences[i].getName());
809 int[] value = new int[1];
812 Cluster c = new Cluster(value);
813 cluster.addElement(c);
818 * Search for leaf nodes below (or at) the given node
821 * root node to search from
825 public Vector<SequenceNode> findLeaves(SequenceNode nd)
827 Vector<SequenceNode> leaves = new Vector<SequenceNode>();
828 findLeaves(nd, leaves);
833 * Search for leaf nodes.
836 * root node to search from
838 * Vector of leaves to add leaf node objects too.
840 * @return Vector of leaf nodes on binary tree
842 Vector<SequenceNode> findLeaves(SequenceNode nd,
843 Vector<SequenceNode> leaves)
850 if ((nd.left() == null) && (nd.right() == null)) // Interior node
853 leaves.addElement(nd);
860 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
861 * leaves.addElement(node); }
863 findLeaves((SequenceNode) nd.left(), leaves);
864 findLeaves((SequenceNode) nd.right(), leaves);
871 * printNode is mainly for debugging purposes.
876 void printNode(SequenceNode nd)
883 if ((nd.left() == null) && (nd.right() == null))
885 System.out.println("Leaf = " + ((SequenceI) nd.element()).getName());
886 System.out.println("Dist " + nd.dist);
887 System.out.println("Boot " + nd.getBootstrap());
891 System.out.println("Dist " + nd.dist);
892 printNode((SequenceNode) nd.left());
893 printNode((SequenceNode) nd.right());
903 void findMaxDist(SequenceNode nd)
910 if ((nd.left() == null) && (nd.right() == null))
912 double dist = nd.dist;
914 if (dist > maxDistValue)
922 findMaxDist((SequenceNode) nd.left());
923 findMaxDist((SequenceNode) nd.right());
930 * @return DOCUMENT ME!
932 public Vector<SequenceNode> getGroups()
940 * @return DOCUMENT ME!
942 public double getMaxHeight()
955 public void groupNodes(SequenceNode nd, float threshold)
962 if ((nd.height / maxheight) > threshold)
964 groups.addElement(nd);
968 groupNodes((SequenceNode) nd.left(), threshold);
969 groupNodes((SequenceNode) nd.right(), threshold);
979 * @return DOCUMENT ME!
981 public double findHeight(SequenceNode nd)
988 if ((nd.left() == null) && (nd.right() == null))
990 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
992 if (nd.height > maxheight)
1003 if (nd.parent() != null)
1005 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
1010 nd.height = (float) 0.0;
1013 maxheight = findHeight((SequenceNode) (nd.left()));
1014 maxheight = findHeight((SequenceNode) (nd.right()));
1023 * @return DOCUMENT ME!
1025 SequenceNode reRoot()
1027 // TODO not used - remove?
1028 if (maxdist != null)
1032 double tmpdist = maxdist.dist;
1035 SequenceNode sn = new SequenceNode();
1038 // New right hand of top
1039 SequenceNode snr = (SequenceNode) maxdist.parent();
1040 changeDirection(snr, maxdist);
1041 System.out.println("Printing reversed tree");
1043 snr.dist = tmpdist / 2;
1044 maxdist.dist = tmpdist / 2;
1047 maxdist.setParent(sn);
1050 sn.setLeft(maxdist);
1064 * @return true if original sequence data can be recovered
1066 public boolean hasOriginalSequenceData()
1068 return seqData != null;
1072 * Returns original alignment data used for calculation - or null where not
1075 * @return null or cut'n'pasteable alignment
1077 public String printOriginalSequenceData(char gapChar)
1079 if (seqData == null)
1084 StringBuffer sb = new StringBuffer();
1085 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1086 for (int i = 0; i < seqdatas.length; i++)
1088 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequences[i]
1090 sb.append(" " + seqdatas[i] + "\n");
1092 return sb.toString();
1101 void printN(SequenceNode nd)
1108 if ((nd.left() != null) && (nd.right() != null))
1110 printN((SequenceNode) nd.left());
1111 printN((SequenceNode) nd.right());
1115 System.out.println(" name = " + ((SequenceI) nd.element()).getName());
1118 System.out.println(" dist = " + nd.dist + " " + nd.count + " "
1128 public void reCount(SequenceNode nd)
1132 // _lylimit = this.node.size();
1136 private long _lycount = 0, _lylimit = 0;
1144 void _reCount(SequenceNode nd)
1146 // if (_lycount<_lylimit)
1148 // System.err.println("Warning: depth of _recount greater than number of nodes.");
1156 if ((nd.left() != null) && (nd.right() != null))
1159 _reCount((SequenceNode) nd.left());
1160 _reCount((SequenceNode) nd.right());
1162 SequenceNode l = (SequenceNode) nd.left();
1163 SequenceNode r = (SequenceNode) nd.right();
1165 nd.count = l.count + r.count;
1166 nd.ycount = (l.ycount + r.ycount) / 2;
1171 nd.ycount = ycount++;
1182 public void swapNodes(SequenceNode nd)
1189 SequenceNode tmp = (SequenceNode) nd.left();
1191 nd.setLeft(nd.right());
1203 void changeDirection(SequenceNode nd, SequenceNode dir)
1210 if (nd.parent() != top)
1212 changeDirection((SequenceNode) nd.parent(), nd);
1214 SequenceNode tmp = (SequenceNode) nd.parent();
1216 if (dir == nd.left())
1221 else if (dir == nd.right())
1229 if (dir == nd.left())
1231 nd.setParent(nd.left());
1233 if (top.left() == nd)
1235 nd.setRight(top.right());
1239 nd.setRight(top.left());
1244 nd.setParent(nd.right());
1246 if (top.left() == nd)
1248 nd.setLeft(top.right());
1252 nd.setLeft(top.left());
1261 * @return DOCUMENT ME!
1263 public SequenceNode getMaxDist()
1271 * @return DOCUMENT ME!
1273 public SequenceNode getTopNode()
1280 * @return true if tree has real distances
1282 public boolean hasDistances()
1284 return hasDistances;
1289 * @return true if tree has real bootstrap values
1291 public boolean hasBootstrap()
1293 return hasBootstrap;
1296 public boolean hasRootDistance()
1298 return hasRootDistance;
1302 * apply the given transform to all the nodes in the tree.
1304 * @param nodeTransformI
1306 public void applyToNodes(NodeTransformI nodeTransformI)
1308 for (Enumeration<SequenceNode> nodes = node.elements(); nodes
1309 .hasMoreElements(); nodeTransformI.transform(nodes
1321 * @version $Revision$
1323 // TODO what does this class have that int[] doesn't have already?
1329 * Creates a new Cluster object.
1334 public Cluster(int[] value)