2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.analysis.DistanceScoreModelI;
24 import jalview.api.analysis.ScoreModelI;
25 import jalview.api.analysis.SimilarityParamsI;
26 import jalview.api.analysis.SimilarityScoreModelI;
27 import jalview.datamodel.AlignmentView;
28 import jalview.datamodel.BinaryNode;
29 import jalview.datamodel.CigarArray;
30 import jalview.datamodel.NodeTransformI;
31 import jalview.datamodel.SeqCigar;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceI;
34 import jalview.datamodel.SequenceNode;
35 import jalview.io.NewickFile;
36 import jalview.math.MatrixI;
37 import jalview.viewmodel.AlignmentViewport;
39 import java.util.Enumeration;
40 import java.util.List;
41 import java.util.Vector;
51 public static final String AVERAGE_DISTANCE = "AV";
53 public static final String NEIGHBOUR_JOINING = "NJ";
55 public static final String FROM_FILE = "FromFile";
57 Vector<Cluster> cluster;
59 SequenceI[] sequences;
62 * SequenceData is a string representation of what the user
63 * sees. The display may contain hidden columns.
65 public AlignmentView seqData = null;
83 Vector<SequenceNode> groups = new Vector<SequenceNode>();
95 Vector<SequenceNode> node;
103 boolean hasDistances = true; // normal case for jalview trees
105 boolean hasBootstrap = false; // normal case for jalview trees
107 private boolean hasRootDistance = true;
110 * Create a new NJTree object with leaves associated with sequences in seqs,
111 * and original alignment data represented by Cigar strings.
120 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
122 this(seqs, treefile);
128 * sequenceString = new String[odata.length]; char gapChar =
129 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
130 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
131 * sequenceString[i] = oseq_aligned.getSequence(); }
136 * Creates a new NJTree object from a tree from an external source
139 * SequenceI which should be associated with leafs of treefile
143 public NJTree(SequenceI[] seqs, NewickFile treefile)
145 this.sequences = seqs;
146 top = treefile.getTree();
149 * There is no dependent alignment to be recovered from an imported tree.
151 * if (sequenceString == null) { sequenceString = new String[seqs.length];
152 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
153 * seqs[i].getSequence(); } }
156 hasDistances = treefile.HasDistances();
157 hasBootstrap = treefile.HasBootstrap();
158 hasRootDistance = treefile.HasRootDistance();
160 maxheight = findHeight(top);
162 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
164 Vector<SequenceNode> leaves = findLeaves(top);
167 int namesleft = seqs.length;
172 Vector<SequenceI> one2many = new Vector<SequenceI>();
173 int countOne2Many = 0;
174 while (i < leaves.size())
176 j = leaves.elementAt(i++);
177 realnam = j.getName();
182 nam = algnIds.findIdMatch(realnam);
188 if (one2many.contains(nam))
191 // if (jalview.bin.Cache.log.isDebugEnabled())
192 // jalview.bin.Cache.log.debug("One 2 many relationship for
197 one2many.addElement(nam);
203 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
204 j.setPlaceholder(true);
207 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
208 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
209 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
216 * Constructor given a viewport, tree type and score model
219 * the current alignment viewport
223 * a distance or similarity score model to use to compute the tree
224 * @param scoreParameters TODO
226 public NJTree(AlignmentViewport av, String treeType, ScoreModelI sm, SimilarityParamsI scoreParameters)
228 // TODO handle type "FromFile" more elegantly
229 if (!(treeType.equals(NEIGHBOUR_JOINING)))
231 treeType = AVERAGE_DISTANCE;
233 this.type = treeType;
235 boolean selview = av.getSelectionGroup() != null
236 && av.getSelectionGroup().getSize() > 1;
237 AlignmentView seqStrings = av.getAlignmentView(selview);
241 end = av.getAlignment().getWidth();
242 this.sequences = av.getAlignment().getSequencesArray();
246 start = av.getSelectionGroup().getStartRes();
247 end = av.getSelectionGroup().getEndRes() + 1;
248 this.sequences = av.getSelectionGroup().getSequencesInOrder(
252 init(seqStrings, start, end);
254 computeTree(sm, scoreParameters);
257 void init(AlignmentView seqView, int start, int end)
259 this.node = new Vector<SequenceNode>();
262 this.seqData = seqView;
266 SeqCigar[] seqs = new SeqCigar[sequences.length];
267 for (int i = 0; i < sequences.length; i++)
269 seqs[i] = new SeqCigar(sequences[i], start, end);
271 CigarArray sdata = new CigarArray(seqs);
272 sdata.addOperation(CigarArray.M, end - start + 1);
273 this.seqData = new AlignmentView(sdata, start);
277 * count the non-null sequences
281 done = new int[sequences.length];
283 for (SequenceI seq : sequences)
293 * Calculates the tree using the given score model and parameters, and the
294 * configured tree type
296 * If the score model computes pairwise distance scores, then these are used
297 * directly to derive the tree
299 * If the score model computes similarity scores, then the range of the scores
300 * is reversed to give a distance measure, and this is used to derive the tree
303 * @param scoreOptions
305 protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
307 if (sm instanceof DistanceScoreModelI)
309 distances = ((DistanceScoreModelI) sm).findDistances(seqData,
312 else if (sm instanceof SimilarityScoreModelI)
315 * compute similarity and invert it to give a distance measure
317 MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities(
318 seqData, scoreOptions);
319 result.reverseRange(true);
325 noClus = cluster.size();
331 * Generate a string representation of the Tree
333 * @return Newick File with all tree data available
336 public String toString()
338 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
340 return fout.print(hasBootstrap(), hasDistances(),
341 hasRootDistance()); // output all data available for tree
346 * used when the alignment associated to a tree has changed.
349 * Sequence set to be associated with tree nodes
351 public void updatePlaceHolders(List<SequenceI> list)
353 Vector<SequenceNode> leaves = findLeaves(top);
355 int sz = leaves.size();
356 SequenceIdMatcher seqmatcher = null;
361 SequenceNode leaf = leaves.elementAt(i++);
363 if (list.contains(leaf.element()))
365 leaf.setPlaceholder(false);
369 if (seqmatcher == null)
371 // Only create this the first time we need it
372 SequenceI[] seqs = new SequenceI[list.size()];
374 for (int j = 0; j < seqs.length; j++)
376 seqs[j] = list.get(j);
379 seqmatcher = new SequenceIdMatcher(seqs);
382 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
386 if (!leaf.isPlaceholder())
388 // remapping the node to a new sequenceI - should remove any refs to
390 // TODO - make many sequenceI to one leaf mappings possible!
393 leaf.setPlaceholder(false);
394 leaf.setElement(nam);
398 if (!leaf.isPlaceholder())
400 // Construct a new placeholder sequence object for this leaf
401 leaf.setElement(new Sequence(leaf.getName(),
402 "THISISAPLACEHLDER"));
404 leaf.setPlaceholder(true);
412 * rename any nodes according to their associated sequence. This will modify
413 * the tree's metadata! (ie the original NewickFile or newly generated
414 * BinaryTree's label data)
416 public void renameAssociatedNodes()
418 applyToNodes(new NodeTransformI()
422 public void transform(BinaryNode nd)
424 Object el = nd.element();
425 if (el != null && el instanceof SequenceI)
427 nd.setName(((SequenceI) el).getName());
440 if (type.equals(NEIGHBOUR_JOINING))
449 Cluster c = joinClusters(mini, minj);
453 cluster.setElementAt(null, minj);
454 cluster.setElementAt(c, mini);
459 boolean onefound = false;
464 for (int i = 0; i < noseqs; i++)
468 if (onefound == false)
480 joinClusters(one, two);
481 top = (node.elementAt(one));
496 * @return DOCUMENT ME!
498 Cluster joinClusters(int i, int j)
500 double dist = distances.getValue(i, j);
502 int noi = cluster.elementAt(i).value.length;
503 int noj = cluster.elementAt(j).value.length;
505 int[] value = new int[noi + noj];
507 for (int ii = 0; ii < noi; ii++)
509 value[ii] = cluster.elementAt(i).value[ii];
512 for (int ii = noi; ii < (noi + noj); ii++)
514 value[ii] = cluster.elementAt(j).value[ii - noi];
517 Cluster c = new Cluster(value);
522 if (type.equals(NEIGHBOUR_JOINING))
524 findClusterNJDistance(i, j);
528 findClusterDistance(i, j);
531 SequenceNode sn = new SequenceNode();
533 sn.setLeft((node.elementAt(i)));
534 sn.setRight((node.elementAt(j)));
536 SequenceNode tmpi = (node.elementAt(i));
537 SequenceNode tmpj = (node.elementAt(j));
539 if (type.equals(NEIGHBOUR_JOINING))
541 findNewNJDistances(tmpi, tmpj, dist);
545 findNewDistances(tmpi, tmpj, dist);
551 node.setElementAt(sn, i);
566 void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
570 tmpi.dist = ((dist + ri) - rj) / 2;
571 tmpj.dist = (dist - tmpi.dist);
594 void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
600 SequenceNode sni = tmpi;
601 SequenceNode snj = tmpj;
606 sni = (SequenceNode) sni.left();
612 snj = (SequenceNode) snj.left();
615 tmpi.dist = ((dist / 2) - ih);
616 tmpj.dist = ((dist / 2) - jh);
627 void findClusterDistance(int i, int j)
629 int noi = cluster.elementAt(i).value.length;
630 int noj = cluster.elementAt(j).value.length;
632 // New distances from cluster to others
633 double[] newdist = new double[noseqs];
635 for (int l = 0; l < noseqs; l++)
637 if ((l != i) && (l != j))
639 // newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
641 newdist[l] = ((distances.getValue(i, l) * noi) + (distances.getValue(
651 for (int ii = 0; ii < noseqs; ii++)
653 // distance[i][ii] = newdist[ii];
654 // distance[ii][i] = newdist[ii];
655 distances.setValue(i, ii, newdist[ii]);
656 distances.setValue(ii, i, newdist[ii]);
668 void findClusterNJDistance(int i, int j)
671 // New distances from cluster to others
672 double[] newdist = new double[noseqs];
674 for (int l = 0; l < noseqs; l++)
676 if ((l != i) && (l != j))
678 // newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) /
680 newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - distances
681 .getValue(i, j)) / 2;
689 for (int ii = 0; ii < noseqs; ii++)
691 // distance[i][ii] = newdist[ii];
692 // distance[ii][i] = newdist[ii];
693 distances.setValue(i, ii, newdist[ii]);
694 distances.setValue(ii, i, newdist[ii]);
706 * @return DOCUMENT ME!
708 double findr(int i, int j)
712 for (int k = 0; k < noseqs; k++)
714 if ((k != i) && (k != j) && (done[k] != 1))
716 // tmp = tmp + distance[i][k];
717 tmp = tmp + distances.getValue(i, k);
723 tmp = tmp / (noClus - 2);
732 * @return DOCUMENT ME!
734 double findMinNJDistance()
736 double min = Double.MAX_VALUE;
738 for (int i = 0; i < (noseqs - 1); i++)
740 for (int j = i + 1; j < noseqs; j++)
742 if ((done[i] != 1) && (done[j] != 1))
744 // float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
745 double tmp = distances.getValue(i, j)
746 - (findr(i, j) + findr(j, i));
765 * @return DOCUMENT ME!
767 double findMinDistance()
769 double min = Double.MAX_VALUE;
771 for (int i = 0; i < (noseqs - 1); i++)
773 for (int j = i + 1; j < noseqs; j++)
775 if ((done[i] != 1) && (done[j] != 1))
777 // if (distance[i][j] < min)
778 if (distances.getValue(i, j) < min)
783 // min = distance[i][j];
784 min = distances.getValue(i, j);
798 cluster = new Vector<Cluster>();
800 for (int i = 0; i < noseqs; i++)
802 SequenceNode sn = new SequenceNode();
804 sn.setElement(sequences[i]);
805 sn.setName(sequences[i].getName());
808 int[] value = new int[1];
811 Cluster c = new Cluster(value);
812 cluster.addElement(c);
817 * Search for leaf nodes below (or at) the given node
820 * root node to search from
824 public Vector<SequenceNode> findLeaves(SequenceNode nd)
826 Vector<SequenceNode> leaves = new Vector<SequenceNode>();
827 findLeaves(nd, leaves);
832 * Search for leaf nodes.
835 * root node to search from
837 * Vector of leaves to add leaf node objects too.
839 * @return Vector of leaf nodes on binary tree
841 Vector<SequenceNode> findLeaves(SequenceNode nd,
842 Vector<SequenceNode> leaves)
849 if ((nd.left() == null) && (nd.right() == null)) // Interior node
852 leaves.addElement(nd);
859 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
860 * leaves.addElement(node); }
862 findLeaves((SequenceNode) nd.left(), leaves);
863 findLeaves((SequenceNode) nd.right(), leaves);
870 * printNode is mainly for debugging purposes.
875 void printNode(SequenceNode nd)
882 if ((nd.left() == null) && (nd.right() == null))
884 System.out.println("Leaf = " + ((SequenceI) nd.element()).getName());
885 System.out.println("Dist " + nd.dist);
886 System.out.println("Boot " + nd.getBootstrap());
890 System.out.println("Dist " + nd.dist);
891 printNode((SequenceNode) nd.left());
892 printNode((SequenceNode) nd.right());
902 void findMaxDist(SequenceNode nd)
909 if ((nd.left() == null) && (nd.right() == null))
911 double dist = nd.dist;
913 if (dist > maxDistValue)
921 findMaxDist((SequenceNode) nd.left());
922 findMaxDist((SequenceNode) nd.right());
929 * @return DOCUMENT ME!
931 public Vector<SequenceNode> getGroups()
939 * @return DOCUMENT ME!
941 public double getMaxHeight()
954 public void groupNodes(SequenceNode nd, float threshold)
961 if ((nd.height / maxheight) > threshold)
963 groups.addElement(nd);
967 groupNodes((SequenceNode) nd.left(), threshold);
968 groupNodes((SequenceNode) nd.right(), threshold);
978 * @return DOCUMENT ME!
980 public double findHeight(SequenceNode nd)
987 if ((nd.left() == null) && (nd.right() == null))
989 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
991 if (nd.height > maxheight)
1002 if (nd.parent() != null)
1004 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
1009 nd.height = (float) 0.0;
1012 maxheight = findHeight((SequenceNode) (nd.left()));
1013 maxheight = findHeight((SequenceNode) (nd.right()));
1022 * @return DOCUMENT ME!
1024 SequenceNode reRoot()
1026 // TODO not used - remove?
1027 if (maxdist != null)
1031 double tmpdist = maxdist.dist;
1034 SequenceNode sn = new SequenceNode();
1037 // New right hand of top
1038 SequenceNode snr = (SequenceNode) maxdist.parent();
1039 changeDirection(snr, maxdist);
1040 System.out.println("Printing reversed tree");
1042 snr.dist = tmpdist / 2;
1043 maxdist.dist = tmpdist / 2;
1046 maxdist.setParent(sn);
1049 sn.setLeft(maxdist);
1063 * @return true if original sequence data can be recovered
1065 public boolean hasOriginalSequenceData()
1067 return seqData != null;
1071 * Returns original alignment data used for calculation - or null where not
1074 * @return null or cut'n'pasteable alignment
1076 public String printOriginalSequenceData(char gapChar)
1078 if (seqData == null)
1083 StringBuffer sb = new StringBuffer();
1084 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1085 for (int i = 0; i < seqdatas.length; i++)
1087 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequences[i]
1089 sb.append(" " + seqdatas[i] + "\n");
1091 return sb.toString();
1100 void printN(SequenceNode nd)
1107 if ((nd.left() != null) && (nd.right() != null))
1109 printN((SequenceNode) nd.left());
1110 printN((SequenceNode) nd.right());
1114 System.out.println(" name = " + ((SequenceI) nd.element()).getName());
1117 System.out.println(" dist = " + nd.dist + " " + nd.count + " "
1127 public void reCount(SequenceNode nd)
1131 // _lylimit = this.node.size();
1135 private long _lycount = 0, _lylimit = 0;
1143 void _reCount(SequenceNode nd)
1145 // if (_lycount<_lylimit)
1147 // System.err.println("Warning: depth of _recount greater than number of nodes.");
1155 if ((nd.left() != null) && (nd.right() != null))
1158 _reCount((SequenceNode) nd.left());
1159 _reCount((SequenceNode) nd.right());
1161 SequenceNode l = (SequenceNode) nd.left();
1162 SequenceNode r = (SequenceNode) nd.right();
1164 nd.count = l.count + r.count;
1165 nd.ycount = (l.ycount + r.ycount) / 2;
1170 nd.ycount = ycount++;
1181 public void swapNodes(SequenceNode nd)
1188 SequenceNode tmp = (SequenceNode) nd.left();
1190 nd.setLeft(nd.right());
1202 void changeDirection(SequenceNode nd, SequenceNode dir)
1209 if (nd.parent() != top)
1211 changeDirection((SequenceNode) nd.parent(), nd);
1213 SequenceNode tmp = (SequenceNode) nd.parent();
1215 if (dir == nd.left())
1220 else if (dir == nd.right())
1228 if (dir == nd.left())
1230 nd.setParent(nd.left());
1232 if (top.left() == nd)
1234 nd.setRight(top.right());
1238 nd.setRight(top.left());
1243 nd.setParent(nd.right());
1245 if (top.left() == nd)
1247 nd.setLeft(top.right());
1251 nd.setLeft(top.left());
1260 * @return DOCUMENT ME!
1262 public SequenceNode getMaxDist()
1270 * @return DOCUMENT ME!
1272 public SequenceNode getTopNode()
1279 * @return true if tree has real distances
1281 public boolean hasDistances()
1283 return hasDistances;
1288 * @return true if tree has real bootstrap values
1290 public boolean hasBootstrap()
1292 return hasBootstrap;
1295 public boolean hasRootDistance()
1297 return hasRootDistance;
1301 * apply the given transform to all the nodes in the tree.
1303 * @param nodeTransformI
1305 public void applyToNodes(NodeTransformI nodeTransformI)
1307 for (Enumeration<SequenceNode> nodes = node.elements(); nodes
1308 .hasMoreElements(); nodeTransformI.transform(nodes
1320 * @version $Revision$
1322 // TODO what does this class have that int[] doesn't have already?
1328 * Creates a new Cluster object.
1333 public Cluster(int[] value)