2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.1)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
17 * The Jalview Authors are detailed in the 'AUTHORS' file.
19 package jalview.analysis;
23 import jalview.api.analysis.ScoreModelI;
24 import jalview.datamodel.*;
26 import jalview.schemes.*;
27 import jalview.util.*;
41 // SequenceData is a string representation of what the user
42 // sees. The display may contain hidden columns.
// Assigned by the tree-calculating constructor; hasOriginalSequenceData()
// reports whether it is non-null.
43 public AlignmentView seqData = null;
// Nodes collected by groupNodes().
61 Vector groups = new Vector();
// The three flags below are overwritten from the NewickFile when a tree is
// imported (see the (seqs, treefile) constructor).
83 boolean hasDistances = true; // normal case for jalview trees
85 boolean hasBootstrap = false; // normal case for jalview trees
87 private boolean hasRootDistance = true;
90 * Create a new NJTree object with leaves associated with sequences in seqs,
91 * and original alignment data represented by Cigar strings.
100 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
// Delegates to the (seqs, treefile) constructor, which builds the tree and
// associates its leaves with the given sequences.
102 this(seqs, treefile);
// NOTE(review): odata is not referenced on any line visible in this excerpt;
// check the omitted lines for where it is stored.
// The lines below are retired code kept as a comment (sequenceString is no
// longer populated here).
108 * sequenceString = new String[odata.length]; char gapChar =
109 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
110 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
111 * sequenceString[i] = oseq_aligned.getSequence(); }
116 * Creates a new NJTree object from a tree from an external source
119 * SequenceI which should be associated with leaves of treefile
123 public NJTree(SequenceI[] seqs, NewickFile treefile)
// Builds an NJTree around a tree parsed elsewhere: the root comes from the
// NewickFile and each leaf is matched by name against seqs.
125 this.sequence = seqs;
126 top = treefile.getTree();
129 * There is no dependent alignment to be recovered from an imported tree.
131 * if (sequenceString == null) { sequenceString = new String[seqs.length];
132 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
133 * seqs[i].getSequence(); } }
// Copy the flags describing which data the Newick file actually carried.
136 hasDistances = treefile.HasDistances();
137 hasBootstrap = treefile.HasBootstrap();
138 hasRootDistance = treefile.HasRootDistance();
// findHeight also assigns node.height on the nodes it visits.
140 maxheight = findHeight(top);
// Matcher used to look up an alignment sequence from a leaf's name.
142 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
144 Vector leaves = new Vector();
145 findLeaves(top, leaves);
148 int namesleft = seqs.length;
// one2many records sequences that have already been matched to a leaf, so a
// second leaf resolving to the same sequence can be detected.
153 Vector one2many = new Vector();
154 int countOne2Many = 0;
155 while (i < leaves.size())
157 j = (SequenceNode) leaves.elementAt(i++);
158 realnam = j.getName();
163 nam = algnIds.findIdMatch(realnam);
169 if (one2many.contains(nam))
172 // if (jalview.bin.Cache.log.isDebugEnabled())
173 // jalview.bin.Cache.log.debug("One 2 many relationship for
178 one2many.addElement(nam);
// Placeholder path: the leaf gets a dummy Sequence carrying its own name and
// is flagged as a placeholder. NOTE(review): the condition guarding this
// path is not visible here - presumably no matching sequence was found.
184 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
185 j.setPlaceholder(true);
188 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
189 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
190 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
197 * Creates a new NJTree object.
210 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
211 String pwtype, int start, int end)
// Calculates a tree from the given sequences using the named tree type and
// pairwise score model.
213 this.sequence = sequence;
214 this.node = new Vector();
216 this.pwtype = pwtype;
219 this.seqData = seqData;
// Builds an AlignmentView from columns start..end of the input sequences
// (end - start + 1 columns, so both ends are inclusive).
// NOTE(review): the condition choosing between the supplied seqData and
// this rebuilt view is not visible in this excerpt.
223 SeqCigar[] seqs = new SeqCigar[sequence.length];
224 for (int i = 0; i < sequence.length; i++)
226 seqs[i] = new SeqCigar(sequence[i], start, end);
228 CigarArray sdata = new CigarArray(seqs);
229 sdata.addOperation(CigarArray.M, end - start + 1);
230 this.seqData = new AlignmentView(sdata, start);
232 // System.err.println("Made seqData");// dbg
// Checks on the requested tree type and score model name; the actions taken
// when a check fails are on lines not visible here.
233 if (!(type.equals("NJ")))
238 if (!(pwtype.equals("PID")))
240 if (ResidueProperties.getScoreMatrix(pwtype) == null)
// One slot per input sequence; other methods test done[k] != 1 to decide
// whether entry k still takes part in clustering.
248 done = new int[sequence.length];
// Walks the leading non-null entries of sequence.
250 while ((i < sequence.length) && (sequence[i] != null))
258 distance = findDistances();
259 // System.err.println("Made distances");// dbg
261 // System.err.println("Made leaves");// dbg
// The cluster count starts at the number of entries in the cluster vector.
263 noClus = cluster.size();
266 // System.err.println("Made clusters");// dbg
271 * Generate a string representation of the Tree
273 * @return Newick File with all tree data available
275 public String toString()
277 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
279 return fout.print(isHasBootstrap(), isHasDistances(),
280 isHasRootDistance()); // output all data available for tree
285 * used when the alignment associated to a tree has changed.
288 * Sequence set to be associated with tree nodes
290 public void UpdatePlaceHolders(List<SequenceI> list)
// Re-associates every leaf of the tree with a sequence from list, or turns
// it into a placeholder.
292 Vector leaves = new Vector();
293 findLeaves(top, leaves);
295 int sz = leaves.size();
296 SequenceIdMatcher seqmatcher = null;
301 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
// The leaf's current element is still in the list: just clear the
// placeholder flag.
303 if (list.contains(leaf.element()))
305 leaf.setPlaceholder(false);
// Otherwise look the leaf up by name, building the matcher lazily.
309 if (seqmatcher == null)
311 // Only create this the first time we need it
312 SequenceI[] seqs = new SequenceI[list.size()];
314 for (int j = 0; j < seqs.length; j++)
316 seqs[j] = (SequenceI) list.get(j);
319 seqmatcher = new SequenceIdMatcher(seqs);
322 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
// NOTE(review): the test on nam is not visible here; presumably the next
// section runs when a match was found and the final section when it was not.
326 if (!leaf.isPlaceholder())
328 // remapping the node to a new sequenceI - should remove any refs to
330 // TODO - make many sequenceI to one leaf mappings possible!
// Bind the leaf to the matched sequence.
333 leaf.setPlaceholder(false);
334 leaf.setElement(nam);
338 if (!leaf.isPlaceholder())
340 // Construct a new placeholder sequence object for this leaf
341 leaf.setElement(new Sequence(leaf.getName(),
342 "THISISAPLACEHLDER"));
344 leaf.setPlaceholder(true);
352 * rename any nodes according to their associated sequence. This will modify
353 * the tree's metadata! (ie the original NewickFile or newly generated
354 * BinaryTree's label data)
356 public void renameAssociatedNodes()
358 applyToNodes(new NodeTransformI()
362 public void transform(BinaryNode node)
364 Object el = node.element();
365 if (el != null && el instanceof SequenceI)
367 node.setName(((SequenceI) el).getName());
376 public void cluster()
// Repeatedly joins pairs of clusters and finally sets the tree root (top).
// The pair selection differs between "NJ" and the other tree type.
380 if (type.equals("NJ"))
389 Cluster c = joinClusters(mini, minj);
// The merged cluster takes slot mini; slot minj is emptied.
393 cluster.setElementAt(null, minj);
394 cluster.setElementAt(c, mini);
399 boolean onefound = false;
404 for (int i = 0; i < noseqs; i++)
408 if (onefound == false)
// Joins entries one and two, then takes the node stored at index one as the
// root (joinClusters stores the new parent node at its first index).
420 joinClusters(one, two);
421 top = (SequenceNode) (node.elementAt(one));
436 * @return DOCUMENT ME!
438 public Cluster joinClusters(int i, int j)
// Merges clusters i and j: builds the combined member list, updates the
// distance matrix and creates a new parent node over nodes i and j.
// Read the pair distance first: the findCluster*Distance calls below
// overwrite row and column i of the matrix.
440 float dist = distance[i][j];
442 int noi = ((Cluster) cluster.elementAt(i)).value.length;
443 int noj = ((Cluster) cluster.elementAt(j)).value.length;
// Combined members: cluster i's values followed by cluster j's.
445 int[] value = new int[noi + noj];
447 for (int ii = 0; ii < noi; ii++)
449 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
452 for (int ii = noi; ii < (noi + noj); ii++)
454 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
457 Cluster c = new Cluster(value);
// Recompute distances from the merged cluster to the others.
462 if (type.equals("NJ"))
464 findClusterNJDistance(i, j);
468 findClusterDistance(i, j);
// New internal node with node i as left child and node j as right child.
471 SequenceNode sn = new SequenceNode();
473 sn.setLeft((SequenceNode) (node.elementAt(i)));
474 sn.setRight((SequenceNode) (node.elementAt(j)));
476 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
477 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
// Assign branch lengths to the two children from the pair distance.
479 if (type.equals("NJ"))
481 findNewNJDistances(tmpi, tmpj, dist);
485 findNewDistances(tmpi, tmpj, dist);
// The parent node replaces node i in the node vector.
491 node.setElementAt(sn, i);
506 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
// Splits dist between the two branches: tmpi receives (dist + ri - rj) / 2
// and tmpj the remainder. ri and rj are not declared in this method.
510 tmpi.dist = ((dist + ri) - rj) / 2;
511 tmpj.dist = (dist - tmpi.dist);
534 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
// Sets the branch lengths of tmpi and tmpj to half of dist minus ih and jh
// respectively.
// NOTE(review): how ih and jh are accumulated is not visible here;
// presumably they are summed while walking down the left children below.
540 SequenceNode sni = tmpi;
541 SequenceNode snj = tmpj;
546 sni = (SequenceNode) sni.left();
552 snj = (SequenceNode) snj.left();
555 tmpi.dist = ((dist / 2) - ih);
556 tmpj.dist = ((dist / 2) - jh);
567 public void findClusterDistance(int i, int j)
// Recomputes distances from the merged cluster (stored at index i) to every
// other entry, weighting by the member counts of clusters i and j.
569 int noi = ((Cluster) cluster.elementAt(i)).value.length;
570 int noj = ((Cluster) cluster.elementAt(j)).value.length;
572 // New distances from cluster to others
573 float[] newdist = new float[noseqs];
575 for (int l = 0; l < noseqs; l++)
577 if ((l != i) && (l != j))
// Size-weighted sum of the two old distances; the rest of this expression
// continues on a line not visible in this excerpt.
579 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
// Write the new values into row i and column i, keeping the matrix symmetric.
588 for (int ii = 0; ii < noseqs; ii++)
590 distance[i][ii] = newdist[ii];
591 distance[ii][i] = newdist[ii];
603 public void findClusterNJDistance(int i, int j)
// Recomputes distances from the merged cluster (stored at index i) to every
// other entry l as (d(i,l) + d(j,l) - d(i,j)) / 2.
606 // New distances from cluster to others
607 float[] newdist = new float[noseqs];
609 for (int l = 0; l < noseqs; l++)
611 if ((l != i) && (l != j))
613 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
// Write the new values into row i and column i, keeping the matrix symmetric.
621 for (int ii = 0; ii < noseqs; ii++)
623 distance[i][ii] = newdist[ii];
624 distance[ii][i] = newdist[ii];
636 * @return DOCUMENT ME!
638 public float findr(int i, int j)
// Accumulates distance[i][k] over every k other than i and j whose done[k]
// is not 1, then scales the total by (noClus - 2).
// NOTE(review): the initial value of tmp and any guard on the division
// are on lines not visible in this excerpt.
642 for (int k = 0; k < noseqs; k++)
644 if ((k != i) && (k != j) && (done[k] != 1))
646 tmp = tmp + distance[i][k];
652 tmp = tmp / (noClus - 2);
661 * @return DOCUMENT ME!
663 public float findMinNJDistance()
// Scans every pair i < j in which neither entry is marked done.
667 for (int i = 0; i < (noseqs - 1); i++)
669 for (int j = i + 1; j < noseqs; j++)
671 if ((done[i] != 1) && (done[j] != 1))
// Candidate value for the pair: its distance minus both findr terms.
// findr loops over all entries, so it is re-evaluated for every pair.
673 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
692 * @return DOCUMENT ME!
694 public float findMinDistance()
// Scans every pair i < j in which neither entry is marked done and keeps
// the smallest distance seen in min.
698 for (int i = 0; i < (noseqs - 1); i++)
700 for (int j = i + 1; j < noseqs; j++)
702 if ((done[i] != 1) && (done[j] != 1))
704 if (distance[i][j] < min)
709 min = distance[i][j];
719 * Calculate a distance matrix given the sequence input data and score model
721 * @return similarity matrix used to compute tree
723 public float[][] findDistances()
726 float[][] distance = new float[noseqs][noseqs];
728 // Pairwise substitution score (with no gap penalties)
729 ScoreModelI _pwmatrix = ResidueProperties.getScoreModel(pwtype);
730 if (_pwmatrix == null)
732 _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
734 distance = _pwmatrix.findDistances(seqData);
743 public void makeLeaves()
// Creates one leaf node and one single-member cluster per input sequence.
745 cluster = new Vector();
747 for (int i = 0; i < noseqs; i++)
749 SequenceNode sn = new SequenceNode();
// The leaf carries the sequence as its element and takes the sequence's name.
751 sn.setElement(sequence[i]);
752 sn.setName(sequence[i].getName());
// NOTE(review): the assignment into value[0] is not visible here -
// presumably it is set to the sequence index i.
755 int[] value = new int[1];
758 Cluster c = new Cluster(value);
759 cluster.addElement(c);
764 * Search for leaf nodes.
767 * root node to search from
769 * Vector of leaves to add leaf node objects to.
771 * @return Vector of leaf nodes on binary tree
773 public Vector findLeaves(SequenceNode node, Vector leaves)
// Collects the leaf nodes below node into the supplied vector.
// The test below is true when the node has no children, i.e. it is a leaf;
// such nodes are added to leaves.
780 if ((node.left() == null) && (node.right() == null)) // Interior node
783 leaves.addElement(node);
790 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
791 * leaves.addElement(node); }
// Recurse into both subtrees, left before right.
793 findLeaves((SequenceNode) node.left(), leaves);
794 findLeaves((SequenceNode) node.right(), leaves);
801 * Find the leaf node with a particular ycount
804 * initial point on tree to search from
806 * value to search for
808 * @return null or the element of the node with ycount=count
810 public Object findLeaf(SequenceNode node, int count)
// Runs the recursive search and stores its result in the found field.
812 found = _findLeaf(node, count);
818 * @see #findLeaf(SequenceNode, int)
820 public Object _findLeaf(SequenceNode node, int count)
// Recursive helper: when a node's ycount equals count, its element (not the
// node itself) is stored in the found field.
827 if (node.ycount == count)
829 found = node.element();
// The values returned by the recursive calls are discarded; results are
// passed back through the found field.
835 _findLeaf((SequenceNode) node.left(), count);
836 _findLeaf((SequenceNode) node.right(), count);
843 * printNode is mainly for debugging purposes.
848 public void printNode(SequenceNode node)
// Debug dump to System.out: leaves print name, distance and bootstrap;
// other nodes print their distance and then recurse into both children.
855 if ((node.left() == null) && (node.right() == null))
858 .println("Leaf = " + ((SequenceI) node.element()).getName());
859 System.out.println("Dist " + ((SequenceNode) node).dist);
860 System.out.println("Boot " + node.getBootstrap());
864 System.out.println("Dist " + ((SequenceNode) node).dist);
865 printNode((SequenceNode) node.left());
866 printNode((SequenceNode) node.right());
876 public void findMaxDist(SequenceNode node)
// Searches the subtree for the leaf whose dist exceeds maxDistValue and
// records it in the maxdist field.
883 if ((node.left() == null) && (node.right() == null))
885 float dist = ((SequenceNode) node).dist;
887 if (dist > maxDistValue)
889 maxdist = (SequenceNode) node;
// Recurse into both children.
895 findMaxDist((SequenceNode) node.left());
896 findMaxDist((SequenceNode) node.right());
903 * @return DOCUMENT ME!
905 public Vector getGroups()
913 * @return DOCUMENT ME!
915 public float getMaxHeight()
928 public void groupNodes(SequenceNode node, float threshold)
// Adds to groups each node whose height, as a fraction of maxheight,
// exceeds threshold.
// NOTE(review): the branch structure is not visible here; presumably the
// recursion below only happens for nodes that did not pass the test.
935 if ((node.height / maxheight) > threshold)
937 groups.addElement(node);
941 groupNodes((SequenceNode) node.left(), threshold);
942 groupNodes((SequenceNode) node.right(), threshold);
952 * @return DOCUMENT ME!
954 public float findHeight(SequenceNode node)
// Assigns node.height throughout the subtree, where a node's height is its
// parent's height plus its own dist.
961 if ((node.left() == null) && (node.right() == null))
// Leaf case. NOTE(review): parent() is dereferenced without a null check on
// this line, so a tree consisting of a single node would fail here -
// confirm against callers.
963 node.height = ((SequenceNode) node.parent()).height + node.dist;
965 if (node.height > maxheight)
// Internal node case: the root (no parent) gets height 0.
976 if (node.parent() != null)
978 node.height = ((SequenceNode) node.parent()).height + node.dist;
983 node.height = (float) 0.0;
// The maxheight field is overwritten with the result from each subtree.
986 maxheight = findHeight((SequenceNode) (node.left()));
987 maxheight = findHeight((SequenceNode) (node.right()));
996 * @return DOCUMENT ME!
998 public SequenceNode reRoot()
// Re-roots the tree on the branch above maxdist, when maxdist has been set.
1000 if (maxdist != null)
1004 float tmpdist = maxdist.dist;
// sn becomes the parent of maxdist.
1007 SequenceNode sn = new SequenceNode();
1010 // New right hand of top
1011 SequenceNode snr = (SequenceNode) maxdist.parent();
// Reverse the parent/child links on the path above maxdist.
1012 changeDirection(snr, maxdist);
// Note: writes to standard output on every call that reaches this line.
1013 System.out.println("Printing reversed tree");
// The original branch length is split equally between snr and maxdist.
1015 snr.dist = tmpdist / 2;
1016 maxdist.dist = tmpdist / 2;
1019 maxdist.setParent(sn);
1022 sn.setLeft(maxdist);
1036 * @return true if original sequence data can be recovered
1038 public boolean hasOriginalSequenceData()
1040 return seqData != null;
1044 * Returns original alignment data used for calculation - or null where not
1047 * @return null or cut'n'pasteable alignment
1049 public String printOriginalSequenceData(char gapChar)
// Renders the stored alignment data as text, one sequence per line.
// When no sequence data is held there is nothing to print.
1051 if (seqData == null)
1056 StringBuffer sb = new StringBuffer();
// One string per sequence, with gaps rendered using gapChar.
1057 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1058 for (int i = 0; i < seqdatas.length; i++)
// "%-15s": a left-justified label padded to 15 characters.
1060 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1062 sb.append(" " + seqdatas[i] + "\n");
1064 return sb.toString();
1073 public void printN(SequenceNode node)
// Debug dump to System.out: recurses into nodes that have both children,
// prints the element name, and prints dist, count and height.
1080 if ((node.left() != null) && (node.right() != null))
1082 printN((SequenceNode) node.left());
1083 printN((SequenceNode) node.right());
1087 System.out.println(" name = "
1088 + ((SequenceI) node.element()).getName());
1091 System.out.println(" dist = " + ((SequenceNode) node).dist + " "
1092 + ((SequenceNode) node).count + " "
1093 + ((SequenceNode) node).height);
1102 public void reCount(SequenceNode node)
// NOTE(review): the active statements of this method are not visible in
// this excerpt; presumably it delegates to _reCount(node).
1106 // _lylimit = this.node.size();
// Depth-check counters; in the visible code they appear only in
// commented-out lines.
1110 private long _lycount = 0, _lylimit = 0;
1118 public void _reCount(SequenceNode node)
// Recomputes count and ycount for every node in the subtree.
1120 // if (_lycount<_lylimit)
1122 // System.err.println("Warning: depth of _recount greater than number of nodes.");
// Node with both children: process the children first, then derive this
// node's values from theirs.
1130 if ((node.left() != null) && (node.right() != null))
1133 _reCount((SequenceNode) node.left());
1134 _reCount((SequenceNode) node.right());
1136 SequenceNode l = (SequenceNode) node.left();
1137 SequenceNode r = (SequenceNode) node.right();
// count is the sum of the children's counts; ycount is the mean of the
// children's ycount values.
1139 ((SequenceNode) node).count = l.count + r.count;
1140 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
// Otherwise the node counts as one and takes the next value of the ycount
// field, which is then incremented.
1144 ((SequenceNode) node).count = 1;
1145 ((SequenceNode) node).ycount = ycount++;
1156 public void swapNodes(SequenceNode node)
// Saves the left child in tmp and moves the right child into the left slot.
// NOTE(review): the statement storing tmp into the right slot is not visible
// in this excerpt.
1163 SequenceNode tmp = (SequenceNode) node.left();
1165 node.setLeft(node.right());
1177 public void changeDirection(SequenceNode node, SequenceNode dir)
// Reverses the parent/child relationship between node and its child dir,
// working up the tree towards top.
// node is not a direct child of top: reverse the links above it first.
1184 if (node.parent() != top)
1186 changeDirection((SequenceNode) node.parent(), node);
// tmp holds the old parent before node is re-parented to dir.
1188 SequenceNode tmp = (SequenceNode) node.parent();
1190 if (dir == node.left())
1192 node.setParent(dir);
1195 else if (dir == node.right())
1197 node.setParent(dir);
// node is a direct child of top: the child slot that pointed at dir is
// given top's other child instead.
1203 if (dir == node.left())
1205 node.setParent(node.left());
1207 if (top.left() == node)
1209 node.setRight(top.right());
1213 node.setRight(top.left());
1218 node.setParent(node.right());
1220 if (top.left() == node)
1222 node.setLeft(top.right());
1226 node.setLeft(top.left());
1235 * @return DOCUMENT ME!
1237 public SequenceNode getMaxDist()
1245 * @return DOCUMENT ME!
1247 public SequenceNode getTopNode()
1254 * @return true if tree has real distances
1256 public boolean isHasDistances()
1258 return hasDistances;
1263 * @return true if tree has real bootstrap values
1265 public boolean isHasBootstrap()
1267 return hasBootstrap;
1270 public boolean isHasRootDistance()
1272 return hasRootDistance;
1276 * apply the given transform to all the nodes in the tree.
1278 * @param nodeTransformI
1280 public void applyToNodes(NodeTransformI nodeTransformI)
1282 for (Enumeration nodes = node.elements(); nodes.hasMoreElements(); nodeTransformI
1283 .transform((BinaryNode) nodes.nextElement()))
1292 * @version $Revision$
1299 * Creates a new Cluster object.
1304 public Cluster(int[] value)