2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
25 import jalview.api.analysis.ScoreModelI;
26 import jalview.datamodel.*;
28 import jalview.schemes.*;
29 import jalview.util.*;
43 // SequenceData is a string representation of what the user
44 // sees. The display may contain hidden columns.
45 public AlignmentView seqData = null;
63 Vector groups = new Vector();
85 boolean hasDistances = true; // normal case for jalview trees
87 boolean hasBootstrap = false; // normal case for jalview trees
89 private boolean hasRootDistance = true;
92 * Create a new NJTree object with leaves associated with sequences in seqs,
93 * and original alignment data represented by Cigar strings.
102 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
104 this(seqs, treefile);
110 * sequenceString = new String[odata.length]; char gapChar =
111 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
112 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
113 * sequenceString[i] = oseq_aligned.getSequence(); }
118 * Creates a new NJTree object from a tree from an external source
121 * SequenceI which should be associated with leaves of treefile
125 public NJTree(SequenceI[] seqs, NewickFile treefile)
127 this.sequence = seqs;
128 top = treefile.getTree();
131 * There is no dependent alignment to be recovered from an imported tree.
133 * if (sequenceString == null) { sequenceString = new String[seqs.length];
134 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
135 * seqs[i].getSequence(); } }
138 hasDistances = treefile.HasDistances();
139 hasBootstrap = treefile.HasBootstrap();
140 hasRootDistance = treefile.HasRootDistance();
142 maxheight = findHeight(top);
144 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
146 Vector leaves = new Vector();
147 findLeaves(top, leaves);
150 int namesleft = seqs.length;
155 Vector one2many = new Vector();
156 int countOne2Many = 0;
157 while (i < leaves.size())
159 j = (SequenceNode) leaves.elementAt(i++);
160 realnam = j.getName();
165 nam = algnIds.findIdMatch(realnam);
171 if (one2many.contains(nam))
174 // if (jalview.bin.Cache.log.isDebugEnabled())
175 // jalview.bin.Cache.log.debug("One 2 many relationship for
180 one2many.addElement(nam);
186 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
187 j.setPlaceholder(true);
190 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
191 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
192 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
199 * Creates a new NJTree object.
212 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
213 String pwtype, int start, int end)
215 this.sequence = sequence;
216 this.node = new Vector();
218 this.pwtype = pwtype;
221 this.seqData = seqData;
225 SeqCigar[] seqs = new SeqCigar[sequence.length];
226 for (int i = 0; i < sequence.length; i++)
228 seqs[i] = new SeqCigar(sequence[i], start, end);
230 CigarArray sdata = new CigarArray(seqs);
231 sdata.addOperation(CigarArray.M, end - start + 1);
232 this.seqData = new AlignmentView(sdata, start);
234 // System.err.println("Made seqData");// dbg
235 if (!(type.equals("NJ")))
240 if (!(pwtype.equals("PID")))
242 if (ResidueProperties.getScoreMatrix(pwtype) == null)
250 done = new int[sequence.length];
252 while ((i < sequence.length) && (sequence[i] != null))
260 distance = findDistances();
261 // System.err.println("Made distances");// dbg
263 // System.err.println("Made leaves");// dbg
265 noClus = cluster.size();
268 // System.err.println("Made clusters");// dbg
273 * Generate a string representation of the Tree
275 * @return Newick File with all tree data available
277 public String toString()
279 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
281 return fout.print(isHasBootstrap(), isHasDistances(),
282 isHasRootDistance()); // output all data available for tree
287 * used when the alignment associated to a tree has changed.
290 * Sequence set to be associated with tree nodes
292 public void UpdatePlaceHolders(List<SequenceI> list)
294 Vector leaves = new Vector();
295 findLeaves(top, leaves);
297 int sz = leaves.size();
298 SequenceIdMatcher seqmatcher = null;
303 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
305 if (list.contains(leaf.element()))
307 leaf.setPlaceholder(false);
311 if (seqmatcher == null)
313 // Only create this the first time we need it
314 SequenceI[] seqs = new SequenceI[list.size()];
316 for (int j = 0; j < seqs.length; j++)
318 seqs[j] = (SequenceI) list.get(j);
321 seqmatcher = new SequenceIdMatcher(seqs);
324 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
328 if (!leaf.isPlaceholder())
330 // remapping the node to a new sequenceI - should remove any refs to
332 // TODO - make many sequenceI to one leaf mappings possible!
335 leaf.setPlaceholder(false);
336 leaf.setElement(nam);
340 if (!leaf.isPlaceholder())
342 // Construct a new placeholder sequence object for this leaf
343 leaf.setElement(new Sequence(leaf.getName(),
344 "THISISAPLACEHLDER"));
346 leaf.setPlaceholder(true);
354 * rename any nodes according to their associated sequence. This will modify
355 * the tree's metadata! (ie the original NewickFile or newly generated
356 * BinaryTree's label data)
358 public void renameAssociatedNodes()
360 applyToNodes(new NodeTransformI()
364 public void transform(BinaryNode node)
366 Object el = node.element();
367 if (el != null && el instanceof SequenceI)
369 node.setName(((SequenceI) el).getName());
378 public void cluster()
382 if (type.equals("NJ"))
391 Cluster c = joinClusters(mini, minj);
395 cluster.setElementAt(null, minj);
396 cluster.setElementAt(c, mini);
401 boolean onefound = false;
406 for (int i = 0; i < noseqs; i++)
410 if (onefound == false)
422 joinClusters(one, two);
423 top = (SequenceNode) (node.elementAt(one));
438 * @return DOCUMENT ME!
440 public Cluster joinClusters(int i, int j)
442 float dist = distance[i][j];
444 int noi = ((Cluster) cluster.elementAt(i)).value.length;
445 int noj = ((Cluster) cluster.elementAt(j)).value.length;
447 int[] value = new int[noi + noj];
449 for (int ii = 0; ii < noi; ii++)
451 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
454 for (int ii = noi; ii < (noi + noj); ii++)
456 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
459 Cluster c = new Cluster(value);
464 if (type.equals("NJ"))
466 findClusterNJDistance(i, j);
470 findClusterDistance(i, j);
473 SequenceNode sn = new SequenceNode();
475 sn.setLeft((SequenceNode) (node.elementAt(i)));
476 sn.setRight((SequenceNode) (node.elementAt(j)));
478 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
479 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
481 if (type.equals("NJ"))
483 findNewNJDistances(tmpi, tmpj, dist);
487 findNewDistances(tmpi, tmpj, dist);
493 node.setElementAt(sn, i);
508 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
512 tmpi.dist = ((dist + ri) - rj) / 2;
513 tmpj.dist = (dist - tmpi.dist);
536 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
542 SequenceNode sni = tmpi;
543 SequenceNode snj = tmpj;
548 sni = (SequenceNode) sni.left();
554 snj = (SequenceNode) snj.left();
557 tmpi.dist = ((dist / 2) - ih);
558 tmpj.dist = ((dist / 2) - jh);
569 public void findClusterDistance(int i, int j)
571 int noi = ((Cluster) cluster.elementAt(i)).value.length;
572 int noj = ((Cluster) cluster.elementAt(j)).value.length;
574 // New distances from cluster to others
575 float[] newdist = new float[noseqs];
577 for (int l = 0; l < noseqs; l++)
579 if ((l != i) && (l != j))
581 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
590 for (int ii = 0; ii < noseqs; ii++)
592 distance[i][ii] = newdist[ii];
593 distance[ii][i] = newdist[ii];
605 public void findClusterNJDistance(int i, int j)
608 // New distances from cluster to others
609 float[] newdist = new float[noseqs];
611 for (int l = 0; l < noseqs; l++)
613 if ((l != i) && (l != j))
615 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
623 for (int ii = 0; ii < noseqs; ii++)
625 distance[i][ii] = newdist[ii];
626 distance[ii][i] = newdist[ii];
638 * @return DOCUMENT ME!
640 public float findr(int i, int j)
644 for (int k = 0; k < noseqs; k++)
646 if ((k != i) && (k != j) && (done[k] != 1))
648 tmp = tmp + distance[i][k];
654 tmp = tmp / (noClus - 2);
663 * @return DOCUMENT ME!
665 public float findMinNJDistance()
669 for (int i = 0; i < (noseqs - 1); i++)
671 for (int j = i + 1; j < noseqs; j++)
673 if ((done[i] != 1) && (done[j] != 1))
675 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
694 * @return DOCUMENT ME!
696 public float findMinDistance()
700 for (int i = 0; i < (noseqs - 1); i++)
702 for (int j = i + 1; j < noseqs; j++)
704 if ((done[i] != 1) && (done[j] != 1))
706 if (distance[i][j] < min)
711 min = distance[i][j];
721 * Calculate a distance matrix given the sequence input data and score model
723 * @return similarity matrix used to compute tree
725 public float[][] findDistances()
728 float[][] distance = new float[noseqs][noseqs];
730 // Pairwise substitution score (with no gap penalties)
731 ScoreModelI _pwmatrix = ResidueProperties.getScoreModel(pwtype);
732 if (_pwmatrix == null)
734 _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
736 distance = _pwmatrix.findDistances(seqData);
745 public void makeLeaves()
747 cluster = new Vector();
749 for (int i = 0; i < noseqs; i++)
751 SequenceNode sn = new SequenceNode();
753 sn.setElement(sequence[i]);
754 sn.setName(sequence[i].getName());
757 int[] value = new int[1];
760 Cluster c = new Cluster(value);
761 cluster.addElement(c);
766 * Search for leaf nodes.
769 * root node to search from
771 * Vector of leaves to add leaf node objects to.
773 * @return Vector of leaf nodes on binary tree
775 public Vector findLeaves(SequenceNode node, Vector leaves)
782 if ((node.left() == null) && (node.right() == null)) // Interior node
785 leaves.addElement(node);
792 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
793 * leaves.addElement(node); }
795 findLeaves((SequenceNode) node.left(), leaves);
796 findLeaves((SequenceNode) node.right(), leaves);
803 * Find the leaf node with a particular ycount
806 * initial point on tree to search from
808 * value to search for
810 * @return null or the node with ycount=count
812 public Object findLeaf(SequenceNode node, int count)
814 found = _findLeaf(node, count);
820 * @see #findLeaf(SequenceNode, int)
822 public Object _findLeaf(SequenceNode node, int count)
829 if (node.ycount == count)
831 found = node.element();
837 _findLeaf((SequenceNode) node.left(), count);
838 _findLeaf((SequenceNode) node.right(), count);
845 * printNode is mainly for debugging purposes.
850 public void printNode(SequenceNode node)
857 if ((node.left() == null) && (node.right() == null))
860 .println("Leaf = " + ((SequenceI) node.element()).getName());
861 System.out.println("Dist " + ((SequenceNode) node).dist);
862 System.out.println("Boot " + node.getBootstrap());
866 System.out.println("Dist " + ((SequenceNode) node).dist);
867 printNode((SequenceNode) node.left());
868 printNode((SequenceNode) node.right());
878 public void findMaxDist(SequenceNode node)
885 if ((node.left() == null) && (node.right() == null))
887 float dist = ((SequenceNode) node).dist;
889 if (dist > maxDistValue)
891 maxdist = (SequenceNode) node;
897 findMaxDist((SequenceNode) node.left());
898 findMaxDist((SequenceNode) node.right());
905 * @return DOCUMENT ME!
907 public Vector getGroups()
915 * @return DOCUMENT ME!
917 public float getMaxHeight()
930 public void groupNodes(SequenceNode node, float threshold)
937 if ((node.height / maxheight) > threshold)
939 groups.addElement(node);
943 groupNodes((SequenceNode) node.left(), threshold);
944 groupNodes((SequenceNode) node.right(), threshold);
954 * @return DOCUMENT ME!
956 public float findHeight(SequenceNode node)
963 if ((node.left() == null) && (node.right() == null))
965 node.height = ((SequenceNode) node.parent()).height + node.dist;
967 if (node.height > maxheight)
978 if (node.parent() != null)
980 node.height = ((SequenceNode) node.parent()).height + node.dist;
985 node.height = (float) 0.0;
988 maxheight = findHeight((SequenceNode) (node.left()));
989 maxheight = findHeight((SequenceNode) (node.right()));
998 * @return DOCUMENT ME!
1000 public SequenceNode reRoot()
1002 if (maxdist != null)
1006 float tmpdist = maxdist.dist;
1009 SequenceNode sn = new SequenceNode();
1012 // New right hand of top
1013 SequenceNode snr = (SequenceNode) maxdist.parent();
1014 changeDirection(snr, maxdist);
1015 System.out.println("Printing reversed tree");
1017 snr.dist = tmpdist / 2;
1018 maxdist.dist = tmpdist / 2;
1021 maxdist.setParent(sn);
1024 sn.setLeft(maxdist);
1038 * @return true if original sequence data can be recovered
1040 public boolean hasOriginalSequenceData()
1042 return seqData != null;
1046 * Returns original alignment data used for calculation - or null where not
1049 * @return null or cut'n'pasteable alignment
1051 public String printOriginalSequenceData(char gapChar)
1053 if (seqData == null)
1058 StringBuffer sb = new StringBuffer();
1059 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1060 for (int i = 0; i < seqdatas.length; i++)
1062 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1064 sb.append(" " + seqdatas[i] + "\n");
1066 return sb.toString();
1075 public void printN(SequenceNode node)
1082 if ((node.left() != null) && (node.right() != null))
1084 printN((SequenceNode) node.left());
1085 printN((SequenceNode) node.right());
1089 System.out.println(" name = "
1090 + ((SequenceI) node.element()).getName());
1093 System.out.println(" dist = " + ((SequenceNode) node).dist + " "
1094 + ((SequenceNode) node).count + " "
1095 + ((SequenceNode) node).height);
1104 public void reCount(SequenceNode node)
1108 // _lylimit = this.node.size();
1112 private long _lycount = 0, _lylimit = 0;
1120 public void _reCount(SequenceNode node)
1122 // if (_lycount<_lylimit)
1124 // System.err.println("Warning: depth of _recount greater than number of nodes.");
1132 if ((node.left() != null) && (node.right() != null))
1135 _reCount((SequenceNode) node.left());
1136 _reCount((SequenceNode) node.right());
1138 SequenceNode l = (SequenceNode) node.left();
1139 SequenceNode r = (SequenceNode) node.right();
1141 ((SequenceNode) node).count = l.count + r.count;
1142 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
1146 ((SequenceNode) node).count = 1;
1147 ((SequenceNode) node).ycount = ycount++;
1158 public void swapNodes(SequenceNode node)
1165 SequenceNode tmp = (SequenceNode) node.left();
1167 node.setLeft(node.right());
1179 public void changeDirection(SequenceNode node, SequenceNode dir)
1186 if (node.parent() != top)
1188 changeDirection((SequenceNode) node.parent(), node);
1190 SequenceNode tmp = (SequenceNode) node.parent();
1192 if (dir == node.left())
1194 node.setParent(dir);
1197 else if (dir == node.right())
1199 node.setParent(dir);
1205 if (dir == node.left())
1207 node.setParent(node.left());
1209 if (top.left() == node)
1211 node.setRight(top.right());
1215 node.setRight(top.left());
1220 node.setParent(node.right());
1222 if (top.left() == node)
1224 node.setLeft(top.right());
1228 node.setLeft(top.left());
1237 * @return DOCUMENT ME!
1239 public SequenceNode getMaxDist()
1247 * @return DOCUMENT ME!
1249 public SequenceNode getTopNode()
1256 * @return true if tree has real distances
1258 public boolean isHasDistances()
1260 return hasDistances;
1265 * @return true if tree has real bootstrap values
1267 public boolean isHasBootstrap()
1269 return hasBootstrap;
1272 public boolean isHasRootDistance()
1274 return hasRootDistance;
1278 * apply the given transform to all the nodes in the tree.
1280 * @param nodeTransformI
1282 public void applyToNodes(NodeTransformI nodeTransformI)
1284 for (Enumeration nodes = node.elements(); nodes.hasMoreElements(); nodeTransformI
1285 .transform((BinaryNode) nodes.nextElement()))
1294 * @version $Revision$
1301 * Creates a new Cluster object.
1306 public Cluster(int[] value)