2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
18 package jalview.analysis;
22 import jalview.datamodel.*;
24 import jalview.schemes.*;
25 import jalview.util.*;
39 // SequenceData is a string representation of what the user
40 // sees. The display may contain hidden columns.
41 public AlignmentView seqData = null;
59 Vector groups = new Vector();
81 boolean hasDistances = true; // normal case for jalview trees
83 boolean hasBootstrap = false; // normal case for jalview trees
85 private boolean hasRootDistance = true;
88 * Create a new NJTree object with leaves associated with sequences in seqs,
89 * and original alignment data represented by Cigar strings.
98 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
100 this(seqs, treefile);
106 * sequenceString = new String[odata.length]; char gapChar =
107 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
108 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
109 * sequenceString[i] = oseq_aligned.getSequence(); }
114 * Creates a new NJTree object from a tree from an external source
117 * SequenceI which should be associated with leaves of treefile
121 public NJTree(SequenceI[] seqs, NewickFile treefile)
123 this.sequence = seqs;
124 top = treefile.getTree();
127 * There is no dependent alignment to be recovered from an imported tree.
129 * if (sequenceString == null) { sequenceString = new String[seqs.length];
130 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
131 * seqs[i].getSequence(); } }
134 hasDistances = treefile.HasDistances();
135 hasBootstrap = treefile.HasBootstrap();
136 hasRootDistance = treefile.HasRootDistance();
138 maxheight = findHeight(top);
140 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
142 Vector leaves = new Vector();
143 findLeaves(top, leaves);
146 int namesleft = seqs.length;
151 Vector one2many = new Vector();
152 int countOne2Many = 0;
153 while (i < leaves.size())
155 j = (SequenceNode) leaves.elementAt(i++);
156 realnam = j.getName();
161 nam = algnIds.findIdMatch(realnam);
167 if (one2many.contains(nam))
170 // if (jalview.bin.Cache.log.isDebugEnabled())
171 // jalview.bin.Cache.log.debug("One 2 many relationship for
176 one2many.addElement(nam);
182 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
183 j.setPlaceholder(true);
186 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
187 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
188 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
195 * Creates a new NJTree object.
208 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
209 String pwtype, int start, int end)
211 this.sequence = sequence;
212 this.node = new Vector();
214 this.pwtype = pwtype;
217 this.seqData = seqData;
221 SeqCigar[] seqs = new SeqCigar[sequence.length];
222 for (int i = 0; i < sequence.length; i++)
224 seqs[i] = new SeqCigar(sequence[i], start, end);
226 CigarArray sdata = new CigarArray(seqs);
227 sdata.addOperation(CigarArray.M, end - start + 1);
228 this.seqData = new AlignmentView(sdata, start);
230 // System.err.println("Made seqData");// dbg
231 if (!(type.equals("NJ")))
236 if (!(pwtype.equals("PID")))
238 if (ResidueProperties.getScoreMatrix(pwtype) == null)
246 done = new int[sequence.length];
248 while ((i < sequence.length) && (sequence[i] != null))
256 distance = findDistances(this.seqData
257 .getSequenceStrings(Comparison.GapChars.charAt(0)));
258 // System.err.println("Made distances");// dbg
260 // System.err.println("Made leaves");// dbg
262 noClus = cluster.size();
265 // System.err.println("Made clusters");// dbg
270 * Generate a string representation of the Tree
272 * @return Newick File with all tree data available
274 public String toString()
276 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
278 return fout.print(isHasBootstrap(), isHasDistances(),
279 isHasRootDistance()); // output all data available for tree
284 * Used when the alignment associated with a tree has changed.
287 * Sequence set to be associated with tree nodes
289 public void UpdatePlaceHolders(List<SequenceI> list)
291 Vector leaves = new Vector();
292 findLeaves(top, leaves);
294 int sz = leaves.size();
295 SequenceIdMatcher seqmatcher = null;
300 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
302 if (list.contains(leaf.element()))
304 leaf.setPlaceholder(false);
308 if (seqmatcher == null)
310 // Only create this the first time we need it
311 SequenceI[] seqs = new SequenceI[list.size()];
313 for (int j = 0; j < seqs.length; j++)
315 seqs[j] = (SequenceI) list.get(j);
318 seqmatcher = new SequenceIdMatcher(seqs);
321 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
325 if (!leaf.isPlaceholder())
327 // remapping the node to a new sequenceI - should remove any refs to
329 // TODO - make many sequenceI to one leaf mappings possible!
332 leaf.setPlaceholder(false);
333 leaf.setElement(nam);
337 if (!leaf.isPlaceholder())
339 // Construct a new placeholder sequence object for this leaf
340 leaf.setElement(new Sequence(leaf.getName(),
341 "THISISAPLACEHLDER"));
343 leaf.setPlaceholder(true);
351 * rename any nodes according to their associated sequence. This will modify
352 * the tree's metadata! (ie the original NewickFile or newly generated
353 * BinaryTree's label data)
355 public void renameAssociatedNodes()
357 applyToNodes(new NodeTransformI()
361 public void transform(BinaryNode node)
363 Object el = node.element();
364 if (el != null && el instanceof SequenceI)
366 node.setName(((SequenceI) el).getName());
375 public void cluster()
379 if (type.equals("NJ"))
388 Cluster c = joinClusters(mini, minj);
392 cluster.setElementAt(null, minj);
393 cluster.setElementAt(c, mini);
398 boolean onefound = false;
403 for (int i = 0; i < noseqs; i++)
407 if (onefound == false)
419 joinClusters(one, two);
420 top = (SequenceNode) (node.elementAt(one));
435 * @return DOCUMENT ME!
437 public Cluster joinClusters(int i, int j)
439 float dist = distance[i][j];
441 int noi = ((Cluster) cluster.elementAt(i)).value.length;
442 int noj = ((Cluster) cluster.elementAt(j)).value.length;
444 int[] value = new int[noi + noj];
446 for (int ii = 0; ii < noi; ii++)
448 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
451 for (int ii = noi; ii < (noi + noj); ii++)
453 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
456 Cluster c = new Cluster(value);
461 if (type.equals("NJ"))
463 findClusterNJDistance(i, j);
467 findClusterDistance(i, j);
470 SequenceNode sn = new SequenceNode();
472 sn.setLeft((SequenceNode) (node.elementAt(i)));
473 sn.setRight((SequenceNode) (node.elementAt(j)));
475 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
476 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
478 if (type.equals("NJ"))
480 findNewNJDistances(tmpi, tmpj, dist);
484 findNewDistances(tmpi, tmpj, dist);
490 node.setElementAt(sn, i);
505 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
509 tmpi.dist = ((dist + ri) - rj) / 2;
510 tmpj.dist = (dist - tmpi.dist);
533 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
539 SequenceNode sni = tmpi;
540 SequenceNode snj = tmpj;
545 sni = (SequenceNode) sni.left();
551 snj = (SequenceNode) snj.left();
554 tmpi.dist = ((dist / 2) - ih);
555 tmpj.dist = ((dist / 2) - jh);
566 public void findClusterDistance(int i, int j)
568 int noi = ((Cluster) cluster.elementAt(i)).value.length;
569 int noj = ((Cluster) cluster.elementAt(j)).value.length;
571 // New distances from cluster to others
572 float[] newdist = new float[noseqs];
574 for (int l = 0; l < noseqs; l++)
576 if ((l != i) && (l != j))
578 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
587 for (int ii = 0; ii < noseqs; ii++)
589 distance[i][ii] = newdist[ii];
590 distance[ii][i] = newdist[ii];
602 public void findClusterNJDistance(int i, int j)
605 // New distances from cluster to others
606 float[] newdist = new float[noseqs];
608 for (int l = 0; l < noseqs; l++)
610 if ((l != i) && (l != j))
612 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
620 for (int ii = 0; ii < noseqs; ii++)
622 distance[i][ii] = newdist[ii];
623 distance[ii][i] = newdist[ii];
635 * @return DOCUMENT ME!
637 public float findr(int i, int j)
641 for (int k = 0; k < noseqs; k++)
643 if ((k != i) && (k != j) && (done[k] != 1))
645 tmp = tmp + distance[i][k];
651 tmp = tmp / (noClus - 2);
660 * @return DOCUMENT ME!
662 public float findMinNJDistance()
666 for (int i = 0; i < (noseqs - 1); i++)
668 for (int j = i + 1; j < noseqs; j++)
670 if ((done[i] != 1) && (done[j] != 1))
672 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
691 * @return DOCUMENT ME!
693 public float findMinDistance()
697 for (int i = 0; i < (noseqs - 1); i++)
699 for (int j = i + 1; j < noseqs; j++)
701 if ((done[i] != 1) && (done[j] != 1))
703 if (distance[i][j] < min)
708 min = distance[i][j];
720 * @return DOCUMENT ME!
722 public float[][] findDistances(String[] sequenceString)
724 float[][] distance = new float[noseqs][noseqs];
726 if (pwtype.equals("PID"))
728 for (int i = 0; i < (noseqs - 1); i++)
730 for (int j = i; j < noseqs; j++)
738 distance[i][j] = 100 - Comparison.PID(sequenceString[i],
741 distance[j][i] = distance[i][j];
748 // Pairwise substitution score (with no gap penalties)
749 ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype);
750 if (pwmatrix == null)
752 pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
755 int end = sequenceString[0].length();
756 for (int i = 0; i < (noseqs - 1); i++)
758 for (int j = i; j < noseqs; j++)
762 for (int k = 0; k < end; k++)
766 score += pwmatrix.getPairwiseScore(
767 sequenceString[i].charAt(k),
768 sequenceString[j].charAt(k));
769 } catch (Exception ex)
771 System.err.println("err creating BLOSUM62 tree");
772 ex.printStackTrace();
776 distance[i][j] = (float) score;
778 if (score > maxscore)
785 for (int i = 0; i < (noseqs - 1); i++)
787 for (int j = i; j < noseqs; j++)
789 distance[i][j] = (float) maxscore - distance[i][j];
790 distance[j][i] = distance[i][j];
799 * else if (pwtype.equals("SW")) { float max = -1;
801 * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++)
802 * { AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
803 * as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out);
804 * distance[i][j] = (float) as.maxscore;
806 * if (max < distance[i][j]) { max = distance[i][j]; } } }
808 * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++)
809 * { distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j];
817 public void makeLeaves()
819 cluster = new Vector();
821 for (int i = 0; i < noseqs; i++)
823 SequenceNode sn = new SequenceNode();
825 sn.setElement(sequence[i]);
826 sn.setName(sequence[i].getName());
829 int[] value = new int[1];
832 Cluster c = new Cluster(value);
833 cluster.addElement(c);
838 * Search for leaf nodes.
841 * root node to search from
843 * Vector of leaves to add leaf node objects to.
845 * @return Vector of leaf nodes on binary tree
847 public Vector findLeaves(SequenceNode node, Vector leaves)
854 if ((node.left() == null) && (node.right() == null)) // Leaf node
857 leaves.addElement(node);
864 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
865 * leaves.addElement(node); }
867 findLeaves((SequenceNode) node.left(), leaves);
868 findLeaves((SequenceNode) node.right(), leaves);
875 * Find the leaf node with a particular ycount
878 * initial point on tree to search from
880 * value to search for
882 * @return null or the node with ycount=count
884 public Object findLeaf(SequenceNode node, int count)
886 found = _findLeaf(node, count);
892 * @see #findLeaf(SequenceNode, int)
894 public Object _findLeaf(SequenceNode node, int count)
901 if (node.ycount == count)
903 found = node.element();
909 _findLeaf((SequenceNode) node.left(), count);
910 _findLeaf((SequenceNode) node.right(), count);
917 * printNode is mainly for debugging purposes.
922 public void printNode(SequenceNode node)
929 if ((node.left() == null) && (node.right() == null))
932 .println("Leaf = " + ((SequenceI) node.element()).getName());
933 System.out.println("Dist " + ((SequenceNode) node).dist);
934 System.out.println("Boot " + node.getBootstrap());
938 System.out.println("Dist " + ((SequenceNode) node).dist);
939 printNode((SequenceNode) node.left());
940 printNode((SequenceNode) node.right());
950 public void findMaxDist(SequenceNode node)
957 if ((node.left() == null) && (node.right() == null))
959 float dist = ((SequenceNode) node).dist;
961 if (dist > maxDistValue)
963 maxdist = (SequenceNode) node;
969 findMaxDist((SequenceNode) node.left());
970 findMaxDist((SequenceNode) node.right());
977 * @return DOCUMENT ME!
979 public Vector getGroups()
987 * @return DOCUMENT ME!
989 public float getMaxHeight()
1002 public void groupNodes(SequenceNode node, float threshold)
1009 if ((node.height / maxheight) > threshold)
1011 groups.addElement(node);
1015 groupNodes((SequenceNode) node.left(), threshold);
1016 groupNodes((SequenceNode) node.right(), threshold);
1026 * @return DOCUMENT ME!
1028 public float findHeight(SequenceNode node)
1035 if ((node.left() == null) && (node.right() == null))
1037 node.height = ((SequenceNode) node.parent()).height + node.dist;
1039 if (node.height > maxheight)
1050 if (node.parent() != null)
1052 node.height = ((SequenceNode) node.parent()).height + node.dist;
1057 node.height = (float) 0.0;
1060 maxheight = findHeight((SequenceNode) (node.left()));
1061 maxheight = findHeight((SequenceNode) (node.right()));
1070 * @return DOCUMENT ME!
1072 public SequenceNode reRoot()
1074 if (maxdist != null)
1078 float tmpdist = maxdist.dist;
1081 SequenceNode sn = new SequenceNode();
1084 // New right hand of top
1085 SequenceNode snr = (SequenceNode) maxdist.parent();
1086 changeDirection(snr, maxdist);
1087 System.out.println("Printing reversed tree");
1089 snr.dist = tmpdist / 2;
1090 maxdist.dist = tmpdist / 2;
1093 maxdist.setParent(sn);
1096 sn.setLeft(maxdist);
1110 * @return true if original sequence data can be recovered
1112 public boolean hasOriginalSequenceData()
1114 return seqData != null;
1118 * Returns original alignment data used for calculation - or null where not
1121 * @return null or cut'n'pasteable alignment
1123 public String printOriginalSequenceData(char gapChar)
1125 if (seqData == null)
1130 StringBuffer sb = new StringBuffer();
1131 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1132 for (int i = 0; i < seqdatas.length; i++)
1134 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1136 sb.append(" " + seqdatas[i] + "\n");
1138 return sb.toString();
1147 public void printN(SequenceNode node)
1154 if ((node.left() != null) && (node.right() != null))
1156 printN((SequenceNode) node.left());
1157 printN((SequenceNode) node.right());
1161 System.out.println(" name = "
1162 + ((SequenceI) node.element()).getName());
1165 System.out.println(" dist = " + ((SequenceNode) node).dist + " "
1166 + ((SequenceNode) node).count + " "
1167 + ((SequenceNode) node).height);
1176 public void reCount(SequenceNode node)
1180 // _lylimit = this.node.size();
1184 private long _lycount = 0, _lylimit = 0;
1192 public void _reCount(SequenceNode node)
1194 // if (_lycount<_lylimit)
1196 // System.err.println("Warning: depth of _recount greater than number of nodes.");
1204 if ((node.left() != null) && (node.right() != null))
1207 _reCount((SequenceNode) node.left());
1208 _reCount((SequenceNode) node.right());
1210 SequenceNode l = (SequenceNode) node.left();
1211 SequenceNode r = (SequenceNode) node.right();
1213 ((SequenceNode) node).count = l.count + r.count;
1214 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
1218 ((SequenceNode) node).count = 1;
1219 ((SequenceNode) node).ycount = ycount++;
1230 public void swapNodes(SequenceNode node)
1237 SequenceNode tmp = (SequenceNode) node.left();
1239 node.setLeft(node.right());
1251 public void changeDirection(SequenceNode node, SequenceNode dir)
1258 if (node.parent() != top)
1260 changeDirection((SequenceNode) node.parent(), node);
1262 SequenceNode tmp = (SequenceNode) node.parent();
1264 if (dir == node.left())
1266 node.setParent(dir);
1269 else if (dir == node.right())
1271 node.setParent(dir);
1277 if (dir == node.left())
1279 node.setParent(node.left());
1281 if (top.left() == node)
1283 node.setRight(top.right());
1287 node.setRight(top.left());
1292 node.setParent(node.right());
1294 if (top.left() == node)
1296 node.setLeft(top.right());
1300 node.setLeft(top.left());
1309 * @return DOCUMENT ME!
1311 public SequenceNode getMaxDist()
1319 * @return DOCUMENT ME!
1321 public SequenceNode getTopNode()
1328 * @return true if tree has real distances
1330 public boolean isHasDistances()
1332 return hasDistances;
1337 * @return true if tree has real bootstrap values
1339 public boolean isHasBootstrap()
1341 return hasBootstrap;
1344 public boolean isHasRootDistance()
1346 return hasRootDistance;
1350 * apply the given transform to all the nodes in the tree.
1352 * @param nodeTransformI
1354 public void applyToNodes(NodeTransformI nodeTransformI)
1356 for (Enumeration nodes = node.elements(); nodes.hasMoreElements(); nodeTransformI
1357 .transform((BinaryNode) nodes.nextElement()))
1366 * @version $Revision$
1373 * Creates a new Cluster object.
1378 public Cluster(int[] value)