2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
19 package jalview.analysis;
\r
21 import jalview.datamodel.*;
\r
23 import jalview.io.NewickFile;
\r
25 import jalview.schemes.ResidueProperties;
\r
27 import jalview.util.*;
\r
36 * @version $Revision$
\r
// Sequences associated with the tree; assigned by the constructors and read by makeLeaves().
41 SequenceI[] sequence;
\r
43 //SequenceData is a string representation of what the user
\r
44 //sees. The display may contain hidden columns.
\r
// Set by the distance-calculating constructor; hasOriginalSequenceData() tests it for null.
45 CigarArray seqData=null;
\r
// Receives the nodes selected by groupNodes().
55 Vector groups = new Vector();
\r
// Leaf with the largest branch length found by findMaxDist(); reRoot() works from it.
56 SequenceNode maxdist;
\r
// Scratch result written by findLeaf() and _findLeaf().
64 Object found = null;
\r
// NOTE(review): none of the visible methods read this field; they use local Vectors named leaves.
65 Object leaves = null;
\r
// The three flags below are overwritten from the NewickFile by the tree-import constructor.
67 boolean hasDistances = true; // normal case for jalview trees
\r
68 boolean hasBootstrap = false; // normal case for jalview trees
\r
70 private boolean hasRootDistance = true;
\r
73 * Create a new NJTree object with leaves associated with sequences in seqs,
\r
74 * and original alignment data represented by Cigar strings.
\r
75 * @param seqs SequenceI[]
\r
76 * @param odata CigarArray
\r
77 * @param treefile NewickFile
\r
// Imports a parsed Newick tree by delegating to the two-argument constructor.
79 public NJTree(SequenceI[] seqs, CigarArray odata, NewickFile treefile) {
\r
80 this(seqs, treefile);
\r
// NOTE(review): the statements below use odata.length and odata[i] although odata is declared as
// CigarArray, and sequenceString is not among the visible fields. Presumably they are inside a
// block comment whose delimiters are not visible here - confirm against the full file.
84 sequenceString = new String[odata.length];
\r
85 char gapChar = jalview.util.Comparison.GapChars.charAt(0);
\r
86 for (int i = 0; i < odata.length; i++)
\r
88 SequenceI oseq_aligned = odata[i].getSeq(gapChar);
\r
89 sequenceString[i] = oseq_aligned.getSequence();
\r
94 * Creates a new NJTree object from a tree from an external source
\r
96 * @param seqs SequenceI which should be associated with leaves of treefile
\r
97 * @param treefile A parsed tree
\r
// Builds the tree from an externally supplied Newick tree and tries to bind each leaf to one
// of seqs by name.
99 public NJTree(SequenceI[] seqs, NewickFile treefile)
\r
101 this.sequence = seqs;
\r
102 top = treefile.getTree();
\r
105 * There is no dependent alignment to be recovered from an
\r
// NOTE(review): line 105 is a comment fragment, so the sequenceString statements that follow are
// presumably commented out as well - the comment delimiters are not visible here.
108 if (sequenceString == null)
\r
110 sequenceString = new String[seqs.length];
\r
111 for (int i = 0; i < seqs.length; i++)
\r
113 sequenceString[i] = seqs[i].getSequence();
\r
// Take over what the Newick parser reports about the file's contents.
118 hasDistances = treefile.HasDistances();
\r
119 hasBootstrap = treefile.HasBootstrap();
\r
120 hasRootDistance = treefile.HasRootDistance();
\r
// Assigns node heights and records the maximum.
122 maxheight = findHeight(top);
\r
// Matcher used below to look up alignment sequences by leaf name.
124 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
\r
126 Vector leaves = new Vector();
\r
127 findLeaves(top, leaves);
\r
130 int namesleft = seqs.length;
\r
// Visit every leaf of the imported tree.
136 while (i < leaves.size())
\r
138 j = (SequenceNode) leaves.elementAt(i++);
\r
139 realnam = j.getName();
\r
// NOTE(review): the code that updates namesleft is not visible; confirm when this test can fail.
142 if (namesleft > -1)
\r
144 nam = algnIds.findIdMatch(realnam);
\r
// Presumably the no-match branch: the leaf gets a dummy Sequence and is flagged as a placeholder.
154 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
\r
155 j.setPlaceholder(true);
\r
161 * Creates a new NJTree object.
\r
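163 * @param sequence sequences the tree is calculated for
\r
164 * @param type tree-building method; compared against "NJ"
\r
165 * @param pwtype pairwise distance measure; compared against "PID"
\r
166 * @param start start position handed to each SeqCigar built from the sequences
\r
167 * @param end end position handed to each SeqCigar built from the sequences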
\r
// Calculates a new tree from sequence data. The type and pwtype parameters used below are
// declared on signature lines that are not visible in this listing.
169 public NJTree(SequenceI[] sequence,
\r
170 CigarArray seqData,
\r
173 int start, int end)
\r
175 this.sequence = sequence;
\r
176 this.node = new Vector();
\r
178 this.pwtype = pwtype;
\r
// Use the caller's CigarArray when one is given ...
179 if (seqData!=null) {
\r
180 this.seqData = seqData;
\r
// ... presumably otherwise (the else is not visible) build one covering start..end of each sequence.
182 SeqCigar[] seqs = new SeqCigar[sequence.length];
\r
183 for(int i=0; i<sequence.length; i++)
\r
185 seqs[i] = new SeqCigar(sequence[i], start, end);
\r
187 this.seqData = new CigarArray(seqs);
\r
188 this.seqData.addOperation(CigarArray.M, end-start+1);
\r
// NOTE(review): the bodies of the next two tests are not visible; presumably they substitute
// default values for unrecognised type / pwtype strings - confirm.
191 if (!(type.equals("NJ")))
\r
196 if (!(pwtype.equals("PID")))
\r
203 done = new int[sequence.length];
\r
// Walks the leading non-null entries of sequence; the loop body is not visible.
205 while ((i < sequence.length) && (sequence[i] != null))
\r
// Pairwise distances are computed from the gapped strings held in seqData.
213 distance = findDistances(this.seqData.getSequenceStrings(Comparison.GapChars.charAt(0)));
\r
217 noClus = cluster.size();
\r
225 * @return DOCUMENT ME!
\r
// Renders the tree through NewickFile, starting from the node returned by getTopNode().
227 public String toString()
\r
229 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
\r
231 return fout.print(false, true); // distances only
\r
236 * used when the alignment associated to a tree has changed.
\r
238 * @param alignment Vector
\r
240 public void UpdatePlaceHolders(Vector alignment)
\r
242 Vector leaves = new Vector();
\r
243 findLeaves(top, leaves);
\r
245 int sz = leaves.size();
\r
246 SequenceIdMatcher seqmatcher = null;
\r
251 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
\r
253 if (alignment.contains(leaf.element()))
\r
255 leaf.setPlaceholder(false);
\r
259 if (seqmatcher == null)
\r
261 // Only create this the first time we need it
\r
262 SequenceI[] seqs = new SequenceI[alignment.size()];
\r
264 for (int j = 0; j < seqs.length; j++)
\r
265 seqs[j] = (SequenceI) alignment.elementAt(j);
\r
267 seqmatcher = new SequenceIdMatcher(seqs);
\r
270 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
\r
274 leaf.setPlaceholder(false);
\r
275 leaf.setElement(nam);
\r
279 leaf.setPlaceholder(true);
\r
// Drives the clustering. Much of the body (loop headers, branches, declarations of one and two)
// is not visible in this listing, so only the visible steps are annotated.
288 public void cluster()
\r
// Choose the pair-selection routine according to the tree type.
292 if (type.equals("NJ"))
\r
294 findMinNJDistance();
\r
// Merge the pair recorded in mini / minj.
301 Cluster c = joinClusters(mini, minj);
\r
// The merged cluster takes slot mini; slot minj is emptied.
305 cluster.setElementAt(null, minj);
\r
306 cluster.setElementAt(c, mini);
\r
// Presumably a final scan that picks the last two remaining clusters - TODO confirm.
311 boolean onefound = false;
\r
316 for (int i = 0; i < noseqs; i++)
\r
320 if (onefound == false)
\r
// Join the last pair; the node stored at index one becomes the root.
332 joinClusters(one, two);
\r
333 top = (SequenceNode) (node.elementAt(one));
\r
343 * @param i DOCUMENT ME!
\r
344 * @param j DOCUMENT ME!
\r
346 * @return DOCUMENT ME!
\r
// Merges clusters i and j: concatenates their member lists, updates the distance matrix and
// hangs the two tree nodes under a new parent that replaces node i.
348 public Cluster joinClusters(int i, int j)
\r
// Read before the matrix is updated below.
350 float dist = distance[i][j];
\r
352 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
353 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
// Members of i first, then members of j.
355 int[] value = new int[noi + noj];
\r
357 for (int ii = 0; ii < noi; ii++)
\r
359 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
\r
362 for (int ii = noi; ii < (noi + noj); ii++)
\r
364 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
\r
367 Cluster c = new Cluster(value);
\r
// Recompute distances from the merged cluster to all others, by tree type.
372 if (type.equals("NJ"))
\r
374 findClusterNJDistance(i, j);
\r
378 findClusterDistance(i, j);
\r
// New internal node with the two joined nodes as children.
381 SequenceNode sn = new SequenceNode();
\r
383 sn.setLeft((SequenceNode) (node.elementAt(i)));
\r
384 sn.setRight((SequenceNode) (node.elementAt(j)));
\r
386 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
\r
387 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
\r
// Assign branch lengths to the two children from the pre-merge distance.
389 if (type.equals("NJ"))
\r
391 findNewNJDistances(tmpi, tmpj, dist);
\r
395 findNewDistances(tmpi, tmpj, dist);
\r
398 tmpi.setParent(sn);
\r
399 tmpj.setParent(sn);
\r
// The parent takes over slot i. The return statement is not visible - presumably returns c.
401 node.setElementAt(sn, i);
\r
409 * @param tmpi DOCUMENT ME!
\r
410 * @param tmpj DOCUMENT ME!
\r
411 * @param dist DOCUMENT ME!
\r
// Assigns branch lengths to the two nodes being joined from dist and the ri / rj values
// (ri and rj are not declared in the visible lines - presumably fields set before this call).
413 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
\r
417 tmpi.dist = ((dist + ri) - rj) / 2;
\r
// The second length is the remainder of dist.
418 tmpj.dist = (dist - tmpi.dist);
\r
434 * @param tmpi DOCUMENT ME!
\r
435 * @param tmpj DOCUMENT ME!
\r
436 * @param dist DOCUMENT ME!
\r
438 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
\r
444 SequenceNode sni = tmpi;
\r
445 SequenceNode snj = tmpj;
\r
447 while (sni != null)
\r
449 ih = ih + sni.dist;
\r
450 sni = (SequenceNode) sni.left();
\r
453 while (snj != null)
\r
455 jh = jh + snj.dist;
\r
456 snj = (SequenceNode) snj.left();
\r
459 tmpi.dist = ((dist / 2) - ih);
\r
460 tmpj.dist = ((dist / 2) - jh);
\r
466 * @param i DOCUMENT ME!
\r
467 * @param j DOCUMENT ME!
\r
469 public void findClusterDistance(int i, int j)
\r
471 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
472 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
474 // New distances from cluster to others
\r
475 float[] newdist = new float[noseqs];
\r
477 for (int l = 0; l < noseqs; l++)
\r
479 if ((l != i) && (l != j))
\r
481 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
\r
490 for (int ii = 0; ii < noseqs; ii++)
\r
492 distance[i][ii] = newdist[ii];
\r
493 distance[ii][i] = newdist[ii];
\r
500 * @param i DOCUMENT ME!
\r
501 * @param j DOCUMENT ME!
\r
503 public void findClusterNJDistance(int i, int j)
\r
506 // New distances from cluster to others
\r
507 float[] newdist = new float[noseqs];
\r
509 for (int l = 0; l < noseqs; l++)
\r
511 if ((l != i) && (l != j))
\r
513 newdist[l] = ((distance[i][l] + distance[j][l]) -
\r
514 distance[i][j]) / 2;
\r
522 for (int ii = 0; ii < noseqs; ii++)
\r
524 distance[i][ii] = newdist[ii];
\r
525 distance[ii][i] = newdist[ii];
\r
532 * @param i DOCUMENT ME!
\r
533 * @param j DOCUMENT ME!
\r
535 * @return DOCUMENT ME!
\r
// Accumulates distance[i][k] over every k other than i and j that is not flagged in done[],
// then scales the total by (noClus - 2). The declaration and initial value of tmp and the
// return statement are not visible in this listing.
537 public float findr(int i, int j)
\r
541 for (int k = 0; k < noseqs; k++)
\r
543 if ((k != i) && (k != j) && (done[k] != 1))
\r
545 tmp = tmp + distance[i][k];
\r
// NOTE(review): a guard for noClus == 2 may sit on a line not visible here - confirm, because a
// float division by zero yields Infinity/NaN rather than an exception.
551 tmp = tmp / (noClus - 2);
\r
560 * @return DOCUMENT ME!
\r
// Scans every pair (i, j) with i < j whose entries are not flagged in done[] and evaluates the
// adjusted distance for the pair. The comparison against min and the return are not visible;
// presumably the best pair is recorded in mini / minj, which cluster() reads afterwards.
562 public float findMinNJDistance()
\r
// Starting value for the minimum search.
564 float min = 100000;
\r
566 for (int i = 0; i < (noseqs - 1); i++)
\r
568 for (int j = i + 1; j < noseqs; j++)
\r
570 if ((done[i] != 1) && (done[j] != 1))
\r
// Raw distance reduced by the findr() terms of both members.
572 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
\r
591 * @return DOCUMENT ME!
\r
593 public float findMinDistance()
\r
595 float min = 100000;
\r
597 for (int i = 0; i < (noseqs - 1); i++)
\r
599 for (int j = i + 1; j < noseqs; j++)
\r
601 if ((done[i] != 1) && (done[j] != 1))
\r
603 if (distance[i][j] < min)
\r
608 min = distance[i][j];
\r
620 * @return DOCUMENT ME!
\r
// Builds a noseqs x noseqs distance matrix from the given sequence strings, using the measure
// named by pwtype. Several lines (declarations of score / maxscore, try, branch conditions)
// are not visible in this listing.
622 public float[][] findDistances(String[] sequenceString)
\r
// Local matrix; same name as the distance field used elsewhere in the class.
624 float[][] distance = new float[noseqs][noseqs];
\r
// "PID": distance is 100 minus the value returned by Comparison.PID.
626 if (pwtype.equals("PID"))
\r
628 for (int i = 0; i < (noseqs - 1); i++)
\r
630 for (int j = i; j < noseqs; j++)
\r
634 distance[i][i] = 0;
\r
638 distance[i][j] = 100 -
\r
639 Comparison.PID(sequenceString[i], sequenceString[j]);
\r
// Mirror the value so the matrix is symmetric.
641 distance[j][i] = distance[i][j];
\r
// "BL": sum BLOSUM62 scores column by column, then turn scores into distances.
646 else if (pwtype.equals("BL"))
\r
// Column count is taken from the first string only.
649 int end = sequenceString[0].length();
\r
650 for (int i = 0; i < (noseqs - 1); i++)
\r
652 for (int j = i; j < noseqs; j++)
\r
656 for (int k = 0; k < end; k++)
\r
660 score += ResidueProperties.getBLOSUM62(
\r
661 sequenceString[i].substring(k, k + 1),
\r
662 sequenceString[j].substring(k, k + 1));
\r
// Any failure while scoring is reported on stderr; what follows the handler is not visible.
664 catch (Exception ex)
\r
666 System.err.println("err creating BLOSUM62 tree");
\r
667 ex.printStackTrace();
\r
671 distance[i][j] = (float) score;
\r
673 if (score > maxscore)
\r
// Second pass: subtract each score from maxscore so that higher similarity gives a smaller distance.
680 for (int i = 0; i < (noseqs - 1); i++)
\r
682 for (int j = i; j < noseqs; j++)
\r
684 distance[i][j] = (float) maxscore - distance[i][j];
\r
685 distance[j][i] = distance[i][j];
\r
// Everything from the block-comment opener on the next line onwards is the disabled "SW"
// (AlignSeq-based) variant; its closing delimiter is not visible in this listing.
689 /* else if (pwtype.equals("SW"))
\r
693 for (int i = 0; i < (noseqs - 1); i++)
\r
695 for (int j = i; j < noseqs; j++)
\r
697 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
\r
698 as.calcScoreMatrix();
\r
699 as.traceAlignment();
\r
700 as.printAlignment(System.out);
\r
701 distance[i][j] = (float) as.maxscore;
\r
703 if (max < distance[i][j])
\r
705 max = distance[i][j];
\r
710 for (int i = 0; i < (noseqs - 1); i++)
\r
712 for (int j = i; j < noseqs; j++)
\r
714 distance[i][j] = max - distance[i][j];
\r
715 distance[j][i] = distance[i][j];
\r
726 public void makeLeaves()
\r
728 cluster = new Vector();
\r
730 for (int i = 0; i < noseqs; i++)
\r
732 SequenceNode sn = new SequenceNode();
\r
734 sn.setElement(sequence[i]);
\r
735 sn.setName(sequence[i].getName());
\r
736 node.addElement(sn);
\r
738 int[] value = new int[1];
\r
741 Cluster c = new Cluster(value);
\r
742 cluster.addElement(c);
\r
749 * @param node DOCUMENT ME!
\r
750 * @param leaves DOCUMENT ME!
\r
752 * @return DOCUMENT ME!
\r
754 public Vector findLeaves(SequenceNode node, Vector leaves)
\r
761 if ((node.left() == null) && (node.right() == null))
\r
763 leaves.addElement(node);
\r
769 findLeaves((SequenceNode) node.left(), leaves);
\r
770 findLeaves((SequenceNode) node.right(), leaves);
\r
779 * @param node DOCUMENT ME!
\r
780 * @param count DOCUMENT ME!
\r
782 * @return DOCUMENT ME!
\r
// Entry point for the leaf search: stores the outcome of _findLeaf() in the found field.
// The return statement is not visible in this listing - presumably returns found.
784 public Object findLeaf(SequenceNode node, int count)
\r
// NOTE(review): found is not visibly cleared before the search; check whether a search with
// no match can hand back the result of an earlier call.
786 found = _findLeaf(node, count);
\r
794 * @param node DOCUMENT ME!
\r
795 * @param count DOCUMENT ME!
\r
797 * @return DOCUMENT ME!
\r
// Recursive helper for findLeaf(): looks for a node whose ycount equals count and stores its
// element in the found field. The null guard and the return statements are not visible here.
799 public Object _findLeaf(SequenceNode node, int count)
\r
// The ycount test is applied to whatever node is visited, not only to leaves.
806 if (node.ycount == count)
\r
808 found = node.element();
\r
// Otherwise search both subtrees; the values returned by the recursive calls are ignored.
814 _findLeaf((SequenceNode) node.left(), count);
\r
815 _findLeaf((SequenceNode) node.right(), count);
\r
822 * printNode is mainly for debugging purposes.
\r
824 * @param node SequenceNode
\r
826 public void printNode(SequenceNode node)
\r
833 if ((node.left() == null) && (node.right() == null))
\r
835 System.out.println("Leaf = " +
\r
836 ((SequenceI) node.element()).getName());
\r
837 System.out.println("Dist " + ((SequenceNode) node).dist);
\r
838 System.out.println("Boot " + node.getBootstrap());
\r
842 System.out.println("Dist " + ((SequenceNode) node).dist);
\r
843 printNode((SequenceNode) node.left());
\r
844 printNode((SequenceNode) node.right());
\r
851 * @param node DOCUMENT ME!
\r
853 public void findMaxDist(SequenceNode node)
\r
860 if ((node.left() == null) && (node.right() == null))
\r
862 float dist = ((SequenceNode) node).dist;
\r
864 if (dist > maxDistValue)
\r
866 maxdist = (SequenceNode) node;
\r
867 maxDistValue = dist;
\r
872 findMaxDist((SequenceNode) node.left());
\r
873 findMaxDist((SequenceNode) node.right());
\r
880 * @return DOCUMENT ME!
\r
// Accessor whose body is not visible in this listing; presumably returns the groups Vector
// that groupNodes() fills - TODO confirm.
882 public Vector getGroups()
\r
890 * @return DOCUMENT ME!
\r
// Accessor whose body is not visible in this listing; presumably returns the maxheight value
// maintained by findHeight() - TODO confirm.
892 public float getMaxHeight()
\r
900 * @param node DOCUMENT ME!
\r
901 * @param threshold DOCUMENT ME!
\r
903 public void groupNodes(SequenceNode node, float threshold)
\r
910 if ((node.height / maxheight) > threshold)
\r
912 groups.addElement(node);
\r
916 groupNodes((SequenceNode) node.left(), threshold);
\r
917 groupNodes((SequenceNode) node.right(), threshold);
\r
924 * @param node DOCUMENT ME!
\r
926 * @return DOCUMENT ME!
\r
// Assigns a height to every node below (and including) node - parent height plus own dist -
// and tracks the largest value through the maxheight field. The null guard, the else branches
// and the remaining return statements are not visible in this listing.
928 public float findHeight(SequenceNode node)
\r
// Leaf case.
935 if ((node.left() == null) && (node.right() == null))
\r
// NOTE(review): parent() is dereferenced without a visible null check, so a tree consisting of
// a single node would fail here - confirm.
937 node.height = ((SequenceNode) node.parent()).height + node.dist;
\r
939 if (node.height > maxheight)
\r
941 return node.height;
\r
// Internal node: height comes from the parent when there is one ...
950 if (node.parent() != null)
\r
952 node.height = ((SequenceNode) node.parent()).height +
\r
// ... and the root starts at zero.
958 node.height = (float) 0.0;
\r
// Each recursive call's result replaces maxheight, so the statement order matters.
961 maxheight = findHeight((SequenceNode) (node.left()));
\r
962 maxheight = findHeight((SequenceNode) (node.right()));
\r
971 * @return DOCUMENT ME!
\r
// Re-roots the tree on the branch leading to maxdist (the node recorded by findMaxDist()).
// Several statements, including the return, are not visible in this listing.
973 public SequenceNode reRoot()
\r
975 if (maxdist != null)
\r
// Length of the branch that will be split by the new root.
979 float tmpdist = maxdist.dist;
\r
// The new root node.
982 SequenceNode sn = new SequenceNode();
\r
983 sn.setParent(null);
\r
985 // New right hand of top
\r
986 SequenceNode snr = (SequenceNode) maxdist.parent();
\r
// Reverse the parent/child links on the path from the old root down to snr.
987 changeDirection(snr, maxdist);
\r
// Message written to standard output on every call.
988 System.out.println("Printing reversed tree");
\r
// The split branch is shared equally between the two sides of the new root.
990 snr.dist = tmpdist / 2;
\r
991 maxdist.dist = tmpdist / 2;
\r
994 maxdist.setParent(sn);
\r
997 sn.setLeft(maxdist);
\r
1010 * @return true if original sequence data can be recovered
\r
// True when a CigarArray has been stored in seqData.
1012 public boolean hasOriginalSequenceData() {
\r
1013 return seqData!=null;
\r
1016 * Returns original alignment data used for calculation - or null where
\r
1019 * @return null or cut'n'pasteable alignment
\r
1021 public String printOriginalSequenceData()
\r
1023 if (seqData==null)
\r
1025 // return seqData.getSequenceString(Comparison.GapChars[0]);
\r
1026 StringBuffer sb = new StringBuffer();
\r
1027 String[] seqdatas = seqData.getSequenceStrings(Comparison.GapChars.charAt(0));
\r
1028 for(int i=0; i<seqdatas.length; i++)
\r
1030 sb.append(new jalview.util.Format("%-" + 15 + "s").form(
\r
1031 sequence[i].getName()));
\r
1032 sb.append(" "+seqdatas[i]+"\n");
\r
1034 return sb.toString();
\r
1040 * @param node DOCUMENT ME!
\r
1042 public void printN(SequenceNode node)
\r
1049 if ((node.left() != null) && (node.right() != null))
\r
1051 printN((SequenceNode) node.left());
\r
1052 printN((SequenceNode) node.right());
\r
1056 System.out.println(" name = " +
\r
1057 ((SequenceI) node.element()).getName());
\r
1060 System.out.println(" dist = " + ((SequenceNode) node).dist + " " +
\r
1061 ((SequenceNode) node).count + " " + ((SequenceNode) node).height);
\r
1067 * @param node DOCUMENT ME!
\r
// Public entry point for recounting; the body is not visible in this listing. Presumably it
// resets the ycount counter and delegates to _reCount(node) - TODO confirm.
1069 public void reCount(SequenceNode node)
\r
1078 * @param node DOCUMENT ME!
\r
1080 public void _reCount(SequenceNode node)
\r
1087 if ((node.left() != null) && (node.right() != null))
\r
1089 _reCount((SequenceNode) node.left());
\r
1090 _reCount((SequenceNode) node.right());
\r
1092 SequenceNode l = (SequenceNode) node.left();
\r
1093 SequenceNode r = (SequenceNode) node.right();
\r
1095 ((SequenceNode) node).count = l.count + r.count;
\r
1096 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
\r
1100 ((SequenceNode) node).count = 1;
\r
1101 ((SequenceNode) node).ycount = ycount++;
\r
1108 * @param node DOCUMENT ME!
\r
1110 public void swapNodes(SequenceNode node)
\r
1117 SequenceNode tmp = (SequenceNode) node.left();
\r
1119 node.setLeft(node.right());
\r
1120 node.setRight(tmp);
\r
1126 * @param node DOCUMENT ME!
\r
1127 * @param dir DOCUMENT ME!
\r
// Reverses parent/child links along the path from node up towards the current root (top), so
// that dir - one of node's children - becomes node's parent. Used by reRoot(). Braces, else
// keywords and the null guard are not visible in this listing.
1129 public void changeDirection(SequenceNode node, SequenceNode dir)
\r
// node is not directly below the root: fix the ancestors first, then relink node.
1136 if (node.parent() != top)
\r
1138 changeDirection((SequenceNode) node.parent(), node);
\r
// The old parent moves into the child slot that dir vacates.
1140 SequenceNode tmp = (SequenceNode) node.parent();
\r
1142 if (dir == node.left())
\r
1144 node.setParent(dir);
\r
1145 node.setLeft(tmp);
\r
1147 else if (dir == node.right())
\r
1149 node.setParent(dir);
\r
1150 node.setRight(tmp);
\r
// Presumably the branch for a node directly below the root: the old root is skipped and node
// adopts the root's other child in the slot opposite to dir.
1155 if (dir == node.left())
\r
1157 node.setParent(node.left());
\r
1159 if (top.left() == node)
\r
1161 node.setRight(top.right());
\r
1165 node.setRight(top.left());
\r
1170 node.setParent(node.right());
\r
1172 if (top.left() == node)
\r
1174 node.setLeft(top.right());
\r
1178 node.setLeft(top.left());
\r
1188 * @return DOCUMENT ME!
\r
// Accessor whose body is not visible in this listing; presumably returns the maxdist node
// recorded by findMaxDist() - TODO confirm.
1190 public SequenceNode getMaxDist()
\r
1198 * @return DOCUMENT ME!
\r
// Accessor whose body is not visible in this listing; presumably returns the root node held
// in top - TODO confirm. toString() passes its result to NewickFile.
1200 public SequenceNode getTopNode()
\r
1206 * @return true if tree has real distances
\r
// Returns the hasDistances flag (taken from the NewickFile when a tree is imported).
1208 public boolean isHasDistances() {
\r
1209 return hasDistances;
\r
1214 * @return true if tree has real bootstrap values
\r
// Returns the hasBootstrap flag (taken from the NewickFile when a tree is imported).
1216 public boolean isHasBootstrap() {
\r
1217 return hasBootstrap;
\r
// Returns the hasRootDistance flag (taken from the NewickFile when a tree is imported).
1220 public boolean isHasRootDistance()
\r
1222 return hasRootDistance;
\r
1231 * @author $author$
\r
1232 * @version $Revision$
\r
1239 * Creates a new Cluster object.
\r
1241 * @param value DOCUMENT ME!
\r
// Stores the given array of member indices. The array is kept by reference, not copied.
1243 public Cluster(int[] value)
\r
1245 this.value = value;
\r