2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
21 import jalview.datamodel.*;
23 import jalview.io.NewickFile;
25 import jalview.schemes.ResidueProperties;
27 import jalview.util.*;
43 //SequenceData is a string representation of what the user
44 //sees. The display may contain hidden columns.
45 public AlignmentView seqData=null;
// Nodes collected by groupNodes(); held in a raw Vector.
55 Vector groups = new Vector();
// The three flags below keep these defaults unless the constructor that reads a
// NewickFile overwrites them from the parsed tree.
67 boolean hasDistances = true; // normal case for jalview trees
68 boolean hasBootstrap = false; // normal case for jalview trees
70 private boolean hasRootDistance = true;
73 * Create a new NJTree object with leaves associated with sequences in seqs,
74 * and original alignment data represented by Cigar strings.
75 * @param seqs SequenceI[]
76 * @param odata AlignmentView holding the original alignment data
77 * @param treefile NewickFile
// Constructor taking the sequences, the original alignment data and a parsed Newick tree.
// NOTE(review): odata is declared as AlignmentView, yet the visible statements below use
// odata.length and odata[i] as if it were an array. Presumably these statements sit in a
// disabled (commented-out) region whose delimiters are not visible here; confirm.
79 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) {
84 sequenceString = new String[odata.length];
// The first character of Comparison.GapChars is used as the gap symbol.
85 char gapChar = jalview.util.Comparison.GapChars.charAt(0);
86 for (int i = 0; i < odata.length; i++)
// Expand each entry to a gapped sequence and keep only its string form.
88 SequenceI oseq_aligned = odata[i].getSeq(gapChar);
89 sequenceString[i] = oseq_aligned.getSequence();
94 * Creates a new NJTree object from a tree from an external source
96 * @param seqs SequenceI which should be associated with leaves of treefile
97 * @param treefile A parsed tree
// Builds the tree from an externally parsed Newick file and ties its leaves to seqs
// by matching leaf names against sequence ids.
99 public NJTree(SequenceI[] seqs, NewickFile treefile)
101 this.sequence = seqs;
// The root node comes straight from the parsed file.
102 top = treefile.getTree();
105 * There is no dependent alignment to be recovered from an
// Only fill sequenceString from seqs when nothing has populated it already.
108 if (sequenceString == null)
110 sequenceString = new String[seqs.length];
111 for (int i = 0; i < seqs.length; i++)
113 sequenceString[i] = seqs[i].getSequence();
// Copy what the file says about distances, bootstrap values and root distance.
118 hasDistances = treefile.HasDistances();
119 hasBootstrap = treefile.HasBootstrap();
120 hasRootDistance = treefile.HasRootDistance();
122 maxheight = findHeight(top);
// Matcher used to look up a sequence for each leaf name.
124 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
126 Vector leaves = new Vector();
127 findLeaves(top, leaves);
130 int namesleft = seqs.length;
// Visit every leaf once; the index is advanced inside the elementAt call.
136 while (i < leaves.size())
138 j = (SequenceNode) leaves.elementAt(i++);
139 realnam = j.getName();
144 nam = algnIds.findIdMatch(realnam);
// Placeholder path: the leaf gets a dummy Sequence carrying its own name and is flagged.
// Presumably reached when no matching sequence was found; the condition is not visible here.
154 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
155 j.setPlaceholder(true);
161 * Creates a new NJTree object.
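163 * @param sequence sequences that become the leaves of the tree
164 * @param type tree construction method; compared against "NJ"
165 * @param pwtype pairwise distance measure; "PID" and "BL" are handled by findDistances
166 * @param start start position passed to each SeqCigar built from the sequences
167 * @param end end position passed to each SeqCigar; end-start+1 is the length of the match operation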
// Constructor that calculates a tree from the given sequences.
// NOTE(review): part of the parameter list and several statements are not visible here.
169 public NJTree(SequenceI[] sequence,
170 AlignmentView seqData,
175 this.sequence = sequence;
176 this.node = new Vector();
178 this.pwtype = pwtype;
180 this.seqData = seqData;
// Builds an AlignmentView from the sequences themselves: one SeqCigar per sequence over
// start..end, wrapped in a CigarArray with a single M operation of length end-start+1.
// Presumably only done when no seqData was supplied; the guard is not visible, confirm.
182 SeqCigar[] seqs = new SeqCigar[sequence.length];
183 for(int i=0; i<sequence.length; i++)
185 seqs[i] = new SeqCigar(sequence[i], start, end);
187 CigarArray sdata = new CigarArray(seqs);
188 sdata.addOperation(CigarArray.M, end-start+1);
189 this.seqData = new AlignmentView(sdata);
// Checks on the requested tree type and distance type; what happens when they
// fail is not visible in this listing.
192 if (!(type.equals("NJ")))
197 if (!(pwtype.equals("PID")))
204 done = new int[sequence.length];
// Walks the leading non-null entries of sequence.
206 while ((i < sequence.length) && (sequence[i] != null))
// Pairwise distances are computed on the gapped strings of the alignment view,
// using the first character of Comparison.GapChars as gap symbol.
214 distance = findDistances(this.seqData.getSequenceStrings(Comparison.GapChars.charAt(0)));
218 noClus = cluster.size();
226 * @return the tree below getTopNode() as printed by NewickFile.print(false, true)
228 public String toString()
230 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
232 return fout.print(false, true); // distances only
237 * used when the alignment associated to a tree has changed.
239 * @param alignment Vector
// Re-evaluates the placeholder flag of every leaf against the given alignment
// (a Vector whose elements are cast to SequenceI below).
241 public void UpdatePlaceHolders(Vector alignment)
243 Vector leaves = new Vector();
244 findLeaves(top, leaves);
246 int sz = leaves.size();
247 SequenceIdMatcher seqmatcher = null;
252 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
// A leaf whose element is itself in the alignment is not a placeholder.
254 if (alignment.contains(leaf.element()))
256 leaf.setPlaceholder(false);
260 if (seqmatcher == null)
262 // Only create this the first time we need it
263 SequenceI[] seqs = new SequenceI[alignment.size()];
265 for (int j = 0; j < seqs.length; j++)
266 seqs[j] = (SequenceI) alignment.elementAt(j);
268 seqmatcher = new SequenceIdMatcher(seqs);
// Otherwise try to find a sequence in the alignment by the leaf's name.
271 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
// Match path: the leaf is bound to the matched sequence.
275 leaf.setPlaceholder(false);
276 leaf.setElement(nam);
// No-match path: the leaf stays or becomes a placeholder.
280 leaf.setPlaceholder(true);
// Drives the clustering: joins pairs of clusters and finally sets the top node.
// NOTE(review): the loop header and the minimum-search calls are not visible in this listing.
289 public void cluster()
// The pair to join is chosen differently for "NJ" than for other tree types.
293 if (type.equals("NJ"))
302 Cluster c = joinClusters(mini, minj);
// The joined cluster replaces slot mini; slot minj is emptied.
306 cluster.setElementAt(null, minj);
307 cluster.setElementAt(c, mini);
// Final step: scan all slots to pick the two entries named one and two, then join them.
312 boolean onefound = false;
317 for (int i = 0; i < noseqs; i++)
321 if (onefound == false)
333 joinClusters(one, two);
// The node stored at index one after the last join becomes the root.
334 top = (SequenceNode) (node.elementAt(one));
344 * @param i DOCUMENT ME!
345 * @param j DOCUMENT ME!
347 * @return DOCUMENT ME!
// Merges clusters i and j: builds the combined Cluster, updates the distance matrix,
// and creates a new internal SequenceNode that replaces node i.
349 public Cluster joinClusters(int i, int j)
// Distance between the two clusters, read before the matrix is updated below.
351 float dist = distance[i][j];
353 int noi = ((Cluster) cluster.elementAt(i)).value.length;
354 int noj = ((Cluster) cluster.elementAt(j)).value.length;
// The merged value array is cluster i's values followed by cluster j's.
356 int[] value = new int[noi + noj];
358 for (int ii = 0; ii < noi; ii++)
360 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
363 for (int ii = noi; ii < (noi + noj); ii++)
365 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
368 Cluster c = new Cluster(value);
// Recompute distances from the merged cluster to the others, per tree type.
373 if (type.equals("NJ"))
375 findClusterNJDistance(i, j);
379 findClusterDistance(i, j);
// New internal node with the two joined nodes as children.
382 SequenceNode sn = new SequenceNode();
384 sn.setLeft((SequenceNode) (node.elementAt(i)));
385 sn.setRight((SequenceNode) (node.elementAt(j)));
387 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
388 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
// Assign branch lengths to the two children from the pre-merge distance.
390 if (type.equals("NJ"))
392 findNewNJDistances(tmpi, tmpj, dist);
396 findNewDistances(tmpi, tmpj, dist);
// The new node takes over slot i of the node list.
402 node.setElementAt(sn, i);
410 * @param tmpi DOCUMENT ME!
411 * @param tmpj DOCUMENT ME!
412 * @param dist DOCUMENT ME!
// Sets the branch lengths of the two joined nodes for the "NJ" case.
// NOTE(review): the rest of the signature and the origin of ri and rj are not visible here.
414 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
// tmpi gets half of (dist + ri - rj); tmpj gets the remainder of dist.
418 tmpi.dist = ((dist + ri) - rj) / 2;
419 tmpj.dist = (dist - tmpi.dist);
435 * @param tmpi DOCUMENT ME!
436 * @param tmpj DOCUMENT ME!
437 * @param dist DOCUMENT ME!
// Sets the branch lengths of the two joined nodes for the non-"NJ" case.
// NOTE(review): the rest of the signature and the computation of ih and jh are not visible here.
439 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
445 SequenceNode sni = tmpi;
446 SequenceNode snj = tmpj;
// Both cursors descend through left children only.
451 sni = (SequenceNode) sni.left();
457 snj = (SequenceNode) snj.left();
// Each child gets half of dist minus its own ih or jh value.
460 tmpi.dist = ((dist / 2) - ih);
461 tmpj.dist = ((dist / 2) - jh);
467 * @param i DOCUMENT ME!
468 * @param j DOCUMENT ME!
// Recomputes the distances from the cluster formed by joining i and j to every other
// entry, and stores them in row and column i of the distance matrix.
470 public void findClusterDistance(int i, int j)
472 int noi = ((Cluster) cluster.elementAt(i)).value.length;
473 int noj = ((Cluster) cluster.elementAt(j)).value.length;
475 // New distances from cluster to others
476 float[] newdist = new float[noseqs];
478 for (int l = 0; l < noseqs; l++)
480 if ((l != i) && (l != j))
// Each old distance is weighted by its cluster's size; the divisor starts with noi
// and continues on a line that is not visible in this listing.
482 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
// Write back symmetrically so that distance[i][x] equals distance[x][i].
491 for (int ii = 0; ii < noseqs; ii++)
493 distance[i][ii] = newdist[ii];
494 distance[ii][i] = newdist[ii];
501 * @param i DOCUMENT ME!
502 * @param j DOCUMENT ME!
// "NJ" counterpart of the cluster distance update: recomputes distances from the joined
// cluster (i, j) to every other entry and stores them in row and column i.
504 public void findClusterNJDistance(int i, int j)
507 // New distances from cluster to others
508 float[] newdist = new float[noseqs];
510 for (int l = 0; l < noseqs; l++)
512 if ((l != i) && (l != j))
// Starts from the sum of both old distances; the subtracted term continues on a line
// that is not visible in this listing.
514 newdist[l] = ((distance[i][l] + distance[j][l]) -
// Write back symmetrically.
523 for (int ii = 0; ii < noseqs; ii++)
525 distance[i][ii] = newdist[ii];
526 distance[ii][i] = newdist[ii];
533 * @param i DOCUMENT ME!
534 * @param j DOCUMENT ME!
536 * @return DOCUMENT ME!
// Accumulates the distances from i to every entry other than i and j that is not
// marked done, then divides the sum by (noClus - 2).
538 public float findr(int i, int j)
542 for (int k = 0; k < noseqs; k++)
// done[k] == 1 marks entries that are skipped.
544 if ((k != i) && (k != j) && (done[k] != 1))
546 tmp = tmp + distance[i][k];
// NOTE(review): the divisor is zero when noClus is 2; confirm callers never reach that state.
552 tmp = tmp / (noClus - 2);
561 * @return DOCUMENT ME!
// Scans all pairs i < j that are not marked done and evaluates the "NJ" criterion
// for each. Presumably records the pair with the smallest value; that part is not visible.
563 public float findMinNJDistance()
567 for (int i = 0; i < (noseqs - 1); i++)
569 for (int j = i + 1; j < noseqs; j++)
571 if ((done[i] != 1) && (done[j] != 1))
// Criterion: pair distance minus the two findr terms of the pair.
573 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
592 * @return DOCUMENT ME!
// Scans all pairs i < j that are not marked done and tracks the smallest distance.
594 public float findMinDistance()
598 for (int i = 0; i < (noseqs - 1); i++)
600 for (int j = i + 1; j < noseqs; j++)
602 if ((done[i] != 1) && (done[j] != 1))
604 if (distance[i][j] < min)
// A new minimum replaces the running value.
609 min = distance[i][j];
621 * @return DOCUMENT ME!
// Computes a noseqs x noseqs distance matrix from the given sequence strings,
// using the measure selected by pwtype ("PID" or "BL").
623 public float[][] findDistances(String[] sequenceString)
// Local matrix; it has the same name as the distance member used elsewhere in the class.
625 float[][] distance = new float[noseqs][noseqs];
627 if (pwtype.equals("PID"))
629 for (int i = 0; i < (noseqs - 1); i++)
631 for (int j = i; j < noseqs; j++)
// PID distance is 100 minus the value returned by Comparison.PID, mirrored across the diagonal.
639 distance[i][j] = 100 -
640 Comparison.PID(sequenceString[i], sequenceString[j]);
642 distance[j][i] = distance[i][j];
647 else if (pwtype.equals("BL"))
// Column count is taken from the first string only.
650 int end = sequenceString[0].length();
651 for (int i = 0; i < (noseqs - 1); i++)
653 for (int j = i; j < noseqs; j++)
657 for (int k = 0; k < end; k++)
// Sum the BLOSUM62 score of the two residues in column k.
661 score += ResidueProperties.getBLOSUM62(
662 sequenceString[i].substring(k, k + 1),
663 sequenceString[j].substring(k, k + 1));
// Failure reporting for the scoring step goes to stderr.
667 System.err.println("err creating BLOSUM62 tree");
668 ex.printStackTrace();
672 distance[i][j] = (float) score;
674 if (score > maxscore)
// Second pass turns scores into distances by subtracting from maxscore, then mirrors them.
681 for (int i = 0; i < (noseqs - 1); i++)
683 for (int j = i; j < noseqs; j++)
685 distance[i][j] = (float) maxscore - distance[i][j];
686 distance[j][i] = distance[i][j];
// The "SW" branch that follows opens with a block-comment marker, i.e. it is disabled in the source.
690 /* else if (pwtype.equals("SW"))
694 for (int i = 0; i < (noseqs - 1); i++)
696 for (int j = i; j < noseqs; j++)
698 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
699 as.calcScoreMatrix();
701 as.printAlignment(System.out);
702 distance[i][j] = (float) as.maxscore;
704 if (max < distance[i][j])
706 max = distance[i][j];
711 for (int i = 0; i < (noseqs - 1); i++)
713 for (int j = i; j < noseqs; j++)
715 distance[i][j] = max - distance[i][j];
716 distance[j][i] = distance[i][j];
// Creates one leaf SequenceNode and one single-element Cluster per sequence.
727 public void makeLeaves()
729 cluster = new Vector();
731 for (int i = 0; i < noseqs; i++)
733 SequenceNode sn = new SequenceNode();
// The leaf carries the sequence itself and is named after it.
735 sn.setElement(sequence[i]);
736 sn.setName(sequence[i].getName());
// One-slot value array for the new cluster; how the slot is filled is not visible here.
739 int[] value = new int[1];
742 Cluster c = new Cluster(value);
743 cluster.addElement(c);
750 * @param node DOCUMENT ME!
751 * @param leaves DOCUMENT ME!
753 * @return DOCUMENT ME!
// Recursively collects the leaf nodes below node into the supplied Vector.
755 public Vector findLeaves(SequenceNode node, Vector leaves)
// A node with no left and no right child is a leaf and is added to the result.
762 if ((node.left() == null) && (node.right() == null))
764 leaves.addElement(node);
// Otherwise descend into the left subtree first, then the right.
770 findLeaves((SequenceNode) node.left(), leaves);
771 findLeaves((SequenceNode) node.right(), leaves);
780 * @param node DOCUMENT ME!
781 * @param count DOCUMENT ME!
783 * @return DOCUMENT ME!
// Entry point for the leaf lookup by count; stores the result of _findLeaf in found.
// NOTE(review): the return statement is not visible in this listing.
785 public Object findLeaf(SequenceNode node, int count)
787 found = _findLeaf(node, count);
795 * @param node DOCUMENT ME!
796 * @param count DOCUMENT ME!
798 * @return DOCUMENT ME!
// Recursive worker for findLeaf: records in found the element of a node whose ycount
// equals count.
800 public Object _findLeaf(SequenceNode node, int count)
807 if (node.ycount == count)
809 found = node.element();
// The results of the recursive calls are ignored; the match is passed back through found.
815 _findLeaf((SequenceNode) node.left(), count);
816 _findLeaf((SequenceNode) node.right(), count);
823 * printNode is mainly for debugging purposes.
825 * @param node SequenceNode
// Debug dump of the subtree below node to System.out.
827 public void printNode(SequenceNode node)
// Leaf: print the sequence name, branch length and bootstrap value.
834 if ((node.left() == null) && (node.right() == null))
836 System.out.println("Leaf = " +
837 ((SequenceI) node.element()).getName());
838 System.out.println("Dist " + ((SequenceNode) node).dist);
839 System.out.println("Boot " + node.getBootstrap());
// Internal node: print the branch length, then recurse left and right.
843 System.out.println("Dist " + ((SequenceNode) node).dist);
844 printNode((SequenceNode) node.left());
845 printNode((SequenceNode) node.right());
852 * @param node DOCUMENT ME!
// Searches the subtree below node for the leaf with the largest dist and remembers it in maxdist.
854 public void findMaxDist(SequenceNode node)
861 if ((node.left() == null) && (node.right() == null))
863 float dist = ((SequenceNode) node).dist;
// Only a leaf whose dist exceeds the running maximum maxDistValue replaces maxdist.
865 if (dist > maxDistValue)
867 maxdist = (SequenceNode) node;
// Internal node: descend into both children.
873 findMaxDist((SequenceNode) node.left());
874 findMaxDist((SequenceNode) node.right());
881 * @return DOCUMENT ME!
// Accessor returning a Vector. NOTE(review): the body is not visible in this listing;
// presumably it returns the groups Vector filled by groupNodes(). Confirm.
883 public Vector getGroups()
891 * @return DOCUMENT ME!
// Accessor returning a float. NOTE(review): the body is not visible in this listing;
// presumably it returns maxheight as maintained by findHeight(). Confirm.
893 public float getMaxHeight()
901 * @param node DOCUMENT ME!
902 * @param threshold DOCUMENT ME!
// Collects into groups the nodes whose height, as a fraction of maxheight, exceeds threshold.
904 public void groupNodes(SequenceNode node, float threshold)
911 if ((node.height / maxheight) > threshold)
913 groups.addElement(node);
// Otherwise the search continues in both children.
917 groupNodes((SequenceNode) node.left(), threshold);
918 groupNodes((SequenceNode) node.right(), threshold);
925 * @param node DOCUMENT ME!
927 * @return DOCUMENT ME!
// Assigns a height to every node below node and tracks the largest value in maxheight.
929 public float findHeight(SequenceNode node)
// Leaf: height is the parent's height plus the leaf's own branch length.
// NOTE(review): this dereferences node.parent() without a null check; confirm that a
// leaf is never the root.
936 if ((node.left() == null) && (node.right() == null))
938 node.height = ((SequenceNode) node.parent()).height + node.dist;
940 if (node.height > maxheight)
// Internal node: height derives from the parent when there is one; the other
// visible assignment sets it to 0.
951 if (node.parent() != null)
953 node.height = ((SequenceNode) node.parent()).height +
959 node.height = (float) 0.0;
// Recurse into both subtrees; each call's result overwrites maxheight.
962 maxheight = findHeight((SequenceNode) (node.left()));
963 maxheight = findHeight((SequenceNode) (node.right()));
972 * @return DOCUMENT ME!
// Re-roots the tree on the branch leading to maxdist.
// NOTE(review): several statements, including the return, are not visible in this listing.
974 public SequenceNode reRoot()
// Length of the branch that will be split by the new root.
980 float tmpdist = maxdist.dist;
// The new root node.
983 SequenceNode sn = new SequenceNode();
986 // New right hand of top
987 SequenceNode snr = (SequenceNode) maxdist.parent();
// Reverse the parent/child links on the path above maxdist's former parent.
988 changeDirection(snr, maxdist);
989 System.out.println("Printing reversed tree");
// The split branch is shared equally between the two sides of the new root.
991 snr.dist = tmpdist / 2;
992 maxdist.dist = tmpdist / 2;
995 maxdist.setParent(sn);
1011 * @return true if original sequence data can be recovered
1013 public boolean hasOriginalSequenceData() {
1014 return seqData!=null;
1017 * Returns original alignment data used for calculation - or null where
1020 * @return null or cut'n'pasteable alignment
// Formats the stored alignment data as text, one line per sequence: a name column
// followed by the sequence string.
// NOTE(review): statements between the signature and the buffer creation are not visible here.
1022 public String printOriginalSequenceData(char gapChar)
1027 StringBuffer sb = new StringBuffer();
// Sequence strings come from seqData, gapped with the supplied character.
1028 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1029 for(int i=0; i<seqdatas.length; i++)
// Name formatted with the pattern "%-15s"; the Format object is created anew on each pass.
1031 sb.append(new jalview.util.Format("%-" + 15 + "s").form(
1032 sequence[i].getName()));
1033 sb.append(" "+seqdatas[i]+"\n");
1035 return sb.toString();
1040 * @param node DOCUMENT ME!
// Debug dump to System.out covering the subtree below node.
1042 public void printN(SequenceNode node)
// Nodes with both children present are descended into, left first.
1049 if ((node.left() != null) && (node.right() != null))
1051 printN((SequenceNode) node.left());
1052 printN((SequenceNode) node.right());
// Prints the name of the sequence attached to the node.
1056 System.out.println(" name = " +
1057 ((SequenceI) node.element()).getName());
// Prints dist, count and height of the node on one line.
1060 System.out.println(" dist = " + ((SequenceNode) node).dist + " " +
1061 ((SequenceNode) node).count + " " + ((SequenceNode) node).height);
1067 * @param node DOCUMENT ME!
// Public entry point for recounting. NOTE(review): the body is not visible in this listing;
// presumably it resets the ycount counter and delegates to _reCount(node). Confirm.
1069 public void reCount(SequenceNode node)
1078 * @param node DOCUMENT ME!
// Recursive worker that fills in count and ycount for every node below node.
1080 public void _reCount(SequenceNode node)
// Node with two children: process the children first, then combine their values.
1087 if ((node.left() != null) && (node.right() != null))
1089 _reCount((SequenceNode) node.left());
1090 _reCount((SequenceNode) node.right());
1092 SequenceNode l = (SequenceNode) node.left();
1093 SequenceNode r = (SequenceNode) node.right();
// count is the sum of the children's counts; ycount is the mean of their ycounts.
1095 ((SequenceNode) node).count = l.count + r.count;
1096 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
// Other nodes count as 1 and take the next value of the running ycount counter.
1100 ((SequenceNode) node).count = 1;
1101 ((SequenceNode) node).ycount = ycount++;
1108 * @param node DOCUMENT ME!
// Exchanges the children of node. The old left child is held in tmp while the right
// child is moved to the left; presumably tmp then becomes the right child, on a line
// that is not visible in this listing.
1110 public void swapNodes(SequenceNode node)
1117 SequenceNode tmp = (SequenceNode) node.left();
1119 node.setLeft(node.right());
1126 * @param node DOCUMENT ME!
1127 * @param dir DOCUMENT ME!
// Reverses parent/child links along the path from node towards top, so that dir
// (one of node's children) becomes node's parent. Used by reRoot().
// NOTE(review): several statements are not visible in this listing.
1129 public void changeDirection(SequenceNode node, SequenceNode dir)
// Not yet directly below top: first reverse the links further up, then this node.
1136 if (node.parent() != top)
1138 changeDirection((SequenceNode) node.parent(), node);
// Old parent held in tmp.
1140 SequenceNode tmp = (SequenceNode) node.parent();
1142 if (dir == node.left())
1144 node.setParent(dir);
1147 else if (dir == node.right())
1149 node.setParent(dir);
// Directly below top: dir becomes the parent and the freed child slot receives
// top's other child.
1155 if (dir == node.left())
1157 node.setParent(node.left());
1159 if (top.left() == node)
1161 node.setRight(top.right());
1165 node.setRight(top.left());
1170 node.setParent(node.right());
1172 if (top.left() == node)
1174 node.setLeft(top.right());
1178 node.setLeft(top.left());
1188 * @return DOCUMENT ME!
// Accessor returning a SequenceNode. NOTE(review): the body is not visible in this listing;
// presumably it returns maxdist as set by findMaxDist(). Confirm.
1190 public SequenceNode getMaxDist()
1198 * @return DOCUMENT ME!
// Accessor returning a SequenceNode; toString() prints the tree starting from its result.
// NOTE(review): the body is not visible in this listing; presumably it returns top. Confirm.
1200 public SequenceNode getTopNode()
1206 * @return true if tree has real distances
1208 public boolean isHasDistances() {
1209 return hasDistances;
1214 * @return true if tree has real bootstrap values
1216 public boolean isHasBootstrap() {
1217 return hasBootstrap;
1220 public boolean isHasRootDistance()
1222 return hasRootDistance;
1232 * @version $Revision$
1239 * Creates a new Cluster object.
1241 * @param value DOCUMENT ME!
1243 public Cluster(int[] value)