2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
19 package jalview.analysis;
\r
21 import jalview.datamodel.*;
\r
23 import jalview.io.NewickFile;
\r
25 import jalview.schemes.ResidueProperties;
\r
27 import jalview.util.*;
\r
36 * @version $Revision$
\r
41 SequenceI[] sequence;
\r
50 Vector groups = new Vector();
\r
51 SequenceNode maxdist;
\r
59 Object found = null;
\r
60 Object leaves = null;
\r
65 * Creates a new NJTree object.
\r
67 * @param node DOCUMENT ME!
\r
69 public NJTree(SequenceNode node)
\r
72 maxheight = findHeight(top);
\r
76 * Creates a new NJTree object.
\r
78 * @param seqs DOCUMENT ME!
\r
79 * @param treefile DOCUMENT ME!
\r
81 public NJTree(SequenceI[] seqs, NewickFile treefile)
\r
83 top = treefile.getTree();
\r
84 maxheight = findHeight(top);
\r
86 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
\r
88 Vector leaves = new Vector();
\r
89 findLeaves(top, leaves);
\r
92 int namesleft = seqs.length;
\r
98 while (i < leaves.size())
\r
100 j = (SequenceNode) leaves.elementAt(i++);
\r
101 realnam = j.getName();
\r
104 if (namesleft > -1)
\r
106 nam = algnIds.findIdMatch(realnam);
\r
116 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
\r
117 j.setPlaceholder(true);
\r
123 * Creates a new NJTree object.
\r
125 * @param sequence DOCUMENT ME!
\r
126 * @param start DOCUMENT ME!
\r
127 * @param end DOCUMENT ME!
\r
129 public NJTree(SequenceI[] sequence, int start, int end)
\r
131 this(sequence, "NJ", "BL", start, end);
\r
135 * Creates a new NJTree object.
\r
137 * @param sequence DOCUMENT ME!
\r
138 * @param type DOCUMENT ME!
\r
139 * @param pwtype DOCUMENT ME!
\r
140 * @param start DOCUMENT ME!
\r
141 * @param end DOCUMENT ME!
\r
143 public NJTree(SequenceI[] sequence, String type, String pwtype, int start,
\r
146 this.sequence = sequence;
\r
147 this.node = new Vector();
\r
149 this.pwtype = pwtype;
\r
150 this.start = start;
\r
153 if (!(type.equals("NJ")))
\r
158 if (!(pwtype.equals("PID")))
\r
165 done = new int[sequence.length];
\r
167 while ((i < sequence.length) && (sequence[i] != null))
\r
175 distance = findDistances();
\r
179 noClus = cluster.size();
\r
187 * @return DOCUMENT ME!
\r
189 public String toString()
\r
191 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
\r
193 return fout.print(false, true); // distances only
\r
198 * Used when the alignment associated with a tree has changed.
\r
200 * @param alignment Vector
\r
202 public void UpdatePlaceHolders(Vector alignment)
\r
204 Vector leaves = new Vector();
\r
205 findLeaves(top, leaves);
\r
207 int sz = leaves.size();
\r
208 SequenceIdMatcher seqmatcher = null;
\r
213 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
\r
215 if (alignment.contains(leaf.element()))
\r
217 leaf.setPlaceholder(false);
\r
221 if (seqmatcher == null)
\r
223 // Only create this the first time we need it
\r
224 SequenceI[] seqs = new SequenceI[alignment.size()];
\r
226 for (int j = 0; j < seqs.length; j++)
\r
227 seqs[j] = (SequenceI) alignment.elementAt(j);
\r
229 seqmatcher = new SequenceIdMatcher(seqs);
\r
232 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
\r
236 leaf.setPlaceholder(false);
\r
237 leaf.setElement(nam);
\r
241 leaf.setPlaceholder(true);
\r
250 public void cluster()
\r
254 if (type.equals("NJ"))
\r
256 float mind = findMinNJDistance();
\r
260 float mind = findMinDistance();
\r
263 Cluster c = joinClusters(mini, minj);
\r
267 cluster.setElementAt(null, minj);
\r
268 cluster.setElementAt(c, mini);
\r
273 boolean onefound = false;
\r
278 for (int i = 0; i < noseqs; i++)
\r
282 if (onefound == false)
\r
294 Cluster c = joinClusters(one, two);
\r
295 top = (SequenceNode) (node.elementAt(one));
\r
305 * @param i DOCUMENT ME!
\r
306 * @param j DOCUMENT ME!
\r
308 * @return DOCUMENT ME!
\r
310 public Cluster joinClusters(int i, int j)
\r
312 float dist = distance[i][j];
\r
314 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
315 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
317 int[] value = new int[noi + noj];
\r
319 for (int ii = 0; ii < noi; ii++)
\r
321 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
\r
324 for (int ii = noi; ii < (noi + noj); ii++)
\r
326 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
\r
329 Cluster c = new Cluster(value);
\r
334 if (type.equals("NJ"))
\r
336 findClusterNJDistance(i, j);
\r
340 findClusterDistance(i, j);
\r
343 SequenceNode sn = new SequenceNode();
\r
345 sn.setLeft((SequenceNode) (node.elementAt(i)));
\r
346 sn.setRight((SequenceNode) (node.elementAt(j)));
\r
348 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
\r
349 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
\r
351 if (type.equals("NJ"))
\r
353 findNewNJDistances(tmpi, tmpj, dist);
\r
357 findNewDistances(tmpi, tmpj, dist);
\r
360 tmpi.setParent(sn);
\r
361 tmpj.setParent(sn);
\r
363 node.setElementAt(sn, i);
\r
371 * @param tmpi DOCUMENT ME!
\r
372 * @param tmpj DOCUMENT ME!
\r
373 * @param dist DOCUMENT ME!
\r
375 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
\r
381 SequenceNode sni = tmpi;
\r
382 SequenceNode snj = tmpj;
\r
384 tmpi.dist = ((dist + ri) - rj) / 2;
\r
385 tmpj.dist = (dist - tmpi.dist);
\r
401 * @param tmpi DOCUMENT ME!
\r
402 * @param tmpj DOCUMENT ME!
\r
403 * @param dist DOCUMENT ME!
\r
405 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
\r
411 SequenceNode sni = tmpi;
\r
412 SequenceNode snj = tmpj;
\r
414 while (sni != null)
\r
416 ih = ih + sni.dist;
\r
417 sni = (SequenceNode) sni.left();
\r
420 while (snj != null)
\r
422 jh = jh + snj.dist;
\r
423 snj = (SequenceNode) snj.left();
\r
426 tmpi.dist = ((dist / 2) - ih);
\r
427 tmpj.dist = ((dist / 2) - jh);
\r
433 * @param i DOCUMENT ME!
\r
434 * @param j DOCUMENT ME!
\r
436 public void findClusterDistance(int i, int j)
\r
438 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
439 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
441 // New distances from cluster to others
\r
442 float[] newdist = new float[noseqs];
\r
444 for (int l = 0; l < noseqs; l++)
\r
446 if ((l != i) && (l != j))
\r
448 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
\r
457 for (int ii = 0; ii < noseqs; ii++)
\r
459 distance[i][ii] = newdist[ii];
\r
460 distance[ii][i] = newdist[ii];
\r
467 * @param i DOCUMENT ME!
\r
468 * @param j DOCUMENT ME!
\r
470 public void findClusterNJDistance(int i, int j)
\r
472 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
473 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
475 // New distances from cluster to others
\r
476 float[] newdist = new float[noseqs];
\r
478 for (int l = 0; l < noseqs; l++)
\r
480 if ((l != i) && (l != j))
\r
482 newdist[l] = ((distance[i][l] + distance[j][l]) -
\r
483 distance[i][j]) / 2;
\r
491 for (int ii = 0; ii < noseqs; ii++)
\r
493 distance[i][ii] = newdist[ii];
\r
494 distance[ii][i] = newdist[ii];
\r
501 * @param i DOCUMENT ME!
\r
502 * @param j DOCUMENT ME!
\r
504 * @return DOCUMENT ME!
\r
506 public float findr(int i, int j)
\r
510 for (int k = 0; k < noseqs; k++)
\r
512 if ((k != i) && (k != j) && (done[k] != 1))
\r
514 tmp = tmp + distance[i][k];
\r
520 tmp = tmp / (noClus - 2);
\r
529 * @return DOCUMENT ME!
\r
531 public float findMinNJDistance()
\r
533 float min = 100000;
\r
535 for (int i = 0; i < (noseqs - 1); i++)
\r
537 for (int j = i + 1; j < noseqs; j++)
\r
539 if ((done[i] != 1) && (done[j] != 1))
\r
541 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
\r
560 * @return DOCUMENT ME!
\r
562 public float findMinDistance()
\r
564 float min = 100000;
\r
566 for (int i = 0; i < (noseqs - 1); i++)
\r
568 for (int j = i + 1; j < noseqs; j++)
\r
570 if ((done[i] != 1) && (done[j] != 1))
\r
572 if (distance[i][j] < min)
\r
577 min = distance[i][j];
\r
589 * @return DOCUMENT ME!
\r
591 public float[][] findDistances()
\r
593 float[][] distance = new float[noseqs][noseqs];
\r
595 if (pwtype.equals("PID"))
\r
597 for (int i = 0; i < (noseqs - 1); i++)
\r
599 for (int j = i; j < noseqs; j++)
\r
603 distance[i][i] = 0;
\r
607 distance[i][j] = 100 -
\r
608 Comparison.PID(sequence[i], sequence[j], start, end);
\r
609 distance[j][i] = distance[i][j];
\r
614 else if (pwtype.equals("BL"))
\r
618 for (int i = 0; i < (noseqs - 1); i++)
\r
620 for (int j = i; j < noseqs; j++)
\r
624 for (int k = start; k < end; k++)
\r
628 score += ResidueProperties.getBLOSUM62(sequence[i].getSequence(
\r
629 k, k + 1), sequence[j].getSequence(k, k +
\r
632 catch (Exception ex)
\r
634 System.err.println("err creating BLOSUM62 tree");
\r
635 ex.printStackTrace();
\r
639 distance[i][j] = (float) score;
\r
641 if (score > maxscore)
\r
648 for (int i = 0; i < (noseqs - 1); i++)
\r
650 for (int j = i; j < noseqs; j++)
\r
652 distance[i][j] = (float) maxscore - distance[i][j];
\r
653 distance[j][i] = distance[i][j];
\r
657 else if (pwtype.equals("SW"))
\r
661 for (int i = 0; i < (noseqs - 1); i++)
\r
663 for (int j = i; j < noseqs; j++)
\r
665 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
\r
666 as.calcScoreMatrix();
\r
667 as.traceAlignment();
\r
668 as.printAlignment();
\r
669 distance[i][j] = (float) as.maxscore;
\r
671 if (max < distance[i][j])
\r
673 max = distance[i][j];
\r
678 for (int i = 0; i < (noseqs - 1); i++)
\r
680 for (int j = i; j < noseqs; j++)
\r
682 distance[i][j] = max - distance[i][j];
\r
683 distance[j][i] = distance[i][j];
\r
694 public void makeLeaves()
\r
696 cluster = new Vector();
\r
698 for (int i = 0; i < noseqs; i++)
\r
700 SequenceNode sn = new SequenceNode();
\r
702 sn.setElement(sequence[i]);
\r
703 sn.setName(sequence[i].getName());
\r
704 node.addElement(sn);
\r
706 int[] value = new int[1];
\r
709 Cluster c = new Cluster(value);
\r
710 cluster.addElement(c);
\r
717 * @param node DOCUMENT ME!
\r
718 * @param leaves DOCUMENT ME!
\r
720 * @return DOCUMENT ME!
\r
722 public Vector findLeaves(SequenceNode node, Vector leaves)
\r
729 if ((node.left() == null) && (node.right() == null))
\r
731 leaves.addElement(node);
\r
737 findLeaves((SequenceNode) node.left(), leaves);
\r
738 findLeaves((SequenceNode) node.right(), leaves);
\r
747 * @param node DOCUMENT ME!
\r
748 * @param count DOCUMENT ME!
\r
750 * @return DOCUMENT ME!
\r
752 public Object findLeaf(SequenceNode node, int count)
\r
754 found = _findLeaf(node, count);
\r
762 * @param node DOCUMENT ME!
\r
763 * @param count DOCUMENT ME!
\r
765 * @return DOCUMENT ME!
\r
767 public Object _findLeaf(SequenceNode node, int count)
\r
774 if (node.ycount == count)
\r
776 found = node.element();
\r
782 _findLeaf((SequenceNode) node.left(), count);
\r
783 _findLeaf((SequenceNode) node.right(), count);
\r
790 * printNode is mainly for debugging purposes.
\r
792 * @param node SequenceNode
\r
794 public void printNode(SequenceNode node)
\r
801 if ((node.left() == null) && (node.right() == null))
\r
803 System.out.println("Leaf = " +
\r
804 ((SequenceI) node.element()).getName());
\r
805 System.out.println("Dist " + ((SequenceNode) node).dist);
\r
806 System.out.println("Boot " + node.getBootstrap());
\r
810 System.out.println("Dist " + ((SequenceNode) node).dist);
\r
811 printNode((SequenceNode) node.left());
\r
812 printNode((SequenceNode) node.right());
\r
819 * @param node DOCUMENT ME!
\r
821 public void findMaxDist(SequenceNode node)
\r
828 if ((node.left() == null) && (node.right() == null))
\r
830 float dist = ((SequenceNode) node).dist;
\r
832 if (dist > maxDistValue)
\r
834 maxdist = (SequenceNode) node;
\r
835 maxDistValue = dist;
\r
840 findMaxDist((SequenceNode) node.left());
\r
841 findMaxDist((SequenceNode) node.right());
\r
848 * @return DOCUMENT ME!
\r
850 public Vector getGroups()
\r
858 * @return DOCUMENT ME!
\r
860 public float getMaxHeight()
\r
868 * @param node DOCUMENT ME!
\r
869 * @param threshold DOCUMENT ME!
\r
871 public void groupNodes(SequenceNode node, float threshold)
\r
878 if ((node.height / maxheight) > threshold)
\r
880 groups.addElement(node);
\r
884 groupNodes((SequenceNode) node.left(), threshold);
\r
885 groupNodes((SequenceNode) node.right(), threshold);
\r
892 * @param node DOCUMENT ME!
\r
894 * @return DOCUMENT ME!
\r
896 public float findHeight(SequenceNode node)
\r
903 if ((node.left() == null) && (node.right() == null))
\r
905 node.height = ((SequenceNode) node.parent()).height + node.dist;
\r
907 if (node.height > maxheight)
\r
909 return node.height;
\r
918 if (node.parent() != null)
\r
920 node.height = ((SequenceNode) node.parent()).height +
\r
926 node.height = (float) 0.0;
\r
929 maxheight = findHeight((SequenceNode) (node.left()));
\r
930 maxheight = findHeight((SequenceNode) (node.right()));
\r
939 * @return DOCUMENT ME!
\r
941 public SequenceNode reRoot()
\r
943 if (maxdist != null)
\r
947 float tmpdist = maxdist.dist;
\r
950 SequenceNode sn = new SequenceNode();
\r
951 sn.setParent(null);
\r
953 // New right hand of top
\r
954 SequenceNode snr = (SequenceNode) maxdist.parent();
\r
955 changeDirection(snr, maxdist);
\r
956 System.out.println("Printing reversed tree");
\r
958 snr.dist = tmpdist / 2;
\r
959 maxdist.dist = tmpdist / 2;
\r
962 maxdist.setParent(sn);
\r
965 sn.setLeft(maxdist);
\r
980 * @param node DOCUMENT ME!
\r
982 public static void printN(SequenceNode node)
\r
989 if ((node.left() != null) && (node.right() != null))
\r
991 printN((SequenceNode) node.left());
\r
992 printN((SequenceNode) node.right());
\r
996 System.out.println(" name = " +
\r
997 ((SequenceI) node.element()).getName());
\r
1000 System.out.println(" dist = " + ((SequenceNode) node).dist + " " +
\r
1001 ((SequenceNode) node).count + " " + ((SequenceNode) node).height);
\r
1007 * @param node DOCUMENT ME!
\r
1009 public void reCount(SequenceNode node)
\r
1018 * @param node DOCUMENT ME!
\r
1020 public void _reCount(SequenceNode node)
\r
1027 if ((node.left() != null) && (node.right() != null))
\r
1029 _reCount((SequenceNode) node.left());
\r
1030 _reCount((SequenceNode) node.right());
\r
1032 SequenceNode l = (SequenceNode) node.left();
\r
1033 SequenceNode r = (SequenceNode) node.right();
\r
1035 ((SequenceNode) node).count = l.count + r.count;
\r
1036 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
\r
1040 ((SequenceNode) node).count = 1;
\r
1041 ((SequenceNode) node).ycount = ycount++;
\r
1048 * @param node DOCUMENT ME!
\r
1050 public void swapNodes(SequenceNode node)
\r
1057 SequenceNode tmp = (SequenceNode) node.left();
\r
1059 node.setLeft(node.right());
\r
1060 node.setRight(tmp);
\r
1066 * @param node DOCUMENT ME!
\r
1067 * @param dir DOCUMENT ME!
\r
1069 public void changeDirection(SequenceNode node, SequenceNode dir)
\r
1076 if (node.parent() != top)
\r
1078 changeDirection((SequenceNode) node.parent(), node);
\r
1080 SequenceNode tmp = (SequenceNode) node.parent();
\r
1082 if (dir == node.left())
\r
1084 node.setParent(dir);
\r
1085 node.setLeft(tmp);
\r
1087 else if (dir == node.right())
\r
1089 node.setParent(dir);
\r
1090 node.setRight(tmp);
\r
1095 if (dir == node.left())
\r
1097 node.setParent(node.left());
\r
1099 if (top.left() == node)
\r
1101 node.setRight(top.right());
\r
1105 node.setRight(top.left());
\r
1110 node.setParent(node.right());
\r
1112 if (top.left() == node)
\r
1114 node.setLeft(top.right());
\r
1118 node.setLeft(top.left());
\r
1127 * @param node DOCUMENT ME!
\r
1129 public void setMaxDist(SequenceNode node)
\r
1131 this.maxdist = maxdist;
\r
1137 * @return DOCUMENT ME!
\r
1139 public SequenceNode getMaxDist()
\r
1147 * @return DOCUMENT ME!
\r
1149 public SequenceNode getTopNode()
\r
1159 * @author $author$
\r
1160 * @version $Revision$
\r
1167 * Creates a new Cluster object.
\r
1169 * @param value DOCUMENT ME!
\r
1171 public Cluster(int[] value)
\r
1173 this.value = value;
\r