2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
19 package jalview.analysis;
\r
21 import jalview.datamodel.*;
\r
23 import jalview.io.NewickFile;
\r
25 import jalview.schemes.ResidueProperties;
\r
27 import jalview.util.*;
\r
32 public class NJTree {
\r
34 SequenceI[] sequence;
\r
43 Vector groups = new Vector();
\r
44 SequenceNode maxdist;
\r
52 Object found = null;
\r
53 Object leaves = null;
\r
57 public NJTree(SequenceNode node) {
\r
59 maxheight = findHeight(top);
\r
62 public NJTree(SequenceI[] seqs, NewickFile treefile) {
\r
63 top = treefile.getTree();
\r
64 maxheight = findHeight(top);
\r
66 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
\r
68 Vector leaves = new Vector();
\r
69 findLeaves(top, leaves);
\r
72 int namesleft = seqs.length;
\r
78 while (i < leaves.size()) {
\r
79 j = (SequenceNode) leaves.elementAt(i++);
\r
80 realnam = j.getName();
\r
83 if (namesleft > -1) {
\r
84 nam = algnIds.findIdMatch(realnam);
\r
91 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
\r
92 j.setPlaceholder(true);
\r
/**
 * Convenience constructor that delegates to the five-argument constructor
 * with tree type {@code "NJ"} and pairwise score type {@code "BL"}.
 *
 * @param sequence sequences to build the tree from
 * @param start first column passed on to the full constructor
 * @param end last column passed on to the full constructor
 */
public NJTree(SequenceI[] sequence, int start, int end) {
    this(sequence, "NJ", "BL", start, end);
}
\r
101 public NJTree(SequenceI[] sequence, String type, String pwtype, int start,
\r
103 this.sequence = sequence;
\r
104 this.node = new Vector();
\r
106 this.pwtype = pwtype;
\r
107 this.start = start;
\r
110 if (!(type.equals("NJ"))) {
\r
114 if (!(pwtype.equals("PID"))) {
\r
120 done = new int[sequence.length];
\r
122 while ((i < sequence.length) && (sequence[i] != null)) {
\r
129 distance = findDistances();
\r
133 noClus = cluster.size();
\r
138 public String toString() {
\r
139 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
\r
141 return fout.print(false, true); // distances only
\r
146 * used when the alignment associated to a tree has changed.
\r
148 * @param alignment Vector
\r
150 public void UpdatePlaceHolders(Vector alignment) {
\r
151 Vector leaves = new Vector();
\r
152 findLeaves(top, leaves);
\r
154 int sz = leaves.size();
\r
155 SequenceIdMatcher seqmatcher = null;
\r
159 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
\r
161 if (alignment.contains(leaf.element())) {
\r
162 leaf.setPlaceholder(false);
\r
164 if (seqmatcher == null) {
\r
165 // Only create this the first time we need it
\r
166 SequenceI[] seqs = new SequenceI[alignment.size()];
\r
168 for (int j = 0; j < seqs.length; j++)
\r
169 seqs[j] = (SequenceI) alignment.elementAt(j);
\r
171 seqmatcher = new SequenceIdMatcher(seqs);
\r
174 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
\r
177 leaf.setPlaceholder(false);
\r
178 leaf.setElement(nam);
\r
180 leaf.setPlaceholder(true);
\r
186 public void cluster() {
\r
187 while (noClus > 2) {
\r
188 if (type.equals("NJ")) {
\r
189 float mind = findMinNJDistance();
\r
191 float mind = findMinDistance();
\r
194 Cluster c = joinClusters(mini, minj);
\r
198 cluster.setElementAt(null, minj);
\r
199 cluster.setElementAt(c, mini);
\r
204 boolean onefound = false;
\r
209 for (int i = 0; i < noseqs; i++) {
\r
210 if (done[i] != 1) {
\r
211 if (onefound == false) {
\r
220 Cluster c = joinClusters(one, two);
\r
221 top = (SequenceNode) (node.elementAt(one));
\r
228 public Cluster joinClusters(int i, int j) {
\r
229 float dist = distance[i][j];
\r
231 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
232 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
234 int[] value = new int[noi + noj];
\r
236 for (int ii = 0; ii < noi; ii++) {
\r
237 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
\r
240 for (int ii = noi; ii < (noi + noj); ii++) {
\r
241 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
\r
244 Cluster c = new Cluster(value);
\r
249 if (type.equals("NJ")) {
\r
250 findClusterNJDistance(i, j);
\r
252 findClusterDistance(i, j);
\r
255 SequenceNode sn = new SequenceNode();
\r
257 sn.setLeft((SequenceNode) (node.elementAt(i)));
\r
258 sn.setRight((SequenceNode) (node.elementAt(j)));
\r
260 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
\r
261 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
\r
263 if (type.equals("NJ")) {
\r
264 findNewNJDistances(tmpi, tmpj, dist);
\r
266 findNewDistances(tmpi, tmpj, dist);
\r
269 tmpi.setParent(sn);
\r
270 tmpj.setParent(sn);
\r
272 node.setElementAt(sn, i);
\r
277 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
\r
282 SequenceNode sni = tmpi;
\r
283 SequenceNode snj = tmpj;
\r
285 tmpi.dist = ((dist + ri) - rj) / 2;
\r
286 tmpj.dist = (dist - tmpi.dist);
\r
288 if (tmpi.dist < 0) {
\r
292 if (tmpj.dist < 0) {
\r
297 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
\r
302 SequenceNode sni = tmpi;
\r
303 SequenceNode snj = tmpj;
\r
305 while (sni != null) {
\r
306 ih = ih + sni.dist;
\r
307 sni = (SequenceNode) sni.left();
\r
310 while (snj != null) {
\r
311 jh = jh + snj.dist;
\r
312 snj = (SequenceNode) snj.left();
\r
315 tmpi.dist = ((dist / 2) - ih);
\r
316 tmpj.dist = ((dist / 2) - jh);
\r
319 public void findClusterDistance(int i, int j) {
\r
320 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
321 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
323 // New distances from cluster to others
\r
324 float[] newdist = new float[noseqs];
\r
326 for (int l = 0; l < noseqs; l++) {
\r
327 if ((l != i) && (l != j)) {
\r
328 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
\r
335 for (int ii = 0; ii < noseqs; ii++) {
\r
336 distance[i][ii] = newdist[ii];
\r
337 distance[ii][i] = newdist[ii];
\r
341 public void findClusterNJDistance(int i, int j) {
\r
342 int noi = ((Cluster) cluster.elementAt(i)).value.length;
\r
343 int noj = ((Cluster) cluster.elementAt(j)).value.length;
\r
345 // New distances from cluster to others
\r
346 float[] newdist = new float[noseqs];
\r
348 for (int l = 0; l < noseqs; l++) {
\r
349 if ((l != i) && (l != j)) {
\r
350 newdist[l] = ((distance[i][l] + distance[j][l]) -
\r
351 distance[i][j]) / 2;
\r
357 for (int ii = 0; ii < noseqs; ii++) {
\r
358 distance[i][ii] = newdist[ii];
\r
359 distance[ii][i] = newdist[ii];
\r
363 public float findr(int i, int j) {
\r
366 for (int k = 0; k < noseqs; k++) {
\r
367 if ((k != i) && (k != j) && (done[k] != 1)) {
\r
368 tmp = tmp + distance[i][k];
\r
373 tmp = tmp / (noClus - 2);
\r
379 public float findMinNJDistance() {
\r
380 float min = 100000;
\r
382 for (int i = 0; i < (noseqs - 1); i++) {
\r
383 for (int j = i + 1; j < noseqs; j++) {
\r
384 if ((done[i] != 1) && (done[j] != 1)) {
\r
385 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
\r
400 public float findMinDistance() {
\r
401 float min = 100000;
\r
403 for (int i = 0; i < (noseqs - 1); i++) {
\r
404 for (int j = i + 1; j < noseqs; j++) {
\r
405 if ((done[i] != 1) && (done[j] != 1)) {
\r
406 if (distance[i][j] < min) {
\r
410 min = distance[i][j];
\r
419 public float[][] findDistances() {
\r
420 float[][] distance = new float[noseqs][noseqs];
\r
422 if (pwtype.equals("PID")) {
\r
423 for (int i = 0; i < (noseqs - 1); i++) {
\r
424 for (int j = i; j < noseqs; j++) {
\r
426 distance[i][i] = 0;
\r
428 distance[i][j] = 100 -
\r
429 Comparison.PID(sequence[i], sequence[j], start, end);
\r
430 distance[j][i] = distance[i][j];
\r
434 } else if (pwtype.equals("BL")) {
\r
437 for (int i = 0; i < (noseqs - 1); i++) {
\r
438 for (int j = i; j < noseqs; j++) {
\r
441 for (int k = start; k < end; k++) {
\r
443 score += ResidueProperties.getBLOSUM62(sequence[i].getSequence(
\r
444 k, k + 1), sequence[j].getSequence(k, k +
\r
446 } catch (Exception ex) {
\r
447 System.err.println("err creating BLOSUM62 tree");
\r
448 ex.printStackTrace();
\r
452 distance[i][j] = (float) score;
\r
454 if (score > maxscore) {
\r
460 for (int i = 0; i < (noseqs - 1); i++) {
\r
461 for (int j = i; j < noseqs; j++) {
\r
462 distance[i][j] = (float) maxscore - distance[i][j];
\r
463 distance[j][i] = distance[i][j];
\r
466 } else if (pwtype.equals("SW")) {
\r
469 for (int i = 0; i < (noseqs - 1); i++) {
\r
470 for (int j = i; j < noseqs; j++) {
\r
471 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
\r
472 as.calcScoreMatrix();
\r
473 as.traceAlignment();
\r
474 as.printAlignment();
\r
475 distance[i][j] = (float) as.maxscore;
\r
477 if (max < distance[i][j]) {
\r
478 max = distance[i][j];
\r
483 for (int i = 0; i < (noseqs - 1); i++) {
\r
484 for (int j = i; j < noseqs; j++) {
\r
485 distance[i][j] = max - distance[i][j];
\r
486 distance[j][i] = distance[i][j];
\r
494 public void makeLeaves() {
\r
495 cluster = new Vector();
\r
497 for (int i = 0; i < noseqs; i++) {
\r
498 SequenceNode sn = new SequenceNode();
\r
500 sn.setElement(sequence[i]);
\r
501 sn.setName(sequence[i].getName());
\r
502 node.addElement(sn);
\r
504 int[] value = new int[1];
\r
507 Cluster c = new Cluster(value);
\r
508 cluster.addElement(c);
\r
512 public Vector findLeaves(SequenceNode node, Vector leaves) {
\r
513 if (node == null) {
\r
517 if ((node.left() == null) && (node.right() == null)) {
\r
518 leaves.addElement(node);
\r
522 findLeaves((SequenceNode) node.left(), leaves);
\r
523 findLeaves((SequenceNode) node.right(), leaves);
\r
529 public Object findLeaf(SequenceNode node, int count) {
\r
530 found = _findLeaf(node, count);
\r
535 public Object _findLeaf(SequenceNode node, int count) {
\r
536 if (node == null) {
\r
540 if (node.ycount == count) {
\r
541 found = node.element();
\r
545 _findLeaf((SequenceNode) node.left(), count);
\r
546 _findLeaf((SequenceNode) node.right(), count);
\r
553 * printNode is mainly for debugging purposes.
\r
555 * @param node SequenceNode
\r
557 public void printNode(SequenceNode node) {
\r
558 if (node == null) {
\r
562 if ((node.left() == null) && (node.right() == null)) {
\r
563 System.out.println("Leaf = " +
\r
564 ((SequenceI) node.element()).getName());
\r
565 System.out.println("Dist " + ((SequenceNode) node).dist);
\r
566 System.out.println("Boot " + node.getBootstrap());
\r
568 System.out.println("Dist " + ((SequenceNode) node).dist);
\r
569 printNode((SequenceNode) node.left());
\r
570 printNode((SequenceNode) node.right());
\r
574 public void findMaxDist(SequenceNode node) {
\r
575 if (node == null) {
\r
579 if ((node.left() == null) && (node.right() == null)) {
\r
580 float dist = ((SequenceNode) node).dist;
\r
582 if (dist > maxDistValue) {
\r
583 maxdist = (SequenceNode) node;
\r
584 maxDistValue = dist;
\r
587 findMaxDist((SequenceNode) node.left());
\r
588 findMaxDist((SequenceNode) node.right());
\r
592 public Vector getGroups() {
\r
596 public float getMaxHeight() {
\r
600 public void groupNodes(SequenceNode node, float threshold) {
\r
601 if (node == null) {
\r
605 if ((node.height / maxheight) > threshold) {
\r
606 groups.addElement(node);
\r
608 groupNodes((SequenceNode) node.left(), threshold);
\r
609 groupNodes((SequenceNode) node.right(), threshold);
\r
613 public float findHeight(SequenceNode node) {
\r
614 if (node == null) {
\r
618 if ((node.left() == null) && (node.right() == null)) {
\r
619 node.height = ((SequenceNode) node.parent()).height + node.dist;
\r
621 if (node.height > maxheight) {
\r
622 return node.height;
\r
627 if (node.parent() != null) {
\r
628 node.height = ((SequenceNode) node.parent()).height +
\r
632 node.height = (float) 0.0;
\r
635 maxheight = findHeight((SequenceNode) (node.left()));
\r
636 maxheight = findHeight((SequenceNode) (node.right()));
\r
642 public SequenceNode reRoot() {
\r
643 if (maxdist != null) {
\r
646 float tmpdist = maxdist.dist;
\r
649 SequenceNode sn = new SequenceNode();
\r
650 sn.setParent(null);
\r
652 // New right hand of top
\r
653 SequenceNode snr = (SequenceNode) maxdist.parent();
\r
654 changeDirection(snr, maxdist);
\r
655 System.out.println("Printing reversed tree");
\r
657 snr.dist = tmpdist / 2;
\r
658 maxdist.dist = tmpdist / 2;
\r
661 maxdist.setParent(sn);
\r
664 sn.setLeft(maxdist);
\r
676 public static void printN(SequenceNode node) {
\r
677 if (node == null) {
\r
681 if ((node.left() != null) && (node.right() != null)) {
\r
682 printN((SequenceNode) node.left());
\r
683 printN((SequenceNode) node.right());
\r
685 System.out.println(" name = " +
\r
686 ((SequenceI) node.element()).getName());
\r
689 System.out.println(" dist = " + ((SequenceNode) node).dist + " " +
\r
690 ((SequenceNode) node).count + " " + ((SequenceNode) node).height);
\r
693 public void reCount(SequenceNode node) {
\r
698 public void _reCount(SequenceNode node) {
\r
699 if (node == null) {
\r
703 if ((node.left() != null) && (node.right() != null)) {
\r
704 _reCount((SequenceNode) node.left());
\r
705 _reCount((SequenceNode) node.right());
\r
707 SequenceNode l = (SequenceNode) node.left();
\r
708 SequenceNode r = (SequenceNode) node.right();
\r
710 ((SequenceNode) node).count = l.count + r.count;
\r
711 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
\r
713 ((SequenceNode) node).count = 1;
\r
714 ((SequenceNode) node).ycount = ycount++;
\r
718 public void swapNodes(SequenceNode node) {
\r
719 if (node == null) {
\r
723 SequenceNode tmp = (SequenceNode) node.left();
\r
725 node.setLeft(node.right());
\r
726 node.setRight(tmp);
\r
729 public void changeDirection(SequenceNode node, SequenceNode dir) {
\r
730 if (node == null) {
\r
734 if (node.parent() != top) {
\r
735 changeDirection((SequenceNode) node.parent(), node);
\r
737 SequenceNode tmp = (SequenceNode) node.parent();
\r
739 if (dir == node.left()) {
\r
740 node.setParent(dir);
\r
742 } else if (dir == node.right()) {
\r
743 node.setParent(dir);
\r
744 node.setRight(tmp);
\r
747 if (dir == node.left()) {
\r
748 node.setParent(node.left());
\r
750 if (top.left() == node) {
\r
751 node.setRight(top.right());
\r
753 node.setRight(top.left());
\r
756 node.setParent(node.right());
\r
758 if (top.left() == node) {
\r
759 node.setLeft(top.right());
\r
761 node.setLeft(top.left());
\r
767 public void setMaxDist(SequenceNode node) {
\r
768 this.maxdist = maxdist;
\r
771 public SequenceNode getMaxDist() {
\r
775 public SequenceNode getTopNode() {
\r
784 public Cluster(int[] value) {
\r
785 this.value = value;
\r