2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
20 package jalview.analysis;
\r
22 import jalview.datamodel.*;
\r
23 import jalview.util.*;
\r
24 import jalview.schemes.ResidueProperties;
\r
27 import jalview.io.NewickFile;
\r
29 public class NJTree {
\r
32 SequenceI[] sequence;
\r
45 Vector groups = new Vector();
\r
46 SequenceNode maxdist;
\r
59 Object found = null;
\r
60 Object leaves = null;
\r
65 public NJTree(SequenceNode node) {
\r
67 maxheight = findHeight(top);
\r
70 public String toString()
\r
72 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
\r
73 return fout.print(false,true); // distances only
\r
76 public NJTree(SequenceI[] seqs, NewickFile treefile) {
\r
77 top = treefile.getTree();
\r
78 maxheight = findHeight(top);
\r
79 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
\r
81 Vector leaves = new Vector();
\r
82 findLeaves(top, leaves);
\r
85 int namesleft = seqs.length;
\r
90 while (i < leaves.size())
\r
92 j = (SequenceNode) leaves.elementAt(i++);
\r
93 realnam = j.getName();
\r
96 nam = algnIds.findIdMatch(realnam);
\r
101 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
\r
102 j.setPlaceholder(true);
\r
110 * used when the alignment associated to a tree has changed.
\r
112 * @param alignment Vector
\r
114 public void UpdatePlaceHolders(Vector alignment) {
\r
115 Vector leaves = new Vector();
\r
116 findLeaves(top, leaves);
\r
117 int sz = leaves.size();
\r
118 SequenceIdMatcher seqmatcher=null;
\r
121 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
\r
122 if (alignment.contains(leaf.element()))
\r
123 leaf.setPlaceholder(false);
\r
125 if (seqmatcher==null) {
\r
126 // Only create this the first time we need it
\r
127 SequenceI[] seqs = new SequenceI[alignment.size()];
\r
128 for (int j=0; j<seqs.length; j++)
\r
129 seqs[j] = (SequenceI) alignment.elementAt(j);
\r
130 seqmatcher = new SequenceIdMatcher(seqs);
\r
132 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
\r
134 leaf.setPlaceholder(false);
\r
135 leaf.setElement(nam);
\r
137 leaf.setPlaceholder(true);
\r
143 public NJTree(SequenceI[] sequence,int start, int end) {
\r
144 this(sequence,"NJ","BL",start,end);
\r
147 public NJTree(SequenceI[] sequence,String type,String pwtype,int start, int end ) {
\r
149 this.sequence = sequence;
\r
150 this.node = new Vector();
\r
152 this.pwtype = pwtype;
\r
153 this.start = start;
\r
156 if (!(type.equals("NJ"))) {
\r
160 if (!(pwtype.equals("PID"))) {
\r
166 done = new int[sequence.length];
\r
169 while (i < sequence.length && sequence[i] != null) {
\r
176 distance = findDistances();
\r
180 noClus = cluster.size();
\r
187 public void cluster() {
\r
189 while (noClus > 2) {
\r
190 if (type.equals("NJ")) {
\r
191 float mind = findMinNJDistance();
\r
193 float mind = findMinDistance();
\r
196 Cluster c = joinClusters(mini,minj);
\r
201 cluster.setElementAt(null,minj);
\r
202 cluster.setElementAt(c,mini);
\r
207 boolean onefound = false;
\r
212 for (int i=0; i < noseqs; i++) {
\r
213 if (done[i] != 1) {
\r
214 if (onefound == false) {
\r
223 Cluster c = joinClusters(one,two);
\r
224 top = (SequenceNode)(node.elementAt(one));
\r
232 public Cluster joinClusters(int i, int j) {
\r
234 float dist = distance[i][j];
\r
236 int noi = ((Cluster)cluster.elementAt(i)).value.length;
\r
237 int noj = ((Cluster)cluster.elementAt(j)).value.length;
\r
239 int[] value = new int[noi + noj];
\r
241 for (int ii = 0; ii < noi;ii++) {
\r
242 value[ii] = ((Cluster)cluster.elementAt(i)).value[ii];
\r
245 for (int ii = noi; ii < noi+ noj;ii++) {
\r
246 value[ii] = ((Cluster)cluster.elementAt(j)).value[ii-noi];
\r
249 Cluster c = new Cluster(value);
\r
254 if (type.equals("NJ")) {
\r
255 findClusterNJDistance(i,j);
\r
257 findClusterDistance(i,j);
\r
260 SequenceNode sn = new SequenceNode();
\r
262 sn.setLeft((SequenceNode)(node.elementAt(i)));
\r
263 sn.setRight((SequenceNode)(node.elementAt(j)));
\r
265 SequenceNode tmpi = (SequenceNode)(node.elementAt(i));
\r
266 SequenceNode tmpj = (SequenceNode)(node.elementAt(j));
\r
268 if (type.equals("NJ")) {
\r
269 findNewNJDistances(tmpi,tmpj,dist);
\r
271 findNewDistances(tmpi,tmpj,dist);
\r
274 tmpi.setParent(sn);
\r
275 tmpj.setParent(sn);
\r
277 node.setElementAt(sn,i);
\r
281 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, float dist) {
\r
286 SequenceNode sni = tmpi;
\r
287 SequenceNode snj = tmpj;
\r
289 tmpi.dist = (dist + ri - rj)/2;
\r
290 tmpj.dist = (dist - tmpi.dist);
\r
292 if (tmpi.dist < 0) {
\r
295 if (tmpj.dist < 0) {
\r
300 public void findNewDistances(SequenceNode tmpi,SequenceNode tmpj,float dist) {
\r
305 SequenceNode sni = tmpi;
\r
306 SequenceNode snj = tmpj;
\r
308 while (sni != null) {
\r
309 ih = ih + sni.dist;
\r
310 sni = (SequenceNode)sni.left();
\r
313 while (snj != null) {
\r
314 jh = jh + snj.dist;
\r
315 snj = (SequenceNode)snj.left();
\r
318 tmpi.dist = (dist/2 - ih);
\r
319 tmpj.dist = (dist/2 - jh);
\r
324 public void findClusterDistance(int i, int j) {
\r
326 int noi = ((Cluster)cluster.elementAt(i)).value.length;
\r
327 int noj = ((Cluster)cluster.elementAt(j)).value.length;
\r
329 // New distances from cluster to others
\r
330 float[] newdist = new float[noseqs];
\r
332 for (int l = 0; l < noseqs; l++) {
\r
333 if ( l != i && l != j) {
\r
334 newdist[l] = (distance[i][l] * noi + distance[j][l] * noj)/(noi + noj);
\r
340 for (int ii=0; ii < noseqs;ii++) {
\r
341 distance[i][ii] = newdist[ii];
\r
342 distance[ii][i] = newdist[ii];
\r
346 public void findClusterNJDistance(int i, int j) {
\r
348 int noi = ((Cluster)cluster.elementAt(i)).value.length;
\r
349 int noj = ((Cluster)cluster.elementAt(j)).value.length;
\r
351 // New distances from cluster to others
\r
352 float[] newdist = new float[noseqs];
\r
354 for (int l = 0; l < noseqs; l++) {
\r
355 if ( l != i && l != j) {
\r
356 newdist[l] = (distance[i][l] + distance[j][l] - distance[i][j])/2;
\r
362 for (int ii=0; ii < noseqs;ii++) {
\r
363 distance[i][ii] = newdist[ii];
\r
364 distance[ii][i] = newdist[ii];
\r
368 public float findr(int i, int j) {
\r
371 for (int k=0; k < noseqs;k++) {
\r
372 if (k!= i && k!= j && done[k] != 1) {
\r
373 tmp = tmp + distance[i][k];
\r
378 tmp = tmp/(noClus - 2);
\r
384 public float findMinNJDistance() {
\r
386 float min = 100000;
\r
388 for (int i=0; i < noseqs-1; i++) {
\r
389 for (int j=i+1;j < noseqs;j++) {
\r
390 if (done[i] != 1 && done[j] != 1) {
\r
391 float tmp = distance[i][j] - (findr(i,j) + findr(j,i));
\r
406 public float findMinDistance() {
\r
408 float min = 100000;
\r
410 for (int i=0; i < noseqs-1;i++) {
\r
411 for (int j = i+1; j < noseqs;j++) {
\r
412 if (done[i] != 1 && done[j] != 1) {
\r
413 if (distance[i][j] < min) {
\r
417 min = distance[i][j];
\r
425 public float[][] findDistances() {
\r
427 float[][] distance = new float[noseqs][noseqs];
\r
428 if (pwtype.equals("PID")) {
\r
429 for (int i = 0; i < noseqs-1; i++) {
\r
430 for (int j = i; j < noseqs; j++) {
\r
432 distance[i][i] = 0;
\r
434 distance[i][j] = 100-Comparison.PID(sequence[i], sequence[j], start, end);
\r
435 distance[j][i] = distance[i][j];
\r
439 } else if (pwtype.equals("BL")) {
\r
442 for (int i = 0; i < noseqs-1; i++) {
\r
443 for (int j = i; j < noseqs; j++) {
\r
445 for (int k=start; k < end; k++) {
\r
448 ResidueProperties.getBLOSUM62(sequence[i].getSequence(k,
\r
450 sequence[j].getSequence(k,
\r
452 }catch(Exception ex){System.err.println("err creating BLOSUM62 tree");ex.printStackTrace();}
\r
454 distance[i][j] = (float)score;
\r
455 if (score > maxscore) {
\r
460 for (int i = 0; i < noseqs-1; i++) {
\r
461 for (int j = i; j < noseqs; j++) {
\r
462 distance[i][j] = (float)maxscore - distance[i][j];
\r
463 distance[j][i] = distance[i][j];
\r
466 } else if (pwtype.equals("SW")) {
\r
468 for (int i = 0; i < noseqs-1; i++) {
\r
469 for (int j = i; j < noseqs; j++) {
\r
470 AlignSeq as = new AlignSeq(sequence[i],sequence[j],"pep");
\r
471 as.calcScoreMatrix();
\r
472 as.traceAlignment();
\r
473 as.printAlignment();
\r
474 distance[i][j] = (float)as.maxscore;
\r
475 if (max < distance[i][j]) {
\r
476 max = distance[i][j];
\r
480 for (int i = 0; i < noseqs-1; i++) {
\r
481 for (int j = i; j < noseqs; j++) {
\r
482 distance[i][j] = max - distance[i][j];
\r
483 distance[j][i] = distance[i][j];
\r
491 public void makeLeaves() {
\r
492 cluster = new Vector();
\r
494 for (int i=0; i < noseqs; i++) {
\r
495 SequenceNode sn = new SequenceNode();
\r
497 sn.setElement(sequence[i]);
\r
498 sn.setName(sequence[i].getName());
\r
499 node.addElement(sn);
\r
501 int[] value = new int[1];
\r
504 Cluster c = new Cluster(value);
\r
505 cluster.addElement(c);
\r
509 public Vector findLeaves(SequenceNode node, Vector leaves) {
\r
510 if (node == null) {
\r
514 if (node.left() == null && node.right() == null) {
\r
515 leaves.addElement(node);
\r
518 findLeaves((SequenceNode)node.left(),leaves);
\r
519 findLeaves((SequenceNode)node.right(),leaves);
\r
524 public Object findLeaf(SequenceNode node, int count) {
\r
525 found = _findLeaf(node,count);
\r
529 public Object _findLeaf(SequenceNode node,int count) {
\r
530 if (node == null) {
\r
533 if (node.ycount == count) {
\r
534 found = node.element();
\r
537 _findLeaf((SequenceNode)node.left(),count);
\r
538 _findLeaf((SequenceNode)node.right(),count);
\r
545 * printNode is mainly for debugging purposes.
\r
547 * @param node SequenceNode
\r
549 public void printNode(SequenceNode node) {
\r
550 if (node == null) {
\r
553 if (node.left() == null && node.right() == null) {
\r
554 System.out.println("Leaf = " + ((SequenceI)node.element()).getName());
\r
555 System.out.println("Dist " + ((SequenceNode)node).dist);
\r
556 System.out.println("Boot " + node.getBootstrap());
\r
558 System.out.println("Dist " + ((SequenceNode)node).dist);
\r
559 printNode((SequenceNode)node.left());
\r
560 printNode((SequenceNode)node.right());
\r
563 public void findMaxDist(SequenceNode node) {
\r
564 if (node == null) {
\r
567 if (node.left() == null && node.right() == null) {
\r
569 float dist = ((SequenceNode)node).dist;
\r
570 if (dist > maxDistValue) {
\r
571 maxdist = (SequenceNode)node;
\r
572 maxDistValue = dist;
\r
575 findMaxDist((SequenceNode)node.left());
\r
576 findMaxDist((SequenceNode)node.right());
\r
579 public Vector getGroups() {
\r
582 public float getMaxHeight() {
\r
585 public void groupNodes(SequenceNode node, float threshold) {
\r
586 if (node == null) {
\r
590 if (node.height/maxheight > threshold) {
\r
591 groups.addElement(node);
\r
593 groupNodes((SequenceNode)node.left(),threshold);
\r
594 groupNodes((SequenceNode)node.right(),threshold);
\r
598 public float findHeight(SequenceNode node) {
\r
600 if (node == null) {
\r
604 if (node.left() == null && node.right() == null) {
\r
605 node.height = ((SequenceNode)node.parent()).height + node.dist;
\r
607 if (node.height > maxheight) {
\r
608 return node.height;
\r
613 if (node.parent() != null) {
\r
614 node.height = ((SequenceNode)node.parent()).height + node.dist;
\r
617 node.height = (float)0.0;
\r
620 maxheight = findHeight((SequenceNode)(node.left()));
\r
621 maxheight = findHeight((SequenceNode)(node.right()));
\r
625 public SequenceNode reRoot() {
\r
626 if (maxdist != null) {
\r
628 float tmpdist = maxdist.dist;
\r
631 SequenceNode sn = new SequenceNode();
\r
632 sn.setParent(null);
\r
634 // New right hand of top
\r
635 SequenceNode snr = (SequenceNode)maxdist.parent();
\r
636 changeDirection(snr,maxdist);
\r
637 System.out.println("Printing reversed tree");
\r
639 snr.dist = tmpdist/2;
\r
640 maxdist.dist = tmpdist/2;
\r
643 maxdist.setParent(sn);
\r
646 sn.setLeft(maxdist);
\r
657 public static void printN(SequenceNode node) {
\r
658 if (node == null) {
\r
662 if (node.left() != null && node.right() != null) {
\r
663 printN((SequenceNode)node.left());
\r
664 printN((SequenceNode)node.right());
\r
666 System.out.println(" name = " + ((SequenceI)node.element()).getName());
\r
668 System.out.println(" dist = " + ((SequenceNode)node).dist + " " + ((SequenceNode)node).count + " " + ((SequenceNode)node).height);
\r
671 public void reCount(SequenceNode node) {
\r
675 public void _reCount(SequenceNode node) {
\r
676 if (node == null) {
\r
680 if (node.left() != null && node.right() != null) {
\r
681 _reCount((SequenceNode)node.left());
\r
682 _reCount((SequenceNode)node.right());
\r
684 SequenceNode l = (SequenceNode)node.left();
\r
685 SequenceNode r = (SequenceNode)node.right();
\r
687 ((SequenceNode)node).count = l.count + r.count;
\r
688 ((SequenceNode)node).ycount = (l.ycount + r.ycount)/2;
\r
691 ((SequenceNode)node).count = 1;
\r
692 ((SequenceNode)node).ycount = ycount++;
\r
696 public void swapNodes(SequenceNode node) {
\r
697 if (node == null) {
\r
700 SequenceNode tmp = (SequenceNode)node.left();
\r
702 node.setLeft(node.right());
\r
703 node.setRight(tmp);
\r
705 public void changeDirection(SequenceNode node, SequenceNode dir) {
\r
706 if (node == null) {
\r
709 if (node.parent() != top) {
\r
710 changeDirection((SequenceNode)node.parent(), node);
\r
712 SequenceNode tmp = (SequenceNode)node.parent();
\r
714 if (dir == node.left()) {
\r
715 node.setParent(dir);
\r
717 } else if (dir == node.right()) {
\r
718 node.setParent(dir);
\r
719 node.setRight(tmp);
\r
723 if (dir == node.left()) {
\r
724 node.setParent(node.left());
\r
726 if (top.left() == node) {
\r
727 node.setRight(top.right());
\r
729 node.setRight(top.left());
\r
732 node.setParent(node.right());
\r
734 if (top.left() == node) {
\r
735 node.setLeft(top.right());
\r
737 node.setLeft(top.left());
\r
742 public void setMaxDist(SequenceNode node) {
\r
743 this.maxdist = maxdist;
\r
745 public SequenceNode getMaxDist() {
\r
748 public SequenceNode getTopNode() {
\r
760 public Cluster(int[] value) {
\r
761 this.value = value;
\r