1 package jalview.analysis;
\r
3 import jalview.datamodel.*;
\r
4 import jalview.util.*;
\r
5 import jalview.schemes.ResidueProperties;
\r
8 import jalview.io.NewickFile;
\r
10 public class NJTree {
\r
// Sequences the tree is built from; assigned by the clustering constructor.
13 SequenceI[] sequence;
\r
// Nodes collected by groupNodes(): those whose height/maxheight exceeds the given threshold.
26 Vector groups = new Vector();
\r
// Node recorded by findMaxDist() as having the largest dist seen so far; reRoot() re-roots on it.
27 SequenceNode maxdist;
\r
// Scratch result written by findLeaf() and _findLeaf().
40 Object found = null;
\r
// NOTE(review): the Newick-file constructor declares a local Vector also named "leaves",
// which shadows this field there; no other use of the field is visible in this excerpt.
41 Object leaves = null;
\r
// Builds an NJTree around an existing node. Of the body, only the height computation is
// visible in this excerpt: findHeight(top) is run and its result is stored in maxheight.
// NOTE(review): the assignment of top from node is not visible here — confirm it precedes this call.
46 public NJTree(SequenceNode node) {
\r
48 maxheight = findHeight(top);
\r
51 // Private sequence-id wrapper whose hashCode()/equals() match ids fuzzily (by prefix) so it can serve as a Hashtable key.
\r
// Key type wrapping a sequence id string (field id); equality is prefix-based rather than exact.
53 private class SeqIdname {
\r
// Takes the id string to wrap (constructor body not visible in this excerpt).
56 SeqIdname(String s) {
\r
// Hashes only the first four characters of id, so ids that share those characters hash alike.
// NOTE(review): substring(0,4) throws StringIndexOutOfBoundsException when id has fewer than
// four characters — confirm ids are always at least that long.
59 public int hashCode() {
\r
60 return (id.substring(0,4).hashCode());
\r
// Dispatches to the typed overloads below when s is a SeqIdname or a String.
62 public boolean equals(Object s) {
\r
63 if (s instanceof SeqIdname) {
\r
64 return this.equals((SeqIdname) s);
\r
66 if (s instanceof String) {
\r
67 return this.equals((String) s);
\r
// Tests whether either id is a prefix of the other.
74 public boolean equals(SeqIdname s) {
\r
75 if (id.startsWith(s.id) || s.id.startsWith(id)) {
\r
// Same prefix test against a plain String.
81 public boolean equals(String s) {
\r
82 if (id.startsWith(s) || s.startsWith(id)) {
\r
// Builds the tree from a parsed Newick file and attaches the supplied sequences to its leaves.
// seqs     - sequences to associate with leaf nodes, looked up by display id
// treefile - supplies the root node via getTree()
89 public NJTree(SequenceI[] seqs, NewickFile treefile) {
\r
90 top = treefile.getTree();
\r
91 maxheight = findHeight(top);
\r
// Index the sequences by display id, wrapped in SeqIdname so lookups use its prefix matching.
92 Hashtable names = new Hashtable();
\r
93 for (int i = 0; i < seqs.length; i++)
\r
95 names.put(new SeqIdname(seqs[i].getDisplayId()), seqs[i]);
\r
// Collect the leaf nodes of the parsed tree (this local shadows the field named leaves).
97 Vector leaves = new Vector();
\r
98 findLeaves(top, leaves);
\r
100 int namesleft = seqs.length;
\r
// Visit each leaf; a leaf whose name is a key in names receives the matching sequence as its element.
103 while (i < leaves.size())
\r
105 j = (SequenceNode) leaves.elementAt(i++);
\r
106 nam = new SeqIdname(j.getName());
\r
108 && names.containsKey(nam))
\r
110 j.setElement(names.get(nam));
\r
// Presumably the no-match branch (the else is not visible): the leaf gets a placeholder Sequence named after it.
113 j.setElement(new Sequence(nam.id, "THISISAPLACEHLDER"));
\r
// Convenience constructor: delegates to the five-argument constructor with type "NJ" and pwtype "BL".
118 public NJTree(SequenceI[] sequence,int start, int end) {
\r
119 this(sequence,"NJ","BL",start,end);
\r
// Sets up a tree to be built by clustering the given sequences.
// sequence - sequences to cluster
// type     - tree type; the code tests for "NJ" (what happens for other values is not visible here)
// pwtype   - pairwise distance method; tested for "PID" here, and findDistances() also handles "BL" and "SW"
// start    - stored on the instance
// end      - its use is not visible in this excerpt
122 public NJTree(SequenceI[] sequence,String type,String pwtype,int start, int end ) {
\r
124 this.sequence = sequence;
\r
125 this.node = new Vector();
\r
127 this.pwtype = pwtype;
\r
128 this.start = start;
\r
131 if (!(type.equals("NJ"))) {
\r
135 if (!(pwtype.equals("PID"))) {
\r
// One flag per sequence slot; other methods skip slots whose flag equals 1.
141 done = new int[sequence.length];
\r
// Walks the leading run of non-null sequences (loop body not visible in this excerpt).
144 while (i < sequence.length && sequence[i] != null) {
\r
// Pairwise distance matrix computed according to pwtype.
151 distance = findDistances();
\r
// The cluster counter starts at the current number of clusters.
155 noClus = cluster.size();
\r
// Runs the clustering: while more than two clusters remain, finds the closest pair with the
// criterion for the tree type and joins it; afterwards joins the last two slots not marked
// done and makes the node stored at the first of them the tree root (top).
162 public void cluster() {
\r
164 while (noClus > 2) {
\r
// Search for the pair to join (the chosen indices are presumably left in mini/minj — set on lines not visible here).
165 if (type.equals("NJ")) {
\r
166 float mind = findMinNJDistance();
\r
168 float mind = findMinDistance();
\r
171 Cluster c = joinClusters(mini,minj);
\r
// The merged cluster takes over slot mini; slot minj is cleared.
176 cluster.setElementAt(null,minj);
\r
177 cluster.setElementAt(c,mini);
\r
// Find the slots whose done flag is not 1.
182 boolean onefound = false;
\r
187 for (int i=0; i < noseqs; i++) {
\r
188 if (done[i] != 1) {
\r
189 if (onefound == false) {
\r
// Final join; the node stored at index one becomes the root.
198 Cluster c = joinClusters(one,two);
\r
199 top = (SequenceNode)(node.elementAt(one));
\r
// Merges clusters i and j: builds the combined member list, updates the distance matrix for
// the tree type, and creates a new parent SequenceNode over nodes i and j, stored at slot i.
// i, j - indices into the cluster and node vectors
// Presumably returns the merged Cluster c (the return statement is not visible in this excerpt).
207 public Cluster joinClusters(int i, int j) {
\r
// Distance between the two clusters before the matrix is updated.
209 float dist = distance[i][j];
\r
211 int noi = ((Cluster)cluster.elementAt(i)).value.length;
\r
212 int noj = ((Cluster)cluster.elementAt(j)).value.length;
\r
// Members of cluster i followed by members of cluster j.
214 int[] value = new int[noi + noj];
\r
216 for (int ii = 0; ii < noi;ii++) {
\r
217 value[ii] = ((Cluster)cluster.elementAt(i)).value[ii];
\r
220 for (int ii = noi; ii < noi+ noj;ii++) {
\r
221 value[ii] = ((Cluster)cluster.elementAt(j)).value[ii-noi];
\r
224 Cluster c = new Cluster(value);
\r
// Recompute the distances from the merged cluster to the other slots.
229 if (type.equals("NJ")) {
\r
230 findClusterNJDistance(i,j);
\r
232 findClusterDistance(i,j);
\r
// New internal node with the two joined nodes as its children.
235 SequenceNode sn = new SequenceNode();
\r
237 sn.setLeft((SequenceNode)(node.elementAt(i)));
\r
238 sn.setRight((SequenceNode)(node.elementAt(j)));
\r
240 SequenceNode tmpi = (SequenceNode)(node.elementAt(i));
\r
241 SequenceNode tmpj = (SequenceNode)(node.elementAt(j));
\r
// Assign branch lengths to the two children using the pre-merge distance.
243 if (type.equals("NJ")) {
\r
244 findNewNJDistances(tmpi,tmpj,dist);
\r
246 findNewDistances(tmpi,tmpj,dist);
\r
249 tmpi.setParent(sn);
\r
250 tmpj.setParent(sn);
\r
// The new parent replaces node i in the node vector.
252 node.setElementAt(sn,i);
\r
// Sets the branch lengths of two nodes being joined (called from joinClusters for type "NJ"):
// tmpi.dist = (dist + ri - rj)/2 and tmpj.dist = dist - tmpi.dist.
// ri and rj are computed on lines not visible in this excerpt (presumably via findr — TODO confirm).
256 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, float dist) {
\r
261 SequenceNode sni = tmpi;
\r
262 SequenceNode snj = tmpj;
\r
264 tmpi.dist = (dist + ri - rj)/2;
\r
265 tmpj.dist = (dist - tmpi.dist);
\r
// Negative branch lengths are special-cased (the handling itself is not visible here).
267 if (tmpi.dist < 0) {
\r
270 if (tmpj.dist < 0) {
\r
// Sets the branch lengths of two nodes being joined (called from joinClusters when type is not "NJ").
// ih and jh accumulate the dist values along each node's chain of left children, starting at the
// node itself; each node's new dist is dist/2 minus that accumulated value.
275 public void findNewDistances(SequenceNode tmpi,SequenceNode tmpj,float dist) {
\r
280 SequenceNode sni = tmpi;
\r
281 SequenceNode snj = tmpj;
\r
283 while (sni != null) {
\r
284 ih = ih + sni.dist;
\r
285 sni = (SequenceNode)sni.left();
\r
288 while (snj != null) {
\r
289 jh = jh + snj.dist;
\r
290 snj = (SequenceNode)snj.left();
\r
293 tmpi.dist = (dist/2 - ih);
\r
294 tmpj.dist = (dist/2 - jh);
\r
// Rewrites row and column i of the distance matrix with the distances from the merged
// cluster (i joined with j) to every other slot, using the average of the two old
// distances weighted by the member counts of clusters i and j.
299 public void findClusterDistance(int i, int j) {
\r
301 int noi = ((Cluster)cluster.elementAt(i)).value.length;
\r
302 int noj = ((Cluster)cluster.elementAt(j)).value.length;
\r
304 // New distances from cluster to others
\r
305 float[] newdist = new float[noseqs];
\r
307 for (int l = 0; l < noseqs; l++) {
\r
308 if ( l != i && l != j) {
\r
309 newdist[l] = (distance[i][l] * noi + distance[j][l] * noj)/(noi + noj);
\r
// Copy the new distances into both the row and the column so the matrix stays symmetric.
315 for (int ii=0; ii < noseqs;ii++) {
\r
316 distance[i][ii] = newdist[ii];
\r
317 distance[ii][i] = newdist[ii];
\r
// Rewrites row and column i of the distance matrix with the distances from the merged
// cluster (i joined with j) to every other slot l, computed as
// (distance[i][l] + distance[j][l] - distance[i][j]) / 2.
321 public void findClusterNJDistance(int i, int j) {
\r
// noi and noj are read here but do not appear in the formula on the visible lines.
323 int noi = ((Cluster)cluster.elementAt(i)).value.length;
\r
324 int noj = ((Cluster)cluster.elementAt(j)).value.length;
\r
326 // New distances from cluster to others
\r
327 float[] newdist = new float[noseqs];
\r
329 for (int l = 0; l < noseqs; l++) {
\r
330 if ( l != i && l != j) {
\r
331 newdist[l] = (distance[i][l] + distance[j][l] - distance[i][j])/2;
\r
// Copy the new distances into both the row and the column so the matrix stays symmetric.
337 for (int ii=0; ii < noseqs;ii++) {
\r
338 distance[i][ii] = newdist[ii];
\r
339 distance[ii][i] = newdist[ii];
\r
// Sums distance[i][k] over every slot k other than i and j whose done flag is not 1,
// then divides the sum by (noClus - 2).
// NOTE(review): with noClus == 2 the divisor is zero unless guarded on lines not visible here;
// cluster() only searches for pairs while noClus > 2.
343 public float findr(int i, int j) {
\r
346 for (int k=0; k < noseqs;k++) {
\r
347 if (k!= i && k!= j && done[k] != 1) {
\r
348 tmp = tmp + distance[i][k];
\r
353 tmp = tmp/(noClus - 2);
\r
// Scans every pair i < j of slots whose done flags are not 1 and evaluates
// distance[i][j] - (findr(i,j) + findr(j,i)) for each.
// The comparison against min and the recording of the chosen pair are on lines not visible here.
359 public float findMinNJDistance() {
\r
// Starting value for the running minimum.
361 float min = 100000;
\r
363 for (int i=0; i < noseqs-1; i++) {
\r
364 for (int j=i+1;j < noseqs;j++) {
\r
365 if (done[i] != 1 && done[j] != 1) {
\r
366 float tmp = distance[i][j] - (findr(i,j) + findr(j,i));
\r
// Scans every pair i < j of slots whose done flags are not 1 and tracks the smallest distance[i][j].
// NOTE(review): min starts at 100000, so a pair is only picked up if its distance is below that value.
381 public float findMinDistance() {
\r
383 float min = 100000;
\r
385 for (int i=0; i < noseqs-1;i++) {
\r
386 for (int j = i+1; j < noseqs;j++) {
\r
387 if (done[i] != 1 && done[j] != 1) {
\r
388 if (distance[i][j] < min) {
\r
392 min = distance[i][j];
\r
// Builds the noseqs x noseqs pairwise distance matrix for the sequences according to pwtype:
//   "PID" - 100 minus Comparison.PID of the two sequences
//   "BL"  - a BLOSUM62-based score per pair, turned into a distance by subtracting it from maxscore
//   "SW"  - AlignSeq's maxscore per pair, turned into a distance by subtracting it from the largest score
// Each branch fills cells for j >= i and mirrors them into [j][i].
// NOTE(review): the outer loops stop at i < noseqs-1, so the last diagonal cell is never written
// and keeps the array default of 0.
400 public float[][] findDistances() {
\r
402 float[][] distance = new float[noseqs][noseqs];
\r
403 if (pwtype.equals("PID")) {
\r
404 for (int i = 0; i < noseqs-1; i++) {
\r
405 for (int j = i; j < noseqs; j++) {
\r
407 distance[i][i] = 0;
\r
409 distance[i][j] = 100-Comparison.PID(sequence[i], sequence[j]);
\r
410 distance[j][i] = distance[i][j];
\r
414 } else if (pwtype.equals("BL")) {
\r
// First pass: raw score per pair, tracking the largest score seen.
417 for (int i = 0; i < noseqs-1; i++) {
\r
418 for (int j = i; j < noseqs; j++) {
\r
// The position loop is bounded by the length of sequence[i] only.
420 for (int k=0; k < sequence[i].getLength(); k++) {
\r
423 ResidueProperties.getBLOSUM62(sequence[i].getSequence(k,
\r
425 sequence[j].getSequence(k,
\r
// Any exception in the lookup is caught; only a message is printed to stdout.
427 }catch(Exception ex){System.out.println("err creating BLOSUM62 tree");}
\r
429 distance[i][j] = (float)score;
\r
430 if (score > maxscore) {
\r
// Second pass: convert scores to distances and mirror them.
435 for (int i = 0; i < noseqs-1; i++) {
\r
436 for (int j = i; j < noseqs; j++) {
\r
437 distance[i][j] = (float)maxscore - distance[i][j];
\r
438 distance[j][i] = distance[i][j];
\r
441 } else if (pwtype.equals("SW")) {
\r
// First pass: one AlignSeq run per pair, tracking the largest score in max.
443 for (int i = 0; i < noseqs-1; i++) {
\r
444 for (int j = i; j < noseqs; j++) {
\r
445 AlignSeq as = new AlignSeq(sequence[i],sequence[j],"pep");
\r
446 as.calcScoreMatrix();
\r
447 as.traceAlignment();
\r
// NOTE(review): printAlignment() is called for every pair — presumably produces output each time; confirm this is wanted.
448 as.printAlignment();
\r
449 distance[i][j] = (float)as.maxscore;
\r
450 if (max < distance[i][j]) {
\r
451 max = distance[i][j];
\r
// Second pass: convert scores to distances and mirror them.
455 for (int i = 0; i < noseqs-1; i++) {
\r
456 for (int j = i; j < noseqs; j++) {
\r
457 distance[i][j] = max - distance[i][j];
\r
458 distance[j][i] = distance[i][j];
\r
// Creates the initial state for clustering: for each of the noseqs sequences, one leaf
// SequenceNode (added to node) and one single-member Cluster (added to a fresh cluster vector).
466 public void makeLeaves() {
\r
467 cluster = new Vector();
\r
469 for (int i=0; i < noseqs; i++) {
\r
470 SequenceNode sn = new SequenceNode();
\r
// The leaf carries the sequence as its element and takes the sequence's name.
472 sn.setElement(sequence[i]);
\r
473 sn.setName(sequence[i].getName());
\r
474 node.addElement(sn);
\r
// One-element member array (the assignment of value[0] is not visible in this excerpt).
476 int[] value = new int[1];
\r
479 Cluster c = new Cluster(value);
\r
480 cluster.addElement(c);
\r
// Recursively walks the subtree under node and appends every node that has neither a left
// nor a right child to leaves.
// node   - subtree root; null is tested for explicitly (the handling is not visible here)
// leaves - output vector, appended to in place
// Return statements are not visible in this excerpt.
484 public Vector findLeaves(SequenceNode node, Vector leaves) {
\r
485 if (node == null) {
\r
489 if (node.left() == null && node.right() == null) {
\r
490 leaves.addElement(node);
\r
493 findLeaves((SequenceNode)node.left(),leaves);
\r
494 findLeaves((SequenceNode)node.right(),leaves);
\r
// Entry point for the ycount search: runs _findLeaf(node, count) and stores its result in the
// found field. The return statement is not visible in this excerpt.
499 public Object findLeaf(SequenceNode node, int count) {
\r
500 found = _findLeaf(node,count);
\r
// Recursive helper for findLeaf: when a node's ycount equals count, its element is stored in
// the found field; both children are searched recursively.
// The values returned are on lines not visible in this excerpt.
504 public Object _findLeaf(SequenceNode node,int count) {
\r
505 if (node == null) {
\r
508 if (node.ycount == count) {
\r
509 found = node.element();
\r
512 _findLeaf((SequenceNode)node.left(),count);
\r
513 _findLeaf((SequenceNode)node.right(),count);
\r
// Debug dump of the subtree under node to stdout. For a node with no children it prints the
// element's name, the node's dist and its bootstrap value; the remaining visible lines print
// dist and recurse into both children (presumably the non-leaf branch — the else is not visible).
519 public void printNode(SequenceNode node) {
\r
520 if (node == null) {
\r
523 if (node.left() == null && node.right() == null) {
\r
524 System.out.println("Leaf = " + ((SequenceI)node.element()).getName());
\r
525 System.out.println("Dist " + ((SequenceNode)node).dist);
\r
526 System.out.println("Boot " + node.getBootstrap());
\r
528 System.out.println("Dist " + ((SequenceNode)node).dist);
\r
529 printNode((SequenceNode)node.left());
\r
530 printNode((SequenceNode)node.right());
\r
// Walks the subtree under node looking for the largest dist: when a node's dist exceeds
// maxDistValue, the node is stored in maxdist and maxDistValue is updated. The comparison
// follows the no-children test, so it appears to apply to leaves only (braces not fully visible).
533 public void findMaxDist(SequenceNode node) {
\r
534 if (node == null) {
\r
537 if (node.left() == null && node.right() == null) {
\r
539 float dist = ((SequenceNode)node).dist;
\r
540 if (dist > maxDistValue) {
\r
541 maxdist = (SequenceNode)node;
\r
542 maxDistValue = dist;
\r
545 findMaxDist((SequenceNode)node.left());
\r
546 findMaxDist((SequenceNode)node.right());
\r
// Accessor; body not visible in this excerpt — presumably returns the groups vector filled by groupNodes() (TODO confirm).
549 public Vector getGroups() {
\r
// Accessor; body not visible in this excerpt — presumably returns maxheight as computed by findHeight() (TODO confirm).
552 public float getMaxHeight() {
\r
// Collects into groups the nodes whose height, as a fraction of maxheight, exceeds threshold.
// The visible lines also recurse into both children; whether that recursion is skipped once a
// node has been added is decided on lines not visible in this excerpt.
// threshold - compared against node.height/maxheight
555 public void groupNodes(SequenceNode node, float threshold) {
\r
556 if (node == null) {
\r
560 if (node.height/maxheight > threshold) {
\r
561 groups.addElement(node);
\r
563 groupNodes((SequenceNode)node.left(),threshold);
\r
564 groupNodes((SequenceNode)node.right(),threshold);
\r
// Recursively assigns node.height for the subtree under node: a node's height is its parent's
// height plus its own dist, or 0.0 for an internal node with no parent. The running maximum is
// carried in the maxheight field, which receives the result of each recursive call.
// NOTE(review): the leaf branch dereferences node.parent() without a null check, so a tree made
// of a single childless, parentless node would throw NullPointerException.
568 public float findHeight(SequenceNode node) {
\r
570 if (node == null) {
\r
// Leaf: height comes from the parent; a height above the current maximum is returned.
574 if (node.left() == null && node.right() == null) {
\r
575 node.height = ((SequenceNode)node.parent()).height + node.dist;
\r
577 if (node.height > maxheight) {
\r
578 return node.height;
\r
// Internal node: the parent may be absent, in which case height is 0.0.
583 if (node.parent() != null) {
\r
584 node.height = ((SequenceNode)node.parent()).height + node.dist;
\r
587 node.height = (float)0.0;
\r
590 maxheight = findHeight((SequenceNode)(node.left()));
\r
591 maxheight = findHeight((SequenceNode)(node.right()));
\r
// Re-roots the tree on the branch above maxdist (only when maxdist is set): a new parentless
// node sn is created, the links from maxdist's old parent upwards are reversed with
// changeDirection, the original branch length is split evenly between maxdist and its former
// parent, and maxdist becomes the left child of sn.
// The rest of the wiring and the return value are on lines not visible in this excerpt.
595 public SequenceNode reRoot() {
\r
596 if (maxdist != null) {
\r
// Remember the branch length before it is halved below.
598 float tmpdist = maxdist.dist;
\r
601 SequenceNode sn = new SequenceNode();
\r
602 sn.setParent(null);
\r
604 // New right hand of top
\r
605 SequenceNode snr = (SequenceNode)maxdist.parent();
\r
606 changeDirection(snr,maxdist);
\r
607 System.out.println("Printing reversed tree");
\r
609 snr.dist = tmpdist/2;
\r
610 maxdist.dist = tmpdist/2;
\r
613 maxdist.setParent(sn);
\r
616 sn.setLeft(maxdist);
\r
// Static debug dump to stdout: recurses into both children when both are present, and prints
// an element name and a line with the node's dist, count and height.
// Which branch each println belongs to depends on braces not visible in this excerpt.
627 public static void printN(SequenceNode node) {
\r
628 if (node == null) {
\r
632 if (node.left() != null && node.right() != null) {
\r
633 printN((SequenceNode)node.left());
\r
634 printN((SequenceNode)node.right());
\r
636 System.out.println(" name = " + ((SequenceI)node.element()).getName());
\r
638 System.out.println(" dist = " + ((SequenceNode)node).dist + " " + ((SequenceNode)node).count + " " + ((SequenceNode)node).height);
\r
// Body not visible in this excerpt — presumably the public entry point that delegates to _reCount(node) (TODO confirm).
641 public void reCount(SequenceNode node) {
\r
// Recomputes count and ycount for the subtree under node, children first.
// A node with both children gets count = sum of the children's counts and ycount = the
// average of the children's ycounts. The remaining visible assignments (presumably the
// else branch) set count to 1 and ycount to the next value of the instance's ycount counter.
645 public void _reCount(SequenceNode node) {
\r
646 if (node == null) {
\r
650 if (node.left() != null && node.right() != null) {
\r
651 _reCount((SequenceNode)node.left());
\r
652 _reCount((SequenceNode)node.right());
\r
654 SequenceNode l = (SequenceNode)node.left();
\r
655 SequenceNode r = (SequenceNode)node.right();
\r
657 ((SequenceNode)node).count = l.count + r.count;
\r
658 ((SequenceNode)node).ycount = (l.ycount + r.ycount)/2;
\r
// The right-hand ycount is the enclosing instance's counter, post-incremented after being read.
661 ((SequenceNode)node).count = 1;
\r
662 ((SequenceNode)node).ycount = ycount++;
\r
// Exchanges the left and right children of node; null is tested for explicitly first.
666 public void swapNodes(SequenceNode node) {
\r
667 if (node == null) {
\r
// Hold the left child while the right child is moved into its place.
670 SequenceNode tmp = (SequenceNode)node.left();
\r
672 node.setLeft(node.right());
\r
673 node.setRight(tmp);
\r
// Reverses parent/child links so that dir, currently a child of node, becomes node's parent.
// Called by reRoot() with maxdist's parent as node and maxdist as dir.
// node - node whose parent link is redirected
// dir  - child of node that becomes its new parent
675 public void changeDirection(SequenceNode node, SequenceNode dir) {
\r
676 if (node == null) {
\r
// Case 1: node's parent is not the current root. The ancestors are reversed first, by
// recursing with node in the role of dir.
679 if (node.parent() != top) {
\r
680 changeDirection((SequenceNode)node.parent(), node);
\r
682 SequenceNode tmp = (SequenceNode)node.parent();
\r
// dir becomes the parent; for dir == right the old parent tmp is written into the right slot
// (the matching statement for dir == left is not visible in this excerpt).
684 if (dir == node.left()) {
\r
685 node.setParent(dir);
\r
687 } else if (dir == node.right()) {
\r
688 node.setParent(dir);
\r
689 node.setRight(tmp);
\r
// Case 2 (presumably the else of the parent != top test): node hangs directly off top, and
// top's other child is attached to node.
// NOTE(review): here the slot written is the one NOT occupied by dir (setRight when dir is the
// left child, setLeft otherwise), whereas case 1 writes the slot dir occupied — confirm intended.
693 if (dir == node.left()) {
\r
694 node.setParent(node.left());
\r
696 if (top.left() == node) {
\r
697 node.setRight(top.right());
\r
699 node.setRight(top.left());
\r
702 node.setParent(node.right());
\r
704 if (top.left() == node) {
\r
705 node.setLeft(top.right());
\r
707 node.setLeft(top.left());
\r
// Sets the node stored in maxdist, which reRoot() re-roots the tree on.
// node - the node to record
712 public void setMaxDist(SequenceNode node) {
\r
// Fix: the original assigned the field to itself (this.maxdist = maxdist), silently ignoring the argument.
713 this.maxdist = node;
\r
// Accessor; body not visible in this excerpt — presumably returns the maxdist node (TODO confirm).
715 public SequenceNode getMaxDist() {
\r
// Accessor; body not visible in this excerpt — presumably returns the root node top (TODO confirm).
718 public SequenceNode getTopNode() {
\r
// Creates a cluster over the given member array. The array reference is stored as-is (no copy
// is made on the visible lines); joinClusters() reads value.length and the entries.
730 public Cluster(int[] value) {
\r
731 this.value = value;
\r