2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
23 import jalview.datamodel.*;
25 import jalview.schemes.*;
26 import jalview.util.*;
39 //SequenceData is a string representation of what the user
40 //sees. The display may contain hidden columns.
41 public AlignmentView seqData = null;
51 Vector groups = new Vector();
63 boolean hasDistances = true; // normal case for jalview trees
64 boolean hasBootstrap = false; // normal case for jalview trees
66 private boolean hasRootDistance = true;
69 * Create a new NJTree object with leaves associated with sequences in seqs,
70 * and original alignment data represented by Cigar strings.
71 * @param seqs SequenceI[]
72 * @param odata AlignmentView holding the original alignment data
73 * @param treefile NewickFile
75 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
83 sequenceString = new String[odata.length];
84 char gapChar = jalview.util.Comparison.GapChars.charAt(0);
85 for (int i = 0; i < odata.length; i++)
87 SequenceI oseq_aligned = odata[i].getSeq(gapChar);
88 sequenceString[i] = oseq_aligned.getSequence();
93 * Creates a new NJTree object from a tree from an external source
95 * @param seqs SequenceI which should be associated with leaves of treefile
96 * @param treefile A parsed tree
98 public NJTree(SequenceI[] seqs, NewickFile treefile)
100 this.sequence = seqs;
101 top = treefile.getTree();
104 * There is no dependent alignment to be recovered from an
107 if (sequenceString == null)
109 sequenceString = new String[seqs.length];
110 for (int i = 0; i < seqs.length; i++)
112 sequenceString[i] = seqs[i].getSequence();
117 hasDistances = treefile.HasDistances();
118 hasBootstrap = treefile.HasBootstrap();
119 hasRootDistance = treefile.HasRootDistance();
121 maxheight = findHeight(top);
123 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
125 Vector leaves = new Vector();
126 findLeaves(top, leaves);
129 int namesleft = seqs.length;
134 Vector one2many = new Vector();
135 int countOne2Many = 0;
136 while (i < leaves.size())
138 j = (SequenceNode) leaves.elementAt(i++);
139 realnam = j.getName();
144 nam = algnIds.findIdMatch(realnam);
150 if (one2many.contains(nam))
153 // if (jalview.bin.Cache.log.isDebugEnabled())
154 // jalview.bin.Cache.log.debug("One 2 many relationship for "+nam.getName());
158 one2many.addElement(nam);
164 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
165 j.setPlaceholder(true);
168 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
169 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment sequence ids (out of "+one2many.size()+" unique ids) linked to two or more leaves.");
175 * Creates a new NJTree object.
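177 * @param sequence sequences to build the tree from
178 * @param type tree construction method; "NJ" is the value explicitly tested for
179 * @param pwtype pairwise distance measure: "PID", or a score matrix name known to ResidueProperties
180 * @param start start of the range handed to SeqCigar for each sequence
181 * @param end end of the range handed to SeqCigar (range width is end - start + 1)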
183 public NJTree(SequenceI[] sequence,
184 AlignmentView seqData,
189 this.sequence = sequence;
190 this.node = new Vector();
192 this.pwtype = pwtype;
195 this.seqData = seqData;
199 SeqCigar[] seqs = new SeqCigar[sequence.length];
200 for (int i = 0; i < sequence.length; i++)
202 seqs[i] = new SeqCigar(sequence[i], start, end);
204 CigarArray sdata = new CigarArray(seqs);
205 sdata.addOperation(CigarArray.M, end - start + 1);
206 this.seqData = new AlignmentView(sdata, start);
209 if (! (type.equals("NJ")))
214 if (! (pwtype.equals("PID")))
216 if (ResidueProperties.getScoreMatrix(pwtype) == null)
224 done = new int[sequence.length];
226 while ( (i < sequence.length) && (sequence[i] != null))
234 distance = findDistances(this.seqData.getSequenceStrings(Comparison.
235 GapChars.charAt(0)));
239 noClus = cluster.size();
247 * @return the tree below getTopNode() as printed by NewickFile.print(false, true)
249 public String toString()
251 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
253 return fout.print(false, true); // distances only
258 * used when the alignment associated with a tree has changed.
260 * @param alignment Vector
262 public void UpdatePlaceHolders(Vector alignment)
264 Vector leaves = new Vector();
265 findLeaves(top, leaves);
267 int sz = leaves.size();
268 SequenceIdMatcher seqmatcher = null;
273 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
275 if (alignment.contains(leaf.element()))
277 leaf.setPlaceholder(false);
281 if (seqmatcher == null)
283 // Only create this the first time we need it
284 SequenceI[] seqs = new SequenceI[alignment.size()];
286 for (int j = 0; j < seqs.length; j++)
288 seqs[j] = (SequenceI) alignment.elementAt(j);
291 seqmatcher = new SequenceIdMatcher(seqs);
294 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
298 if (!leaf.isPlaceholder())
300 // remapping the node to a new sequenceI - should remove any refs to old one.
301 // TODO - make many sequenceI to one leaf mappings possible! (JBPNote)
303 leaf.setPlaceholder(false);
304 leaf.setElement(nam);
308 if (!leaf.isPlaceholder())
310 // Construct a new placeholder sequence object for this leaf
311 leaf.setElement(new Sequence(leaf.getName(), "THISISAPLACEHLDER"));
313 leaf.setPlaceholder(true);
323 public void cluster()
327 if (type.equals("NJ"))
336 Cluster c = joinClusters(mini, minj);
340 cluster.setElementAt(null, minj);
341 cluster.setElementAt(c, mini);
346 boolean onefound = false;
351 for (int i = 0; i < noseqs; i++)
355 if (onefound == false)
367 joinClusters(one, two);
368 top = (SequenceNode) (node.elementAt(one));
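378 * @param i index (into cluster, node and distance) of the first cluster to join
379 * @param j index (into cluster, node and distance) of the second cluster to join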
381 * @return DOCUMENT ME!
383 public Cluster joinClusters(int i, int j)
385 float dist = distance[i][j];
387 int noi = ( (Cluster) cluster.elementAt(i)).value.length;
388 int noj = ( (Cluster) cluster.elementAt(j)).value.length;
390 int[] value = new int[noi + noj];
392 for (int ii = 0; ii < noi; ii++)
394 value[ii] = ( (Cluster) cluster.elementAt(i)).value[ii];
397 for (int ii = noi; ii < (noi + noj); ii++)
399 value[ii] = ( (Cluster) cluster.elementAt(j)).value[ii - noi];
402 Cluster c = new Cluster(value);
407 if (type.equals("NJ"))
409 findClusterNJDistance(i, j);
413 findClusterDistance(i, j);
416 SequenceNode sn = new SequenceNode();
418 sn.setLeft( (SequenceNode) (node.elementAt(i)));
419 sn.setRight( (SequenceNode) (node.elementAt(j)));
421 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
422 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
424 if (type.equals("NJ"))
426 findNewNJDistances(tmpi, tmpj, dist);
430 findNewDistances(tmpi, tmpj, dist);
436 node.setElementAt(sn, i);
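444 * @param tmpi first joined node; its dist is set to ((dist + ri) - rj) / 2
445 * @param tmpj second joined node; its dist is set to dist minus tmpi.dist
446 * @param dist distance between the two clusters being joined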
448 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
452 tmpi.dist = ( (dist + ri) - rj) / 2;
453 tmpj.dist = (dist - tmpi.dist);
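469 * @param tmpi first joined node; its dist is set to (dist / 2) - ih
470 * @param tmpj second joined node; its dist is set to (dist / 2) - jh
471 * @param dist distance between the two clusters being joined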
473 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
479 SequenceNode sni = tmpi;
480 SequenceNode snj = tmpj;
485 sni = (SequenceNode) sni.left();
491 snj = (SequenceNode) snj.left();
494 tmpi.dist = ( (dist / 2) - ih);
495 tmpj.dist = ( (dist / 2) - jh);
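501 * @param i index of the first cluster; row and column i of distance receive the new values
502 * @param j index of the second cluster; its distances are combined with those of i, weighted by cluster size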
504 public void findClusterDistance(int i, int j)
506 int noi = ( (Cluster) cluster.elementAt(i)).value.length;
507 int noj = ( (Cluster) cluster.elementAt(j)).value.length;
509 // New distances from cluster to others
510 float[] newdist = new float[noseqs];
512 for (int l = 0; l < noseqs; l++)
514 if ( (l != i) && (l != j))
516 newdist[l] = ( (distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
525 for (int ii = 0; ii < noseqs; ii++)
527 distance[i][ii] = newdist[ii];
528 distance[ii][i] = newdist[ii];
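535 * @param i index of the first cluster; row and column i of distance receive the new values
536 * @param j index of the second cluster, whose distances are combined with those of i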
538 public void findClusterNJDistance(int i, int j)
541 // New distances from cluster to others
542 float[] newdist = new float[noseqs];
544 for (int l = 0; l < noseqs; l++)
546 if ( (l != i) && (l != j))
548 newdist[l] = ( (distance[i][l] + distance[j][l]) -
557 for (int ii = 0; ii < noseqs; ii++)
559 distance[i][ii] = newdist[ii];
560 distance[ii][i] = newdist[ii];
567 * @param i index whose distances to the other clusters not marked done are summed
568 * @param j index left out of the sum (as is i itself)
570 * @return DOCUMENT ME!
572 public float findr(int i, int j)
576 for (int k = 0; k < noseqs; k++)
578 if ( (k != i) && (k != j) && (done[k] != 1))
580 tmp = tmp + distance[i][k];
586 tmp = tmp / (noClus - 2);
595 * @return DOCUMENT ME!
597 public float findMinNJDistance()
601 for (int i = 0; i < (noseqs - 1); i++)
603 for (int j = i + 1; j < noseqs; j++)
605 if ( (done[i] != 1) && (done[j] != 1))
607 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
626 * @return DOCUMENT ME!
628 public float findMinDistance()
632 for (int i = 0; i < (noseqs - 1); i++)
634 for (int j = i + 1; j < noseqs; j++)
636 if ( (done[i] != 1) && (done[j] != 1))
638 if (distance[i][j] < min)
643 min = distance[i][j];
655 * @return DOCUMENT ME!
657 public float[][] findDistances(String[] sequenceString)
659 float[][] distance = new float[noseqs][noseqs];
661 if (pwtype.equals("PID"))
663 for (int i = 0; i < (noseqs - 1); i++)
665 for (int j = i; j < noseqs; j++)
673 distance[i][j] = 100 -
674 Comparison.PID(sequenceString[i], sequenceString[j]);
676 distance[j][i] = distance[i][j];
683 // Pairwise substitution score (with no gap penalties)
684 ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype);
685 if (pwmatrix == null)
687 pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
690 int end = sequenceString[0].length();
691 for (int i = 0; i < (noseqs - 1); i++)
693 for (int j = i; j < noseqs; j++)
697 for (int k = 0; k < end; k++)
701 score += pwmatrix.getPairwiseScore(sequenceString[i].charAt(k),
702 sequenceString[j].charAt(k));
706 System.err.println("err creating BLOSUM62 tree");
707 ex.printStackTrace();
711 distance[i][j] = (float) score;
713 if (score > maxscore)
720 for (int i = 0; i < (noseqs - 1); i++)
722 for (int j = i; j < noseqs; j++)
724 distance[i][j] = (float) maxscore - distance[i][j];
725 distance[j][i] = distance[i][j];
733 /* else if (pwtype.equals("SW"))
737 for (int i = 0; i < (noseqs - 1); i++)
739 for (int j = i; j < noseqs; j++)
741 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
742 as.calcScoreMatrix();
744 as.printAlignment(System.out);
745 distance[i][j] = (float) as.maxscore;
747 if (max < distance[i][j])
749 max = distance[i][j];
754 for (int i = 0; i < (noseqs - 1); i++)
756 for (int j = i; j < noseqs; j++)
758 distance[i][j] = max - distance[i][j];
759 distance[j][i] = distance[i][j];
768 public void makeLeaves()
770 cluster = new Vector();
772 for (int i = 0; i < noseqs; i++)
774 SequenceNode sn = new SequenceNode();
776 sn.setElement(sequence[i]);
777 sn.setName(sequence[i].getName());
780 int[] value = new int[1];
783 Cluster c = new Cluster(value);
784 cluster.addElement(c);
789 * Search for leaf nodes.
791 * @param node root node to search from
792 * @param leaves Vector of leaves to add leaf node objects to.
794 * @return Vector of leaf nodes on binary tree
796 public Vector findLeaves(SequenceNode node, Vector leaves)
803 if ( (node.left() == null) && (node.right() == null)) // Interior node detection
805 leaves.addElement(node);
811 /* TODO: Identify internal nodes... if (node.isSequenceLabel())
813 leaves.addElement(node);
815 findLeaves( (SequenceNode) node.left(), leaves);
816 findLeaves( (SequenceNode) node.right(), leaves);
823 * Find the leaf node with a particular ycount
825 * @param node initial point on tree to search from
826 * @param count value to search for
828 * @return null or the node with ycount=count
830 public Object findLeaf(SequenceNode node, int count)
832 found = _findLeaf(node, count);
837 /*#see findLeaf(SequenceNode node, count)
840 public Object _findLeaf(SequenceNode node, int count)
847 if (node.ycount == count)
849 found = node.element();
855 _findLeaf( (SequenceNode) node.left(), count);
856 _findLeaf( (SequenceNode) node.right(), count);
863 * printNode is mainly for debugging purposes.
865 * @param node SequenceNode
867 public void printNode(SequenceNode node)
874 if ( (node.left() == null) && (node.right() == null))
876 System.out.println("Leaf = " +
877 ( (SequenceI) node.element()).getName());
878 System.out.println("Dist " + ( (SequenceNode) node).dist);
879 System.out.println("Boot " + node.getBootstrap());
883 System.out.println("Dist " + ( (SequenceNode) node).dist);
884 printNode( (SequenceNode) node.left());
885 printNode( (SequenceNode) node.right());
892 * @param node root of the subtree to search; a leaf whose dist exceeds maxDistValue is stored in maxdist
894 public void findMaxDist(SequenceNode node)
901 if ( (node.left() == null) && (node.right() == null))
903 float dist = ( (SequenceNode) node).dist;
905 if (dist > maxDistValue)
907 maxdist = (SequenceNode) node;
913 findMaxDist( (SequenceNode) node.left());
914 findMaxDist( (SequenceNode) node.right());
921 * @return DOCUMENT ME!
923 public Vector getGroups()
931 * @return DOCUMENT ME!
933 public float getMaxHeight()
941 * @param node root of the subtree to examine
942 * @param threshold limit compared with node.height / maxheight; a node above it is added to groups
944 public void groupNodes(SequenceNode node, float threshold)
951 if ( (node.height / maxheight) > threshold)
953 groups.addElement(node);
957 groupNodes( (SequenceNode) node.left(), threshold);
958 groupNodes( (SequenceNode) node.right(), threshold);
965 * @param node node whose height is assigned from its parent's height; its children are visited recursively
967 * @return DOCUMENT ME!
969 public float findHeight(SequenceNode node)
976 if ( (node.left() == null) && (node.right() == null))
978 node.height = ( (SequenceNode) node.parent()).height + node.dist;
980 if (node.height > maxheight)
991 if (node.parent() != null)
993 node.height = ( (SequenceNode) node.parent()).height +
999 node.height = (float) 0.0;
1002 maxheight = findHeight( (SequenceNode) (node.left()));
1003 maxheight = findHeight( (SequenceNode) (node.right()));
1012 * @return DOCUMENT ME!
1014 public SequenceNode reRoot()
1016 if (maxdist != null)
1020 float tmpdist = maxdist.dist;
1023 SequenceNode sn = new SequenceNode();
1026 // New right hand of top
1027 SequenceNode snr = (SequenceNode) maxdist.parent();
1028 changeDirection(snr, maxdist);
1029 System.out.println("Printing reversed tree");
1031 snr.dist = tmpdist / 2;
1032 maxdist.dist = tmpdist / 2;
1035 maxdist.setParent(sn);
1038 sn.setLeft(maxdist);
1052 * @return true if original sequence data can be recovered
1054 public boolean hasOriginalSequenceData()
1056 return seqData != null;
1060 * Returns original alignment data used for calculation - or null where
1063 * @return null or cut'n'pasteable alignment
1065 public String printOriginalSequenceData(char gapChar)
1067 if (seqData == null)
1072 StringBuffer sb = new StringBuffer();
1073 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1074 for (int i = 0; i < seqdatas.length; i++)
1076 sb.append(new jalview.util.Format("%-" + 15 + "s").form(
1077 sequence[i].getName()));
1078 sb.append(" " + seqdatas[i] + "\n");
1080 return sb.toString();
1086 * @param node root of the subtree to print to System.out
1088 public void printN(SequenceNode node)
1095 if ( (node.left() != null) && (node.right() != null))
1097 printN( (SequenceNode) node.left());
1098 printN( (SequenceNode) node.right());
1102 System.out.println(" name = " +
1103 ( (SequenceI) node.element()).getName());
1106 System.out.println(" dist = " + ( (SequenceNode) node).dist + " " +
1107 ( (SequenceNode) node).count + " " +
1108 ( (SequenceNode) node).height);
1114 * @param node DOCUMENT ME!
1116 public void reCount(SequenceNode node)
1125 * @param node root of the subtree whose count and ycount fields are recomputed
1127 public void _reCount(SequenceNode node)
1134 if ( (node.left() != null) && (node.right() != null))
1136 _reCount( (SequenceNode) node.left());
1137 _reCount( (SequenceNode) node.right());
1139 SequenceNode l = (SequenceNode) node.left();
1140 SequenceNode r = (SequenceNode) node.right();
1142 ( (SequenceNode) node).count = l.count + r.count;
1143 ( (SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
1147 ( (SequenceNode) node).count = 1;
1148 ( (SequenceNode) node).ycount = ycount++;
1155 * @param node DOCUMENT ME!
1157 public void swapNodes(SequenceNode node)
1164 SequenceNode tmp = (SequenceNode) node.left();
1166 node.setLeft(node.right());
1173 * @param node DOCUMENT ME!
1174 * @param dir DOCUMENT ME!
1176 public void changeDirection(SequenceNode node, SequenceNode dir)
1183 if (node.parent() != top)
1185 changeDirection( (SequenceNode) node.parent(), node);
1187 SequenceNode tmp = (SequenceNode) node.parent();
1189 if (dir == node.left())
1191 node.setParent(dir);
1194 else if (dir == node.right())
1196 node.setParent(dir);
1202 if (dir == node.left())
1204 node.setParent(node.left());
1206 if (top.left() == node)
1208 node.setRight(top.right());
1212 node.setRight(top.left());
1217 node.setParent(node.right());
1219 if (top.left() == node)
1221 node.setLeft(top.right());
1225 node.setLeft(top.left());
1234 * @return DOCUMENT ME!
1236 public SequenceNode getMaxDist()
1244 * @return DOCUMENT ME!
1246 public SequenceNode getTopNode()
1253 * @return true if tree has real distances
1255 public boolean isHasDistances()
1257 return hasDistances;
1262 * @return true if tree has real bootstrap values
1264 public boolean isHasBootstrap()
1266 return hasBootstrap;
1269 public boolean isHasRootDistance()
1271 return hasRootDistance;
1280 * @version $Revision$
1287 * Creates a new Cluster object.
1289 * @param value DOCUMENT ME!
1291 public Cluster(int[] value)