2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
21 import jalview.datamodel.*;
23 import jalview.io.NewickFile;
25 import jalview.schemes.ResidueProperties;
26 import jalview.schemes.ScoreMatrix;
28 import jalview.util.*;
44 //SequenceData is a string representation of what the user
45 //sees. The display may contain hidden columns.
// Alignment view the tree was calculated from; stays null when no view is attached
// (hasOriginalSequenceData() tests exactly this).
46 public AlignmentView seqData=null;
// Filled by groupNodes(): every node whose height relative to maxheight exceeds the threshold.
56 Vector groups = new Vector();
// The three flags below are overwritten with the NewickFile's own values when a tree is read from a file.
68 boolean hasDistances = true; // normal case for jalview trees
69 boolean hasBootstrap = false; // normal case for jalview trees
71 private boolean hasRootDistance = true;
74 * Create a new NJTree object with leaves associated with sequences in seqs,
75 * and original alignment data represented by Cigar strings.
76 * @param seqs SequenceI[]
77 * @param odata AlignmentView
78 * @param treefile NewickFile
// Constructor taking the leaf sequences, an AlignmentView and a parsed Newick tree.
80 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) {
// NOTE(review): odata is declared as AlignmentView but is used like an array (.length, [i]) on the
// lines below - check in the full file whether this loop is live code or a disabled remnant.
85 sequenceString = new String[odata.length];
// the first character of Comparison.GapChars is used as the gap symbol
86 char gapChar = jalview.util.Comparison.GapChars.charAt(0);
87 for (int i = 0; i < odata.length; i++)
// expand entry i with the gap symbol and keep its sequence string
89 SequenceI oseq_aligned = odata[i].getSeq(gapChar);
90 sequenceString[i] = oseq_aligned.getSequence();
95 * Creates a new NJTree object from a tree from an external source
97 * @param seqs SequenceI which should be associated with leaves of treefile
98 * @param treefile A parsed tree
// Builds a tree from an already parsed Newick file and ties each leaf to an entry of seqs by name.
100 public NJTree(SequenceI[] seqs, NewickFile treefile)
102 this.sequence = seqs;
// the root node comes straight from the parsed file
103 top = treefile.getTree();
106 * There is no dependent alignment to be recovered from an
// only fill sequenceString from the raw sequences when nothing has set it yet
109 if (sequenceString == null)
111 sequenceString = new String[seqs.length];
112 for (int i = 0; i < seqs.length; i++)
114 sequenceString[i] = seqs[i].getSequence();
// take over what the file reports about distances, bootstrap values and the root distance
119 hasDistances = treefile.HasDistances();
120 hasBootstrap = treefile.HasBootstrap();
121 hasRootDistance = treefile.HasRootDistance();
// findHeight() is run from the root and its result kept as maxheight
123 maxheight = findHeight(top);
// matcher used to look up one of seqs by a leaf's name
125 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
// collect every leaf node below the root
127 Vector leaves = new Vector();
128 findLeaves(top, leaves);
131 int namesleft = seqs.length;
// matched sequences seen so far; used to spot a sequence linked to more than one leaf
136 Vector one2many=new Vector();
138 while (i < leaves.size())
140 j = (SequenceNode) leaves.elementAt(i++);
141 realnam = j.getName();
// look for a supplied sequence whose id matches the leaf name
146 nam = algnIds.findIdMatch(realnam);
// already recorded: this sequence matches more than one leaf
152 if (one2many.contains(nam)) {
154 // if (jalview.bin.Cache.log.isDebugEnabled())
155 // jalview.bin.Cache.log.debug("One 2 many relationship for "+nam.getName());
157 one2many.addElement(nam);
// The leaf receives a stand-in Sequence carrying its own name and is flagged as a placeholder.
// NOTE(review): presumably the no-match branch - the guarding condition is not visible here.
163 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
164 j.setPlaceholder(true);
167 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
168 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment sequence ids (out of "+one2many.size()+" unique ids) linked to two or more leaves.");
174 * Creates a new NJTree object.
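176 * @param sequence sequences that become the leaves of the tree
177 * @param type clustering method; "NJ" selects the neighbour-joining code paths
178 * @param pwtype pairwise distance measure: "PID" or the name of a score matrix known to ResidueProperties
179 * @param start start position handed to each SeqCigar
180 * @param end end position handed to each SeqCigar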
// Constructor that calculates a tree from the given sequences (the rest of the parameter list is not
// visible in this extract; type, pwtype, start and end are used below).
182 public NJTree(SequenceI[] sequence,
183 AlignmentView seqData,
188 this.sequence = sequence;
189 this.node = new Vector();
191 this.pwtype = pwtype;
193 this.seqData = seqData;
// Build an AlignmentView from the raw sequences: one SeqCigar per sequence over start..end, wrapped
// in a CigarArray with a single M operation of length end-start+1.
// NOTE(review): this overwrites the seqData assigned just above; presumably guarded by a
// condition that is not visible here - confirm.
195 SeqCigar[] seqs = new SeqCigar[sequence.length];
196 for(int i=0; i<sequence.length; i++)
198 seqs[i] = new SeqCigar(sequence[i], start, end);
200 CigarArray sdata = new CigarArray(seqs);
201 sdata.addOperation(CigarArray.M, end-start+1);
202 this.seqData = new AlignmentView(sdata, start);
// any type other than "NJ" gets special handling (body not visible)
205 if (!(type.equals("NJ")))
// for a pwtype other than "PID", check that a score matrix of that name exists
210 if (!(pwtype.equals("PID")))
212 if (ResidueProperties.getScoreMatrix(pwtype)==null) {
// one int per sequence; other methods test done[k] != 1 to skip entries
219 done = new int[sequence.length];
// advances over the leading run of non-null sequences (loop body not visible)
221 while ((i < sequence.length) && (sequence[i] != null))
// pairwise distance matrix computed from the view's strings, using the first gap character
229 distance = findDistances(this.seqData.getSequenceStrings(Comparison.GapChars.charAt(0)));
// number of clusters currently held
233 noClus = cluster.size();
241 * @return DOCUMENT ME!
// Renders the tree as text by handing the top node to a NewickFile and printing it.
243 public String toString()
245 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
247 return fout.print(false, true); // distances only
252 * used when the alignment associated with a tree has changed.
254 * @param alignment Vector
// Re-binds the tree's leaves to the sequences in the supplied alignment vector and updates
// each leaf's placeholder flag accordingly.
256 public void UpdatePlaceHolders(Vector alignment)
258 Vector leaves = new Vector();
259 findLeaves(top, leaves);
261 int sz = leaves.size();
// created lazily, only once a leaf's element is not found in the alignment
262 SequenceIdMatcher seqmatcher = null;
267 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
// the leaf's element is in the alignment: clear its placeholder flag
269 if (alignment.contains(leaf.element()))
271 leaf.setPlaceholder(false);
275 if (seqmatcher == null)
277 // Only create this the first time we need it
278 SequenceI[] seqs = new SequenceI[alignment.size()];
280 for (int j = 0; j < seqs.length; j++)
281 seqs[j] = (SequenceI) alignment.elementAt(j);
283 seqmatcher = new SequenceIdMatcher(seqs);
// look for an alignment sequence whose id matches the leaf's name
286 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
290 if (!leaf.isPlaceholder()) {
291 // remapping the node to a new sequenceI - should remove any refs to old one.
292 // TODO - make many sequenceI to one leaf mappings possible! (JBPNote)
// bind the leaf to the matched sequence
294 leaf.setPlaceholder(false);
295 leaf.setElement(nam);
// NOTE(review): the lines below install a placeholder - presumably the branch taken when no
// match was found; that condition is not visible here.
299 if (!leaf.isPlaceholder()) {
300 // Construct a new placeholder sequence object for this leaf
301 leaf.setElement(new Sequence(leaf.getName(), "THISISAPLACEHLDER"));
303 leaf.setPlaceholder(true);
// Drives the clustering: joins the pair (mini, minj), then joins the final two entries and makes
// the resulting node the root. The enclosing loop headers are not visible in this extract.
313 public void cluster()
// the NJ case is distinguished from the default here (branch bodies not visible)
317 if (type.equals("NJ"))
// merge the selected pair; the merged cluster is stored at mini and slot minj is emptied
326 Cluster c = joinClusters(mini, minj);
330 cluster.setElementAt(null, minj);
331 cluster.setElementAt(c, mini);
// scan the entries to pick the indices 'one' and 'two' for the final join
// (the selection test itself is not visible)
336 boolean onefound = false;
341 for (int i = 0; i < noseqs; i++)
345 if (onefound == false)
// the last join; the node stored at index 'one' becomes the top of the tree
357 joinClusters(one, two);
358 top = (SequenceNode) (node.elementAt(one));
368 * @param i DOCUMENT ME!
369 * @param j DOCUMENT ME!
371 * @return DOCUMENT ME!
// Merges clusters i and j: builds the combined member list, updates the distance matrix, and
// replaces node i with a new internal node whose children are the old nodes i and j.
373 public Cluster joinClusters(int i, int j)
// read the pair's distance now - row/column i of the matrix is rewritten further down
375 float dist = distance[i][j];
377 int noi = ((Cluster) cluster.elementAt(i)).value.length;
378 int noj = ((Cluster) cluster.elementAt(j)).value.length;
// members of i first, followed by the members of j
380 int[] value = new int[noi + noj];
382 for (int ii = 0; ii < noi; ii++)
384 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
387 for (int ii = noi; ii < (noi + noj); ii++)
389 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
392 Cluster c = new Cluster(value);
// recompute distances from the merged cluster with the method matching the tree type
397 if (type.equals("NJ"))
399 findClusterNJDistance(i, j);
403 findClusterDistance(i, j);
// new internal node: old node i on the left, old node j on the right
406 SequenceNode sn = new SequenceNode();
408 sn.setLeft((SequenceNode) (node.elementAt(i)));
409 sn.setRight((SequenceNode) (node.elementAt(j)));
411 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
412 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
// set the branch lengths of the two children
414 if (type.equals("NJ"))
416 findNewNJDistances(tmpi, tmpj, dist);
420 findNewDistances(tmpi, tmpj, dist);
// the merged node takes over slot i
426 node.setElementAt(sn, i);
434 * @param tmpi DOCUMENT ME!
435 * @param tmpj DOCUMENT ME!
436 * @param dist DOCUMENT ME!
// Sets the branch lengths of the two nodes being joined (NJ case); the end of the parameter
// list is not visible in this extract.
438 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
// tmpi gets half of (dist + ri - rj); ri and rj are values maintained elsewhere in the class
442 tmpi.dist = ((dist + ri) - rj) / 2;
// tmpj gets whatever remains of dist
443 tmpj.dist = (dist - tmpi.dist);
459 * @param tmpi DOCUMENT ME!
460 * @param tmpj DOCUMENT ME!
461 * @param dist DOCUMENT ME!
// Sets the branch lengths of the two nodes being joined (non-NJ case); the end of the parameter
// list is not visible in this extract.
463 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
469 SequenceNode sni = tmpi;
470 SequenceNode snj = tmpj;
// step down through left children from each node; the loops and the accumulation of ih/jh are
// not visible - presumably ih and jh sum the branch lengths met on the way down (confirm)
475 sni = (SequenceNode) sni.left();
481 snj = (SequenceNode) snj.left();
// each node gets half the joining distance minus its ih/jh value
484 tmpi.dist = ((dist / 2) - ih);
485 tmpj.dist = ((dist / 2) - jh);
491 * @param i DOCUMENT ME!
492 * @param j DOCUMENT ME!
// Recomputes the distances between the cluster formed from i and j and every other entry, and
// stores them in row and column i of the distance matrix.
494 public void findClusterDistance(int i, int j)
// member counts of the two clusters, used as weights below
496 int noi = ((Cluster) cluster.elementAt(i)).value.length;
497 int noj = ((Cluster) cluster.elementAt(j)).value.length;
499 // New distances from cluster to others
500 float[] newdist = new float[noseqs];
502 for (int l = 0; l < noseqs; l++)
504 if ((l != i) && (l != j))
// distances from i and j to l, weighted by cluster size (the expression continues on a line not shown)
506 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
// write the new values into both row i and column i so the matrix stays symmetric;
// entries for i and j themselves keep newdist's default of 0 unless set on lines not shown
515 for (int ii = 0; ii < noseqs; ii++)
517 distance[i][ii] = newdist[ii];
518 distance[ii][i] = newdist[ii];
525 * @param i DOCUMENT ME!
526 * @param j DOCUMENT ME!
// NJ variant of the distance update: recomputes the distances between the joined pair (i, j) and
// every other entry, and stores them in row and column i of the distance matrix.
528 public void findClusterNJDistance(int i, int j)
531 // New distances from cluster to others
532 float[] newdist = new float[noseqs];
534 for (int l = 0; l < noseqs; l++)
535 if ((l != i) && (l != j))
// starts from the sum of i's and j's distances to l; the subtracted term is on a line not shown
538 newdist[l] = ((distance[i][l] + distance[j][l]) -
// write the new values into both row i and column i so the matrix stays symmetric
547 for (int ii = 0; ii < noseqs; ii++)
549 distance[i][ii] = newdist[ii];
550 distance[ii][i] = newdist[ii];
557 * @param i index whose distances to the other entries are summed
558 * @param j index left out of the sum (together with i itself)
560 * @return DOCUMENT ME!
// Accumulates i's distances to all other active entries and scales the total by the cluster count.
562 public float findr(int i, int j)
566 for (int k = 0; k < noseqs; k++)
// skip i, j and every entry already marked done
568 if ((k != i) && (k != j) && (done[k] != 1))
570 tmp = tmp + distance[i][k];
// NOTE(review): the divisor is zero when noClus == 2; tmp's declaration is not visible - if it
// is a float (as the float return type suggests) this yields Infinity/NaN rather than throwing.
576 tmp = tmp / (noClus - 2);
585 * @return DOCUMENT ME!
// Scans all pairs of active entries, computing the NJ-adjusted distance for each; how the
// minimum is tracked and returned is not visible in this extract.
587 public float findMinNJDistance()
// every unordered pair i < j
591 for (int i = 0; i < (noseqs - 1); i++)
593 for (int j = i + 1; j < noseqs; j++)
// only pairs where neither entry is marked done
595 if ((done[i] != 1) && (done[j] != 1))
// raw distance reduced by the findr() values of both entries
597 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
616 * @return DOCUMENT ME!
// Finds the smallest entry of the distance matrix among pairs of active entries.
618 public float findMinDistance()
// every unordered pair i < j
622 for (int i = 0; i < (noseqs - 1); i++)
624 for (int j = i + 1; j < noseqs; j++)
// only pairs where neither entry is marked done
626 if ((done[i] != 1) && (done[j] != 1))
// strictly smaller than the best so far: remember it
628 if (distance[i][j] < min)
633 min = distance[i][j];
645 * @return DOCUMENT ME!
// Builds a noseqs x noseqs matrix of pairwise distances from the given sequence strings, using
// percentage identity when pwtype is "PID" and a substitution score matrix otherwise.
647 public float[][] findDistances(String[] sequenceString)
// local matrix; it has the same name as the distance field used by the other methods
649 float[][] distance = new float[noseqs][noseqs];
651 if (pwtype.equals("PID"))
// pairs with j >= i; i stops at noseqs - 2, so the last diagonal cell is never written
653 for (int i = 0; i < (noseqs - 1); i++)
655 for (int j = i; j < noseqs; j++)
// distance is 100 minus the value returned by Comparison.PID, mirrored across the diagonal
663 distance[i][j] = 100 -
664 Comparison.PID(sequenceString[i], sequenceString[j]);
666 distance[j][i] = distance[i][j];
672 // Pairwise substitution score (with no gap penalties)
673 ScoreMatrix pwmatrix=ResidueProperties.getScoreMatrix(pwtype);
// unknown matrix name: fall back to BLOSUM62
674 if (pwmatrix==null) {
675 pwmatrix=ResidueProperties.getScoreMatrix("BLOSUM62");
// every string is read up to the length of the first one
678 int end = sequenceString[0].length();
679 for (int i = 0; i < (noseqs - 1); i++)
681 for (int j = i; j < noseqs; j++)
// sum the pairwise score column by column
685 for (int k = 0; k < end; k++)
689 score += pwmatrix.getPairwiseScore(sequenceString[i].charAt(k),
690 sequenceString[j].charAt(k));
// NOTE(review): the message names BLOSUM62 even when a different matrix was selected
694 System.err.println("err creating BLOSUM62 tree");
695 ex.printStackTrace();
// keep the raw score for now and track the largest one seen
699 distance[i][j] = (float) score;
701 if (score > maxscore)
// second pass: turn scores into distances by subtracting from the maximum, then mirror
708 for (int i = 0; i < (noseqs - 1); i++)
710 for (int j = i; j < noseqs; j++)
712 distance[i][j] = (float) maxscore - distance[i][j];
713 distance[j][i] = distance[i][j];
// the block comment opened on the next line holds a disabled alternative based on AlignSeq scores
721 /* else if (pwtype.equals("SW"))
725 for (int i = 0; i < (noseqs - 1); i++)
727 for (int j = i; j < noseqs; j++)
729 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
730 as.calcScoreMatrix();
732 as.printAlignment(System.out);
733 distance[i][j] = (float) as.maxscore;
735 if (max < distance[i][j])
737 max = distance[i][j];
742 for (int i = 0; i < (noseqs - 1); i++)
744 for (int j = i; j < noseqs; j++)
746 distance[i][j] = max - distance[i][j];
747 distance[j][i] = distance[i][j];
// Creates the starting state for clustering: one leaf node and one single-member cluster per sequence.
756 public void makeLeaves()
758 cluster = new Vector();
760 for (int i = 0; i < noseqs; i++)
// leaf node carrying sequence i and its name
762 SequenceNode sn = new SequenceNode();
764 sn.setElement(sequence[i]);
765 sn.setName(sequence[i].getName());
// cluster with a single member (the line that fills value[0] is not visible)
768 int[] value = new int[1];
771 Cluster c = new Cluster(value);
772 cluster.addElement(c);
779 * @param node DOCUMENT ME!
780 * @param leaves DOCUMENT ME!
782 * @return DOCUMENT ME!
// Recursively collects the leaf nodes below the given node into the supplied vector.
784 public Vector findLeaves(SequenceNode node, Vector leaves)
// a node with neither a left nor a right child is a leaf
791 if ((node.left() == null) && (node.right() == null))
793 leaves.addElement(node);
// descend into both children
799 findLeaves((SequenceNode) node.left(), leaves);
800 findLeaves((SequenceNode) node.right(), leaves);
809 * @param node DOCUMENT ME!
810 * @param count DOCUMENT ME!
812 * @return DOCUMENT ME!
// Entry point for the leaf search; delegates to _findLeaf and stores its result in 'found'.
814 public Object findLeaf(SequenceNode node, int count)
816 found = _findLeaf(node, count);
824 * @param node DOCUMENT ME!
825 * @param count DOCUMENT ME!
827 * @return DOCUMENT ME!
// Recursive worker for findLeaf: searches the subtree for a node whose ycount equals count.
829 public Object _findLeaf(SequenceNode node, int count)
// match: remember the node's element in 'found'
836 if (node.ycount == count)
838 found = node.element();
// descend into both children; the return values of these calls are not used here
844 _findLeaf((SequenceNode) node.left(), count);
845 _findLeaf((SequenceNode) node.right(), count);
852 * printNode is mainly for debugging purposes.
854 * @param node SequenceNode
// Debug helper: prints the subtree below node to System.out.
856 public void printNode(SequenceNode node)
// leaf: print name, branch length and bootstrap value
863 if ((node.left() == null) && (node.right() == null))
865 System.out.println("Leaf = " +
866 ((SequenceI) node.element()).getName());
867 System.out.println("Dist " + ((SequenceNode) node).dist);
868 System.out.println("Boot " + node.getBootstrap());
// otherwise: print the branch length, then descend into both children
872 System.out.println("Dist " + ((SequenceNode) node).dist);
873 printNode((SequenceNode) node.left());
874 printNode((SequenceNode) node.right());
881 * @param node DOCUMENT ME!
// Searches the subtree for the leaf with the largest branch length and records it in maxdist.
883 public void findMaxDist(SequenceNode node)
// only leaves are candidates
890 if ((node.left() == null) && (node.right() == null))
892 float dist = ((SequenceNode) node).dist;
// longer than the best so far (the update of maxDistValue is not visible)
894 if (dist > maxDistValue)
896 maxdist = (SequenceNode) node;
// descend into both children
902 findMaxDist((SequenceNode) node.left());
903 findMaxDist((SequenceNode) node.right());
910 * @return DOCUMENT ME!
912 public Vector getGroups()
920 * @return DOCUMENT ME!
922 public float getMaxHeight()
930 * @param node root of the subtree to examine
931 * @param threshold cut-off compared against node.height / maxheight
// Collects into 'groups' the nodes whose height, as a fraction of maxheight, exceeds threshold.
933 public void groupNodes(SequenceNode node, float threshold)
// relative height above the cut-off: this node is recorded as a group
940 if ((node.height / maxheight) > threshold)
942 groups.addElement(node);
// descend into both children (whether this is the else branch is not visible here)
946 groupNodes((SequenceNode) node.left(), threshold);
947 groupNodes((SequenceNode) node.right(), threshold);
954 * @param node DOCUMENT ME!
956 * @return DOCUMENT ME!
// Assigns a height to every node of the subtree (parent height plus branch length) and tracks
// the largest value in maxheight.
958 public float findHeight(SequenceNode node)
965 if ((node.left() == null) && (node.right() == null))
// leaf: parent's height plus own branch length
// NOTE(review): parent() is dereferenced without a null check here, unlike the internal-node
// path below - a tree consisting of a single leaf would fail on this line.
967 node.height = ((SequenceNode) node.parent()).height + node.dist;
969 if (node.height > maxheight)
// internal node: parent's height plus a term on a line not shown; a node without parent gets 0
980 if (node.parent() != null)
982 node.height = ((SequenceNode) node.parent()).height +
988 node.height = (float) 0.0;
// recurse into both children; each call's result is stored into maxheight in turn
991 maxheight = findHeight((SequenceNode) (node.left()));
992 maxheight = findHeight((SequenceNode) (node.right()));
1001 * @return DOCUMENT ME!
// Re-roots the tree on the branch above maxdist: a new node is created, maxdist becomes its left
// child, and the old branch length is split evenly between maxdist and its former parent.
1003 public SequenceNode reRoot()
// nothing to do unless a maxdist node has been recorded
1005 if (maxdist != null)
// remember the original branch length before it is halved below
1009 float tmpdist = maxdist.dist;
1012 SequenceNode sn = new SequenceNode();
1015 // New right hand of top
1016 SequenceNode snr = (SequenceNode) maxdist.parent();
// reverse the parent links from the former parent upwards
1017 changeDirection(snr, maxdist);
1018 System.out.println("Printing reversed tree");
// each side of the new root gets half of the original branch
1020 snr.dist = tmpdist / 2;
1021 maxdist.dist = tmpdist / 2;
1024 maxdist.setParent(sn);
1027 sn.setLeft(maxdist);
1040 * @return true if original sequence data can be recovered
// True when an AlignmentView (seqData) is attached to this tree.
1042 public boolean hasOriginalSequenceData() {
1043 return seqData!=null;
1046 * Returns original alignment data used for calculation - or null where
1049 * @return null or cut'n'pasteable alignment
// Renders the attached alignment view as text, one line per sequence.
1051 public String printOriginalSequenceData(char gapChar)
1056 StringBuffer sb = new StringBuffer();
// sequence strings from the view, with gaps written as gapChar
1057 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1058 for(int i=0; i<seqdatas.length; i++)
// name formatted with "%-15s" (presumably left-justified in 15 columns - see jalview.util.Format),
// then a space, the sequence string and a newline
1060 sb.append(new jalview.util.Format("%-" + 15 + "s").form(
1061 sequence[i].getName()));
1062 sb.append(" "+seqdatas[i]+"\n");
1064 return sb.toString();
1069 * @param node DOCUMENT ME!
// Debug helper: prints the subtree below node to System.out.
1071 public void printN(SequenceNode node)
// node with two children: descend into both first
1078 if ((node.left() != null) && (node.right() != null))
1080 printN((SequenceNode) node.left());
1081 printN((SequenceNode) node.right());
// name of the node's sequence (presumably the leaf branch - the else is not visible)
1085 System.out.println(" name = " +
1086 ((SequenceI) node.element()).getName());
// branch length, count and height of the node
1089 System.out.println(" dist = " + ((SequenceNode) node).dist + " " +
1090 ((SequenceNode) node).count + " " + ((SequenceNode) node).height);
1096 * @param node DOCUMENT ME!
1098 public void reCount(SequenceNode node)
1107 * @param node DOCUMENT ME!
// Recursive worker that recomputes count and ycount for every node of the subtree.
1109 public void _reCount(SequenceNode node)
// node with two children: process them first, then derive this node's values from theirs
1116 if ((node.left() != null) && (node.right() != null))
1118 _reCount((SequenceNode) node.left());
1119 _reCount((SequenceNode) node.right());
1121 SequenceNode l = (SequenceNode) node.left();
1122 SequenceNode r = (SequenceNode) node.right();
// count is the sum of the children's counts; ycount is the midpoint of their ycounts
1124 ((SequenceNode) node).count = l.count + r.count;
1125 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
// otherwise: count is 1 and ycount takes the next value of the running ycount counter
1129 ((SequenceNode) node).count = 1;
1130 ((SequenceNode) node).ycount = ycount++;
1137 * @param node DOCUMENT ME!
// Exchanges the children of node.
1139 public void swapNodes(SequenceNode node)
// keep the old left child before it is overwritten
1146 SequenceNode tmp = (SequenceNode) node.left();
// right child moves to the left slot; storing tmp as the new right child is presumably on a
// line not shown here
1148 node.setLeft(node.right());
1155 * @param node DOCUMENT ME!
1156 * @param dir DOCUMENT ME!
// Reverses parent/child links on the path from node up towards top so that dir, one of node's
// children, becomes node's parent. Used by reRoot().
1158 public void changeDirection(SequenceNode node, SequenceNode dir)
// not directly below the root: reverse the links of the ancestors first
1165 if (node.parent() != top)
1167 changeDirection((SequenceNode) node.parent(), node);
// former parent, kept before the parent link is redirected
1169 SequenceNode tmp = (SequenceNode) node.parent();
// the child we arrived from becomes the parent (the child-slot updates are not visible)
1171 if (dir == node.left())
1173 node.setParent(dir);
1176 else if (dir == node.right())
1178 node.setParent(dir);
// NOTE(review): presumably the branch for a node directly below top (the else is not visible):
// dir becomes the parent and the slot opposite dir receives top's other child.
1184 if (dir == node.left())
1186 node.setParent(node.left());
1188 if (top.left() == node)
1190 node.setRight(top.right());
1194 node.setRight(top.left());
1199 node.setParent(node.right());
1201 if (top.left() == node)
1203 node.setLeft(top.right());
1207 node.setLeft(top.left());
1217 * @return DOCUMENT ME!
1219 public SequenceNode getMaxDist()
1227 * @return DOCUMENT ME!
1229 public SequenceNode getTopNode()
1235 * @return true if tree has real distances
// Accessor for the hasDistances flag.
1237 public boolean isHasDistances() {
1238 return hasDistances;
1243 * @return true if tree has real bootstrap values
// Accessor for the hasBootstrap flag.
1245 public boolean isHasBootstrap() {
1246 return hasBootstrap;
// Accessor for the hasRootDistance flag (taken from the NewickFile when a tree is read from a file).
1249 public boolean isHasRootDistance()
1251 return hasRootDistance;
1261 * @version $Revision$
1268 * Creates a new Cluster object.
1270 * @param value DOCUMENT ME!
1272 public Cluster(int[] value)