2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
21 import jalview.datamodel.*;
23 import jalview.io.NewickFile;
25 import jalview.schemes.ResidueProperties;
27 import jalview.util.*;
43 //SequenceData is a string representation of what the user
44 //sees. The display may contain hidden columns.
45 public AlignmentView seqData=null;
// Nodes selected by groupNodes() are appended to this Vector.
55 Vector groups = new Vector();
// The three flags below are overwritten from the NewickFile when a tree is loaded from one.
67 boolean hasDistances = true; // normal case for jalview trees
68 boolean hasBootstrap = false; // normal case for jalview trees
70 private boolean hasRootDistance = true;
73 * Create a new NJTree object with leaves associated with sequences in seqs,
74 * and original alignment data represented by Cigar strings.
75 * @param seqs SequenceI[]
76 * @param odata AlignmentView
77 * @param treefile NewickFile
// Constructor taking the leaf sequences, the original alignment data and a parsed tree file.
// NOTE(review): the statements below index odata like an array (odata.length, odata[i])
// although the parameter is declared as AlignmentView - confirm whether they are live code
// or a leftover from an earlier Cigar[] based signature.
79 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) {
84 sequenceString = new String[odata.length];
// The first character of Comparison.GapChars is used as the gap symbol.
85 char gapChar = jalview.util.Comparison.GapChars.charAt(0);
86 for (int i = 0; i < odata.length; i++)
// Expand entry i into a sequence using gapChar and keep its string form.
88 SequenceI oseq_aligned = odata[i].getSeq(gapChar);
89 sequenceString[i] = oseq_aligned.getSequence();
94 * Creates a new NJTree object from a tree from an external source
96 * @param seqs SequenceI[] whose entries should be associated with the leaves of treefile
97 * @param treefile A parsed tree
// Builds the tree from an already parsed NewickFile and ties its leaves to the supplied sequences.
99 public NJTree(SequenceI[] seqs, NewickFile treefile)
101 this.sequence = seqs;
102 top = treefile.getTree();
105 * There is no dependent alignment to be recovered from an
108 if (sequenceString == null)
// Nothing cached yet: take the strings straight from the sequences.
110 sequenceString = new String[seqs.length];
111 for (int i = 0; i < seqs.length; i++)
113 sequenceString[i] = seqs[i].getSequence();
// Copy the flags that describe what the tree file contained.
118 hasDistances = treefile.HasDistances();
119 hasBootstrap = treefile.HasBootstrap();
120 hasRootDistance = treefile.HasRootDistance();
// findHeight() also assigns a height to the nodes it visits.
122 maxheight = findHeight(top);
// Matcher used to look up an alignment sequence for each leaf name.
124 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
126 Vector leaves = new Vector();
127 findLeaves(top, leaves);
130 int namesleft = seqs.length;
// Remembers sequences that have already been matched, so a repeated match can be detected.
135 Vector one2many=new Vector();
// Visit every leaf collected by findLeaves().
137 while (i < leaves.size())
139 j = (SequenceNode) leaves.elementAt(i++);
140 realnam = j.getName();
145 nam = algnIds.findIdMatch(realnam);
151 if (one2many.contains(nam)) {
153 // if (jalview.bin.Cache.log.isDebugEnabled())
154 // jalview.bin.Cache.log.debug("One 2 many relationship for "+nam.getName());
156 one2many.addElement(nam);
// Give the leaf a dummy Sequence and flag it as a placeholder.
// NOTE(review): presumably the branch taken when no id match was found - confirm.
162 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
163 j.setPlaceholder(true);
166 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
167 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment sequence ids (out of "+one2many.size()+" unique ids) linked to two or more leaves.");
173 * Creates a new NJTree object.
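175 * @param sequence sequences that become the leaves of the tree
176 * @param type tree-building method; "NJ" selects the NJ-specific distance routines
177 * @param pwtype pairwise distance measure; findDistances handles "PID" and "BL"
178 * @param start start of the range passed to SeqCigar for every sequence
179 * @param end end of that range; the derived view covers end-start+1 positions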
// Builds a tree by clustering the given sequences; the pairwise distances come from findDistances().
181 public NJTree(SequenceI[] sequence,
182 AlignmentView seqData,
187 this.sequence = sequence;
188 this.node = new Vector();
190 this.pwtype = pwtype;
// NOTE(review): seqData is assigned from the argument here and from a freshly built
// AlignmentView further down; presumably the view is only built when no seqData was
// passed in - confirm the selecting condition.
192 this.seqData = seqData;
// Wrap the start..end range of every sequence in a SeqCigar.
194 SeqCigar[] seqs = new SeqCigar[sequence.length];
195 for(int i=0; i<sequence.length; i++)
197 seqs[i] = new SeqCigar(sequence[i], start, end);
199 CigarArray sdata = new CigarArray(seqs);
// A single CigarArray.M operation of length end-start+1 is added.
200 sdata.addOperation(CigarArray.M, end-start+1);
201 this.seqData = new AlignmentView(sdata, start);
204 if (!(type.equals("NJ")))
209 if (!(pwtype.equals("PID")))
216 done = new int[sequence.length];
218 while ((i < sequence.length) && (sequence[i] != null))
// Distances are computed on the strings of seqData, gapped with the first character of Comparison.GapChars.
226 distance = findDistances(this.seqData.getSequenceStrings(Comparison.GapChars.charAt(0)));
// noClus starts out as the number of entries in cluster.
230 noClus = cluster.size();
238 * @return DOCUMENT ME!
// Renders the tree below getTopNode() through NewickFile.print(false, true).
240 public String toString()
242 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
244 return fout.print(false, true); // distances only
249 * used when the alignment associated to a tree has changed.
251 * @param alignment Vector
// Re-synchronises the placeholder flag and element of every leaf with the sequences in the given alignment Vector.
253 public void UpdatePlaceHolders(Vector alignment)
255 Vector leaves = new Vector();
256 findLeaves(top, leaves);
258 int sz = leaves.size();
259 SequenceIdMatcher seqmatcher = null;
264 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
// The leaf's current element is still part of the alignment: it is a real sequence.
266 if (alignment.contains(leaf.element()))
268 leaf.setPlaceholder(false);
272 if (seqmatcher == null)
274 // Only create this the first time we need it
275 SequenceI[] seqs = new SequenceI[alignment.size()];
277 for (int j = 0; j < seqs.length; j++)
278 seqs[j] = (SequenceI) alignment.elementAt(j);
280 seqmatcher = new SequenceIdMatcher(seqs);
// Look for an alignment sequence whose id matches the leaf name.
283 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
287 if (!leaf.isPlaceholder()) {
288 // remapping the node to a new sequenceI - should remove any refs to old one.
289 // TODO - make many sequenceI to one leaf mappings possible! (JBPNote)
// Attach the matched sequence to the leaf and clear the placeholder flag.
291 leaf.setPlaceholder(false);
292 leaf.setElement(nam);
296 if (!leaf.isPlaceholder()) {
297 // Construct a new placeholder sequence object for this leaf
298 leaf.setElement(new Sequence(leaf.getName(), "THISISAPLACEHLDER"));
// Mark the leaf as a placeholder.
300 leaf.setPlaceholder(true);
// Joins clusters pairwise and finally stores the resulting root node in top.
310 public void cluster()
314 if (type.equals("NJ"))
323 Cluster c = joinClusters(mini, minj);
// The merged cluster takes slot mini; slot minj is emptied.
327 cluster.setElementAt(null, minj);
328 cluster.setElementAt(c, mini);
333 boolean onefound = false;
338 for (int i = 0; i < noseqs; i++)
342 if (onefound == false)
// Join clusters one and two; the node stored at index one becomes the tree root.
354 joinClusters(one, two);
355 top = (SequenceNode) (node.elementAt(one));
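365 * @param i index of the first cluster to join; the new tree node is stored back at this index
366 * @param j index of the second cluster to join
368 * @return the merged Cluster (members of i followed by members of j)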
// Merges clusters i and j into one Cluster and creates the tree node that joins their two nodes.
370 public Cluster joinClusters(int i, int j)
// Saved before the distance matrix is rewritten further down.
372 float dist = distance[i][j];
374 int noi = ((Cluster) cluster.elementAt(i)).value.length;
375 int noj = ((Cluster) cluster.elementAt(j)).value.length;
// Concatenate the member arrays: the values of i first, then those of j.
377 int[] value = new int[noi + noj];
379 for (int ii = 0; ii < noi; ii++)
381 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
384 for (int ii = noi; ii < (noi + noj); ii++)
386 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
389 Cluster c = new Cluster(value);
// Update row and column i of the distance matrix for the merged cluster.
394 if (type.equals("NJ"))
396 findClusterNJDistance(i, j);
400 findClusterDistance(i, j);
// New node with the old i node as left child and the old j node as right child.
403 SequenceNode sn = new SequenceNode();
405 sn.setLeft((SequenceNode) (node.elementAt(i)));
406 sn.setRight((SequenceNode) (node.elementAt(j)));
408 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
409 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
// Set the dist of the two child nodes from the saved i-j distance.
411 if (type.equals("NJ"))
413 findNewNJDistances(tmpi, tmpj, dist);
417 findNewDistances(tmpi, tmpj, dist);
// The new node replaces the old one in slot i.
423 node.setElementAt(sn, i);
431 * @param tmpi DOCUMENT ME!
432 * @param tmpj DOCUMENT ME!
433 * @param dist DOCUMENT ME!
// Assigns the dist of the two joined nodes: tmpi gets (dist + ri - rj) / 2,
// tmpj gets whatever is left of dist.
435 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
439 tmpi.dist = ((dist + ri) - rj) / 2;
440 tmpj.dist = (dist - tmpi.dist);
456 * @param tmpi DOCUMENT ME!
457 * @param tmpj DOCUMENT ME!
458 * @param dist DOCUMENT ME!
// Assigns the dist of the two joined nodes as half of dist minus ih (for tmpi) or jh (for tmpj).
// NOTE(review): ih and jh are presumably accumulated while stepping down the left children below - confirm.
460 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
466 SequenceNode sni = tmpi;
467 SequenceNode snj = tmpj;
472 sni = (SequenceNode) sni.left();
478 snj = (SequenceNode) snj.left();
481 tmpi.dist = ((dist / 2) - ih);
482 tmpj.dist = ((dist / 2) - jh);
488 * @param i DOCUMENT ME!
489 * @param j DOCUMENT ME!
// Recomputes the distances from the merged i/j cluster to every other index and stores them in row and column i.
491 public void findClusterDistance(int i, int j)
// Member counts of the two clusters, used as weights below.
493 int noi = ((Cluster) cluster.elementAt(i)).value.length;
494 int noj = ((Cluster) cluster.elementAt(j)).value.length;
496 // New distances from cluster to others
497 float[] newdist = new float[noseqs];
499 for (int l = 0; l < noseqs; l++)
// Indices i and j themselves are skipped; the old distances are weighted by noi and noj.
501 if ((l != i) && (l != j))
503 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi +
// Write the new values symmetrically into row i and column i.
512 for (int ii = 0; ii < noseqs; ii++)
514 distance[i][ii] = newdist[ii];
515 distance[ii][i] = newdist[ii];
522 * @param i DOCUMENT ME!
523 * @param j DOCUMENT ME!
// Variant of the distance update used when type is "NJ": recomputes row and column i for the merged i/j cluster.
525 public void findClusterNJDistance(int i, int j)
528 // New distances from cluster to others
529 float[] newdist = new float[noseqs];
531 for (int l = 0; l < noseqs; l++)
// Indices i and j themselves are skipped.
533 if ((l != i) && (l != j))
535 newdist[l] = ((distance[i][l] + distance[j][l]) -
// Write the new values symmetrically into row i and column i.
544 for (int ii = 0; ii < noseqs; ii++)
546 distance[i][ii] = newdist[ii];
547 distance[ii][i] = newdist[ii];
554 * @param i DOCUMENT ME!
555 * @param j DOCUMENT ME!
557 * @return DOCUMENT ME!
// Sums distance[i][k] over every k other than i and j that is not marked done,
// then divides the sum by (noClus - 2).
// NOTE(review): confirm the division is guarded against noClus == 2.
559 public float findr(int i, int j)
563 for (int k = 0; k < noseqs; k++)
565 if ((k != i) && (k != j) && (done[k] != 1))
567 tmp = tmp + distance[i][k];
573 tmp = tmp / (noClus - 2);
582 * @return DOCUMENT ME!
// Scans every pair i < j that is not marked done and scores it as
// distance[i][j] - (findr(i, j) + findr(j, i)).
// NOTE(review): presumably records the lowest scoring pair in mini/minj - confirm.
584 public float findMinNJDistance()
588 for (int i = 0; i < (noseqs - 1); i++)
590 for (int j = i + 1; j < noseqs; j++)
592 if ((done[i] != 1) && (done[j] != 1))
594 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
613 * @return DOCUMENT ME!
// Finds the smallest distance[i][j] over all pairs i < j that are not marked done.
615 public float findMinDistance()
619 for (int i = 0; i < (noseqs - 1); i++)
621 for (int j = i + 1; j < noseqs; j++)
623 if ((done[i] != 1) && (done[j] != 1))
625 if (distance[i][j] < min)
630 min = distance[i][j];
642 * @return DOCUMENT ME!
// Builds a noseqs x noseqs distance matrix from the given sequence strings; pwtype selects the measure.
644 public float[][] findDistances(String[] sequenceString)
646 float[][] distance = new float[noseqs][noseqs];
648 if (pwtype.equals("PID"))
650 for (int i = 0; i < (noseqs - 1); i++)
652 for (int j = i; j < noseqs; j++)
// PID branch: distance is 100 minus the value returned by Comparison.PID, mirrored into [j][i].
660 distance[i][j] = 100 -
661 Comparison.PID(sequenceString[i], sequenceString[j]);
663 distance[j][i] = distance[i][j];
668 else if (pwtype.equals("BL"))
// BL branch: every pair is scored over the length of the first string.
671 int end = sequenceString[0].length();
672 for (int i = 0; i < (noseqs - 1); i++)
674 for (int j = i; j < noseqs; j++)
678 for (int k = 0; k < end; k++)
// Add the BLOSUM62 score of the two residues at position k.
682 score += ResidueProperties.getBLOSUM62(
683 sequenceString[i].substring(k, k + 1),
684 sequenceString[j].substring(k, k + 1));
688 System.err.println("err creating BLOSUM62 tree");
689 ex.printStackTrace();
693 distance[i][j] = (float) score;
695 if (score > maxscore)
// Second pass: turn scores into distances by subtracting each from maxscore, and mirror into [j][i].
702 for (int i = 0; i < (noseqs - 1); i++)
704 for (int j = i; j < noseqs; j++)
706 distance[i][j] = (float) maxscore - distance[i][j];
707 distance[j][i] = distance[i][j];
711 /* else if (pwtype.equals("SW"))
715 for (int i = 0; i < (noseqs - 1); i++)
717 for (int j = i; j < noseqs; j++)
719 AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
720 as.calcScoreMatrix();
722 as.printAlignment(System.out);
723 distance[i][j] = (float) as.maxscore;
725 if (max < distance[i][j])
727 max = distance[i][j];
732 for (int i = 0; i < (noseqs - 1); i++)
734 for (int j = i; j < noseqs; j++)
736 distance[i][j] = max - distance[i][j];
737 distance[j][i] = distance[i][j];
// Creates one SequenceNode and one single-slot Cluster per input sequence.
748 public void makeLeaves()
750 cluster = new Vector();
752 for (int i = 0; i < noseqs; i++)
754 SequenceNode sn = new SequenceNode();
// The node carries the sequence itself and takes over its name.
756 sn.setElement(sequence[i]);
757 sn.setName(sequence[i].getName());
// Each initial cluster holds exactly one value slot.
760 int[] value = new int[1];
763 Cluster c = new Cluster(value);
764 cluster.addElement(c);
771 * @param node DOCUMENT ME!
772 * @param leaves DOCUMENT ME!
774 * @return DOCUMENT ME!
// Recursively appends every node without children below node to leaves, left subtree first.
776 public Vector findLeaves(SequenceNode node, Vector leaves)
783 if ((node.left() == null) && (node.right() == null))
785 leaves.addElement(node);
// Descend into both children.
791 findLeaves((SequenceNode) node.left(), leaves);
792 findLeaves((SequenceNode) node.right(), leaves);
801 * @param node DOCUMENT ME!
802 * @param count DOCUMENT ME!
804 * @return DOCUMENT ME!
// Delegates to _findLeaf(node, count) and stores its result in found.
806 public Object findLeaf(SequenceNode node, int count)
808 found = _findLeaf(node, count);
816 * @param node DOCUMENT ME!
817 * @param count DOCUMENT ME!
819 * @return DOCUMENT ME!
// Recursive search: when a node's ycount equals count its element is stored in found;
// the left and right subtrees are searched as well.
821 public Object _findLeaf(SequenceNode node, int count)
828 if (node.ycount == count)
830 found = node.element();
836 _findLeaf((SequenceNode) node.left(), count);
837 _findLeaf((SequenceNode) node.right(), count);
844 * printNode is mainly for debugging purposes.
846 * @param node SequenceNode
// Dumps the subtree below node to System.out.
848 public void printNode(SequenceNode node)
855 if ((node.left() == null) && (node.right() == null))
// Leaf: print its sequence name, dist and bootstrap value.
857 System.out.println("Leaf = " +
858 ((SequenceI) node.element()).getName());
859 System.out.println("Dist " + ((SequenceNode) node).dist);
860 System.out.println("Boot " + node.getBootstrap());
// Internal node: print its dist, then recurse into both children.
864 System.out.println("Dist " + ((SequenceNode) node).dist);
865 printNode((SequenceNode) node.left());
866 printNode((SequenceNode) node.right());
873 * @param node DOCUMENT ME!
// Recursively looks for a leaf whose dist exceeds maxDistValue and remembers it in maxdist.
875 public void findMaxDist(SequenceNode node)
882 if ((node.left() == null) && (node.right() == null))
884 float dist = ((SequenceNode) node).dist;
886 if (dist > maxDistValue)
888 maxdist = (SequenceNode) node;
// Descend into both children.
894 findMaxDist((SequenceNode) node.left());
895 findMaxDist((SequenceNode) node.right());
902 * @return DOCUMENT ME!
// NOTE(review): presumably hands back the groups Vector that groupNodes() fills - confirm against the body.
904 public Vector getGroups()
912 * @return DOCUMENT ME!
// NOTE(review): presumably returns the maxheight value maintained by findHeight() - confirm against the body.
914 public float getMaxHeight()
922 * @param node DOCUMENT ME!
923 * @param threshold DOCUMENT ME!
// Collects into groups the nodes whose height / maxheight is above threshold,
// recursing through the left and right children.
925 public void groupNodes(SequenceNode node, float threshold)
932 if ((node.height / maxheight) > threshold)
934 groups.addElement(node);
938 groupNodes((SequenceNode) node.left(), threshold);
939 groupNodes((SequenceNode) node.right(), threshold);
946 * @param node DOCUMENT ME!
948 * @return DOCUMENT ME!
// Assigns a height to the nodes below node and keeps the largest value seen in maxheight.
// NOTE(review): the leaf branch dereferences node.parent() without a null check - confirm
// that a tree consisting of a single node can never be passed in.
950 public float findHeight(SequenceNode node)
957 if ((node.left() == null) && (node.right() == null))
// Leaf: height is the parent's height plus this node's dist.
959 node.height = ((SequenceNode) node.parent()).height + node.dist;
961 if (node.height > maxheight)
972 if (node.parent() != null)
974 node.height = ((SequenceNode) node.parent()).height +
// A node without a parent starts at height 0.
980 node.height = (float) 0.0;
983 maxheight = findHeight((SequenceNode) (node.left()));
984 maxheight = findHeight((SequenceNode) (node.right()));
993 * @return DOCUMENT ME!
// Re-roots the tree on the branch above maxdist: a new node sn is created and maxdist becomes its left child.
995 public SequenceNode reRoot()
1001 float tmpdist = maxdist.dist;
1004 SequenceNode sn = new SequenceNode();
1007 // New right hand of top
1008 SequenceNode snr = (SequenceNode) maxdist.parent();
// Reverse the links above snr so that maxdist's old parent can hang below the new root.
1009 changeDirection(snr, maxdist);
1010 System.out.println("Printing reversed tree");
// The original branch length is split evenly between snr and maxdist.
1012 snr.dist = tmpdist / 2;
1013 maxdist.dist = tmpdist / 2;
1016 maxdist.setParent(sn);
1019 sn.setLeft(maxdist);
1032 * @return true if original sequence data can be recovered
// True when an AlignmentView (seqData) is attached to this tree.
1034 public boolean hasOriginalSequenceData() {
1035 return seqData!=null;
1038 * Returns original alignment data used for calculation - or null where
1041 * @return null or cut'n'pasteable alignment
// Formats seqData as one line per sequence: the name formatted with "%-15s", a space,
// the sequence string obtained with the given gapChar, and a newline.
1043 public String printOriginalSequenceData(char gapChar)
1048 StringBuffer sb = new StringBuffer();
1049 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1050 for(int i=0; i<seqdatas.length; i++)
// Entry i of seqdatas is labelled with the name of sequence[i].
1052 sb.append(new jalview.util.Format("%-" + 15 + "s").form(
1053 sequence[i].getName()));
1054 sb.append(" "+seqdatas[i]+"\n");
1056 return sb.toString();
1061 * @param node DOCUMENT ME!
// Debug dump of the subtree below node to System.out.
1063 public void printN(SequenceNode node)
1070 if ((node.left() != null) && (node.right() != null))
// Node with two children: recurse into both.
1072 printN((SequenceNode) node.left());
1073 printN((SequenceNode) node.right());
1077 System.out.println(" name = " +
1078 ((SequenceI) node.element()).getName());
// dist, count and height, separated by single spaces.
1081 System.out.println(" dist = " + ((SequenceNode) node).dist + " " +
1082 ((SequenceNode) node).count + " " + ((SequenceNode) node).height);
1088 * @param node DOCUMENT ME!
// NOTE(review): presumably the public entry point for _reCount(node) - confirm against the body.
1090 public void reCount(SequenceNode node)
1099 * @param node DOCUMENT ME!
// Recomputes count and ycount for the nodes below node, children before parents.
1101 public void _reCount(SequenceNode node)
1108 if ((node.left() != null) && (node.right() != null))
1110 _reCount((SequenceNode) node.left());
1111 _reCount((SequenceNode) node.right());
1113 SequenceNode l = (SequenceNode) node.left();
1114 SequenceNode r = (SequenceNode) node.right();
// Node with two children: count is the sum of the children's counts, ycount the midpoint of theirs.
1116 ((SequenceNode) node).count = l.count + r.count;
1117 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
// Otherwise the node counts as one and takes the next value of the running ycount counter.
1121 ((SequenceNode) node).count = 1;
1122 ((SequenceNode) node).ycount = ycount++;
1129 * @param node DOCUMENT ME!
// Exchanges the children of node: the old left child is held in tmp while the right child moves to the left.
// NOTE(review): presumably completed by node.setRight(tmp) - confirm.
1131 public void swapNodes(SequenceNode node)
1138 SequenceNode tmp = (SequenceNode) node.left();
1140 node.setLeft(node.right());
1147 * @param node DOCUMENT ME!
1148 * @param dir DOCUMENT ME!
// Reverses parent/child links on the path from node up towards top so that dir becomes node's parent.
1150 public void changeDirection(SequenceNode node, SequenceNode dir)
1157 if (node.parent() != top)
// Not directly below top yet: first reverse the links higher up the tree.
1159 changeDirection((SequenceNode) node.parent(), node);
1161 SequenceNode tmp = (SequenceNode) node.parent();
1163 if (dir == node.left())
// dir was a child of node; it now becomes node's parent.
1165 node.setParent(dir);
1168 else if (dir == node.right())
1170 node.setParent(dir);
1176 if (dir == node.left())
1178 node.setParent(node.left());
// The other child slot of node is filled with the child of top that is not node itself.
1180 if (top.left() == node)
1182 node.setRight(top.right());
1186 node.setRight(top.left());
1191 node.setParent(node.right());
1193 if (top.left() == node)
1195 node.setLeft(top.right());
1199 node.setLeft(top.left());
1209 * @return DOCUMENT ME!
// NOTE(review): presumably returns the maxdist node selected by findMaxDist() - confirm against the body.
1211 public SequenceNode getMaxDist()
1219 * @return DOCUMENT ME!
// NOTE(review): presumably returns top, the root node that toString() hands to NewickFile - confirm against the body.
1221 public SequenceNode getTopNode()
1227 * @return true if tree has real distances
// Accessor for the hasDistances flag.
1229 public boolean isHasDistances() {
1230 return hasDistances;
1235 * @return true if tree has real bootstrap values
// Accessor for the hasBootstrap flag.
1237 public boolean isHasBootstrap() {
1238 return hasBootstrap;
// Accessor for the hasRootDistance flag, which is copied from NewickFile.HasRootDistance() when a tree file is loaded.
1241 public boolean isHasRootDistance()
1243 return hasRootDistance;
1253 * @version $Revision$
1260 * Creates a new Cluster object.
1262 * @param value DOCUMENT ME!
1264 public Cluster(int[] value)