2 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
23 import jalview.datamodel.*;
25 import jalview.schemes.*;
26 import jalview.util.*;
40 // SequenceData is a string representation of what the user
41 // sees. The display may contain hidden columns.
42 public AlignmentView seqData = null;
// Nodes collected by groupNodes(): a node is added when its
// height / maxheight ratio exceeds the threshold passed to that method.
60 Vector groups = new Vector();
// Overwritten from NewickFile.HasDistances() when a tree is imported.
82 boolean hasDistances = true; // normal case for jalview trees
// Overwritten from NewickFile.HasBootstrap() when a tree is imported.
84 boolean hasBootstrap = false; // normal case for jalview trees
// Overwritten from NewickFile.HasRootDistance() when a tree is imported;
// exposed through isHasRootDistance().
86 private boolean hasRootDistance = true;
89 * Create a new NJTree object with leaves associated with sequences in seqs,
90 * and original alignment data represented by Cigar strings.
// Builds a tree from an imported Newick file by delegating to
// NJTree(SequenceI[], NewickFile).
// NOTE(review): the statements that consume odata are not visible in this
// view; the fragment below is commented-out code that copied per-sequence
// strings out of odata.
99 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
101 this(seqs, treefile);
107 * sequenceString = new String[odata.length]; char gapChar =
108 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
109 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
110 * sequenceString[i] = oseq_aligned.getSequence(); }
115 * Creates a new NJTree object from a tree from an external source
118 * SequenceI which should be associated with leaves of treefile
// Builds a tree from an externally supplied Newick tree and tries to attach
// each leaf to one of the given sequences by id.
// NOTE(review): several lines of this constructor (local declarations,
// braces, else-branches) are not visible in this view.
122 public NJTree(SequenceI[] seqs, NewickFile treefile)
124 this.sequence = seqs;
// The root node comes straight from the parsed Newick file.
125 top = treefile.getTree();
128 * There is no dependent alignment to be recovered from an imported tree.
130 * if (sequenceString == null) { sequenceString = new String[seqs.length];
131 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
132 * seqs[i].getSequence(); } }
// Record which optional annotations the Newick file actually carried.
135 hasDistances = treefile.HasDistances();
136 hasBootstrap = treefile.HasBootstrap();
137 hasRootDistance = treefile.HasRootDistance();
139 maxheight = findHeight(top);
// Matcher used to look up alignment sequences by the leaf names in the tree.
141 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
143 Vector leaves = new Vector();
144 findLeaves(top, leaves);
147 int namesleft = seqs.length;
// Sequences already matched to a leaf; a second hit on the same sequence is
// checked for below (one sequence id linked to more than one leaf).
152 Vector one2many = new Vector();
153 int countOne2Many = 0;
154 while (i < leaves.size())
156 j = (SequenceNode) leaves.elementAt(i++);
157 realnam = j.getName();
162 nam = algnIds.findIdMatch(realnam);
168 if (one2many.contains(nam))
171 // if (jalview.bin.Cache.log.isDebugEnabled())
172 // jalview.bin.Cache.log.debug("One 2 many relationship for
177 one2many.addElement(nam);
// NOTE(review): presumably the no-match branch - the leaf receives a dummy
// Sequence and is flagged as a placeholder; confirm against the full file.
183 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
184 j.setPlaceholder(true);
187 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
188 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
189 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
196 * Creates a new NJTree object.
// Creates a tree calculated from the given sequences.
// Visible steps: store the inputs, capture the alignment data the tree is
// based on, validate type/pwtype, then compute the pairwise distance matrix
// with findDistances().
// NOTE(review): many statements (assignment of type, bodies of the
// validation ifs, leaf and cluster construction calls) are not visible in
// this view.
209 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
210 String pwtype, int start, int end)
212 this.sequence = sequence;
213 this.node = new Vector();
215 this.pwtype = pwtype;
218 this.seqData = seqData;
// Builds an AlignmentView from columns start..end of every sequence.
// NOTE(review): presumably only done when no seqData was supplied - the
// guarding condition is not visible here.
222 SeqCigar[] seqs = new SeqCigar[sequence.length];
223 for (int i = 0; i < sequence.length; i++)
225 seqs[i] = new SeqCigar(sequence[i], start, end);
227 CigarArray sdata = new CigarArray(seqs);
228 sdata.addOperation(CigarArray.M, end - start + 1);
229 this.seqData = new AlignmentView(sdata, start);
231 // System.err.println("Made seqData");// dbg
// Checks on the requested tree type and pairwise score model; what happens
// when a check fails is not visible in this view.
232 if (!(type.equals("NJ")))
237 if (!(pwtype.equals("PID")))
239 if (ResidueProperties.getScoreMatrix(pwtype) == null)
// One flag per sequence; other methods treat done[k] == 1 as "already merged".
247 done = new int[sequence.length];
249 while ((i < sequence.length) && (sequence[i] != null))
// Distances are computed on the gapped strings of the stored alignment view.
257 distance = findDistances(this.seqData
258 .getSequenceStrings(Comparison.GapChars.charAt(0)));
259 // System.err.println("Made distances");// dbg
261 // System.err.println("Made leaves");// dbg
263 noClus = cluster.size();
266 // System.err.println("Made clusters");// dbg
273 * @return the tree in Newick format, as produced by NewickFile.print(false, true)
// Serialises the tree as a Newick string: wraps getTopNode() in a
// NewickFile and returns the result of print(false, true).
275 public String toString()
277 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
279 return fout.print(false, true); // distances only
284 * Used when the alignment associated with a tree has changed.
// Re-associates the tree's leaves with the sequences held in `alignment`.
// A leaf whose element is contained in the alignment is marked as a real
// (non-placeholder) leaf; other leaves are looked up by name through a
// lazily created SequenceIdMatcher.
// NOTE(review): the loop header and several branch lines are not visible in
// this view.
289 public void UpdatePlaceHolders(Vector alignment)
291 Vector leaves = new Vector();
292 findLeaves(top, leaves);
294 int sz = leaves.size();
295 SequenceIdMatcher seqmatcher = null;
300 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
// Direct hit: the leaf's current element is itself a member of the alignment.
302 if (alignment.contains(leaf.element()))
304 leaf.setPlaceholder(false);
308 if (seqmatcher == null)
310 // Only create this the first time we need it
311 SequenceI[] seqs = new SequenceI[alignment.size()];
313 for (int j = 0; j < seqs.length; j++)
315 seqs[j] = (SequenceI) alignment.elementAt(j);
318 seqmatcher = new SequenceIdMatcher(seqs);
// Name-based lookup of a replacement sequence for this leaf.
321 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
325 if (!leaf.isPlaceholder())
327 // remapping the node to a new sequenceI - should remove any refs to
329 // TODO - make many sequenceI to one leaf mappings possible!
// Attach the matched sequence and clear the placeholder flag.
332 leaf.setPlaceholder(false);
333 leaf.setElement(nam);
// NOTE(review): presumably the no-match branch - a leaf that was real is
// given a fresh dummy Sequence and flagged as a placeholder.
337 if (!leaf.isPlaceholder())
339 // Construct a new placeholder sequence object for this leaf
340 leaf.setElement(new Sequence(leaf.getName(),
341 "THISISAPLACEHLDER"));
343 leaf.setPlaceholder(true);
// Drives the clustering that builds the tree.
// Visible steps: the pair (mini, minj) is merged with joinClusters(), the
// merged cluster replaces slot mini and slot minj is cleared; afterwards the
// entries `one` and `two` are joined and the node at index `one` becomes the
// tree root (top).
// NOTE(review): the enclosing loop and the code that selects mini/minj and
// one/two are not visible in this view.
353 public void cluster()
357 if (type.equals("NJ"))
366 Cluster c = joinClusters(mini, minj);
370 cluster.setElementAt(null, minj);
371 cluster.setElementAt(c, mini);
376 boolean onefound = false;
381 for (int i = 0; i < noseqs; i++)
385 if (onefound == false)
397 joinClusters(one, two);
398 top = (SequenceNode) (node.elementAt(one));
413 * @return DOCUMENT ME!
// Merges clusters i and j.
// Concatenates their member arrays into a new Cluster, updates the distance
// matrix for slot i (NJ or average-distance variant, chosen by `type`),
// creates a parent SequenceNode with node[i] as left and node[j] as right
// child, assigns the children's branch lengths, and stores the parent at
// index i of `node`.
// NOTE(review): the return statement is not visible; presumably returns c.
415 public Cluster joinClusters(int i, int j)
// Read the pair's distance before the matrix rows are overwritten below.
417 float dist = distance[i][j];
419 int noi = ((Cluster) cluster.elementAt(i)).value.length;
420 int noj = ((Cluster) cluster.elementAt(j)).value.length;
// Members of i first, then members of j.
422 int[] value = new int[noi + noj];
424 for (int ii = 0; ii < noi; ii++)
426 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
429 for (int ii = noi; ii < (noi + noj); ii++)
431 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
434 Cluster c = new Cluster(value);
439 if (type.equals("NJ"))
441 findClusterNJDistance(i, j);
445 findClusterDistance(i, j);
448 SequenceNode sn = new SequenceNode();
450 sn.setLeft((SequenceNode) (node.elementAt(i)));
451 sn.setRight((SequenceNode) (node.elementAt(j)));
453 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
454 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
// Branch lengths of the two children use the pre-merge distance `dist`.
456 if (type.equals("NJ"))
458 findNewNJDistances(tmpi, tmpj, dist);
462 findNewDistances(tmpi, tmpj, dist);
// The merged node takes over slot i.
468 node.setElementAt(sn, i);
// Assigns branch lengths to the two nodes being joined (called from
// joinClusters() when type is "NJ"): tmpi gets (dist + ri - rj) / 2 and
// tmpj gets the remainder of dist.
// NOTE(review): the rest of the signature and the computation of ri and rj
// are not visible in this view.
483 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
487 tmpi.dist = ((dist + ri) - rj) / 2;
488 tmpj.dist = (dist - tmpi.dist);
// Assigns branch lengths to the two nodes being joined (called from
// joinClusters() when type is not "NJ"): each child gets half of dist minus
// an amount (ih / jh) accumulated while walking down its left() children.
// NOTE(review): the loops that accumulate ih and jh are not visible in this
// view.
511 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
517 SequenceNode sni = tmpi;
518 SequenceNode snj = tmpj;
523 sni = (SequenceNode) sni.left();
529 snj = (SequenceNode) snj.left();
532 tmpi.dist = ((dist / 2) - ih);
533 tmpj.dist = ((dist / 2) - jh);
// Recomputes the distances from the merged cluster (stored in slot i) to
// every other slot, combining the old i and j distances weighted by the
// cluster sizes noi and noj, then writes the result into row i and column i
// of the distance matrix.
// NOTE(review): the divisor of the weighted sum is on a line not visible in
// this view.
544 public void findClusterDistance(int i, int j)
546 int noi = ((Cluster) cluster.elementAt(i)).value.length;
547 int noj = ((Cluster) cluster.elementAt(j)).value.length;
549 // New distances from cluster to others
550 float[] newdist = new float[noseqs];
552 for (int l = 0; l < noseqs; l++)
554 if ((l != i) && (l != j))
556 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
// Keep the matrix symmetric.
565 for (int ii = 0; ii < noseqs; ii++)
567 distance[i][ii] = newdist[ii];
568 distance[ii][i] = newdist[ii];
// NJ variant of the distance update after joining i and j: for every other
// slot l the new distance is (d[i][l] + d[j][l] - d[i][j]) / 2. The result
// is written into row i and column i of the distance matrix.
580 public void findClusterNJDistance(int i, int j)
583 // New distances from cluster to others
584 float[] newdist = new float[noseqs];
586 for (int l = 0; l < noseqs; l++)
588 if ((l != i) && (l != j))
590 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
// Keep the matrix symmetric.
598 for (int ii = 0; ii < noseqs; ii++)
600 distance[i][ii] = newdist[ii];
601 distance[ii][i] = newdist[ii];
613 * @return DOCUMENT ME!
// Sums distance[i][k] over every slot k other than i and j whose done flag
// is not 1, then divides the sum by (noClus - 2).
// NOTE(review): any guard around the division and the return statement are
// not visible in this view.
615 public float findr(int i, int j)
619 for (int k = 0; k < noseqs; k++)
621 if ((k != i) && (k != j) && (done[k] != 1))
623 tmp = tmp + distance[i][k];
629 tmp = tmp / (noClus - 2);
638 * @return DOCUMENT ME!
// Scans every pair i < j whose done flags are not 1 and evaluates the NJ
// criterion distance[i][j] - (findr(i, j) + findr(j, i)).
// NOTE(review): the minimum tracking and the return statement are not
// visible in this view.
640 public float findMinNJDistance()
644 for (int i = 0; i < (noseqs - 1); i++)
646 for (int j = i + 1; j < noseqs; j++)
648 if ((done[i] != 1) && (done[j] != 1))
650 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
669 * @return DOCUMENT ME!
// Scans every pair i < j whose done flags are not 1 and keeps the smallest
// distance[i][j] seen in `min`.
// NOTE(review): the initial value of min, any other bookkeeping and the
// return statement are not visible in this view.
671 public float findMinDistance()
675 for (int i = 0; i < (noseqs - 1); i++)
677 for (int j = i + 1; j < noseqs; j++)
679 if ((done[i] != 1) && (done[j] != 1))
681 if (distance[i][j] < min)
686 min = distance[i][j];
698 * @return DOCUMENT ME!
// Builds the symmetric noseqs x noseqs distance matrix from the aligned
// sequence strings.
// With pwtype "PID" the distance is 100 minus Comparison.PID of the pair.
// Otherwise a score matrix is looked up by pwtype (falling back to
// "BLOSUM62"), column scores are summed per pair, and each score is turned
// into a distance by subtracting it from the largest score seen.
// NOTE(review): the else keyword, try opener, maxscore update and return
// statement are not visible in this view.
700 public float[][] findDistances(String[] sequenceString)
702 float[][] distance = new float[noseqs][noseqs];
704 if (pwtype.equals("PID"))
706 for (int i = 0; i < (noseqs - 1); i++)
708 for (int j = i; j < noseqs; j++)
716 distance[i][j] = 100 - Comparison.PID(sequenceString[i],
719 distance[j][i] = distance[i][j];
726 // Pairwise substitution score (with no gap penalties)
727 ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype);
728 if (pwmatrix == null)
730 pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
// Column count is taken from the first string only.
// NOTE(review): assumes all strings have that length - confirm.
733 int end = sequenceString[0].length();
734 for (int i = 0; i < (noseqs - 1); i++)
736 for (int j = i; j < noseqs; j++)
740 for (int k = 0; k < end; k++)
744 score += pwmatrix.getPairwiseScore(sequenceString[i]
745 .charAt(k), sequenceString[j].charAt(k));
746 } catch (Exception ex)
748 System.err.println("err creating BLOSUM62 tree");
749 ex.printStackTrace();
753 distance[i][j] = (float) score;
755 if (score > maxscore)
// Second pass: convert similarity scores to distances and mirror them.
762 for (int i = 0; i < (noseqs - 1); i++)
764 for (int j = i; j < noseqs; j++)
766 distance[i][j] = (float) maxscore - distance[i][j];
767 distance[j][i] = distance[i][j];
776 * else if (pwtype.equals("SW")) { float max = -1;
778 * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) {
779 * AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
780 * as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out);
781 * distance[i][j] = (float) as.maxscore;
783 * if (max < distance[i][j]) { max = distance[i][j]; } } }
785 * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) {
786 * distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j]; } } }/
// Creates the initial state for clustering: one SequenceNode per sequence
// (element and name taken from sequence[i]) and one single-member Cluster
// per sequence, appended to a fresh `cluster` vector.
// NOTE(review): the lines that fill value[0] and add sn to the node vector
// are not visible in this view.
793 public void makeLeaves()
795 cluster = new Vector();
797 for (int i = 0; i < noseqs; i++)
799 SequenceNode sn = new SequenceNode();
801 sn.setElement(sequence[i]);
802 sn.setName(sequence[i].getName());
805 int[] value = new int[1];
808 Cluster c = new Cluster(value);
809 cluster.addElement(c);
814 * Search for leaf nodes.
817 * root node to search from
819 * Vector of leaves to add leaf node objects to.
821 * @return Vector of leaf nodes on binary tree
// Recursively collects the leaf nodes below `node` into `leaves`: a node
// with neither a left nor a right child is added, otherwise both children
// are searched.
// NOTE(review): the null guard, else keyword and return statement are not
// visible in this view.
823 public Vector findLeaves(SequenceNode node, Vector leaves)
830 if ((node.left() == null) && (node.right() == null)) // Leaf node: no children
833 leaves.addElement(node);
840 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
841 * leaves.addElement(node); }
843 findLeaves((SequenceNode) node.left(), leaves);
844 findLeaves((SequenceNode) node.right(), leaves);
851 * Find the leaf node with a particular ycount
854 * initial point on tree to search from
856 * value to search for
858 * @return null or the node with ycount=count
// Entry point for the leaf search: stores the result of
// _findLeaf(node, count) in the `found` field.
// NOTE(review): any reset of `found` and the return statement are not
// visible in this view.
860 public Object findLeaf(SequenceNode node, int count)
862 found = _findLeaf(node, count);
868 * @see #findLeaf(SequenceNode, int)
// Recursive worker for findLeaf(): when a node's ycount equals `count`, its
// element() is stored in the `found` field; the search also descends into
// both children.
// NOTE(review): the conditions guarding the comparison and recursion, and
// the return statement, are not visible in this view.
871 public Object _findLeaf(SequenceNode node, int count)
878 if (node.ycount == count)
880 found = node.element();
886 _findLeaf((SequenceNode) node.left(), count);
887 _findLeaf((SequenceNode) node.right(), count);
894 * printNode is mainly for debugging purposes.
// Debug dump of the subtree below `node` to System.out: for a leaf prints
// its sequence name, dist and bootstrap value; for an internal node prints
// dist and then recurses into the left and right children.
899 public void printNode(SequenceNode node)
906 if ((node.left() == null) && (node.right() == null))
909 .println("Leaf = " + ((SequenceI) node.element()).getName());
910 System.out.println("Dist " + ((SequenceNode) node).dist);
911 System.out.println("Boot " + node.getBootstrap());
915 System.out.println("Dist " + ((SequenceNode) node).dist);
916 printNode((SequenceNode) node.left());
917 printNode((SequenceNode) node.right());
// Recursively searches the subtree below `node` for the leaf with the
// largest dist; a leaf whose dist exceeds maxDistValue is remembered in the
// `maxdist` field.
// NOTE(review): the update of maxDistValue is not visible in this view.
927 public void findMaxDist(SequenceNode node)
934 if ((node.left() == null) && (node.right() == null))
936 float dist = ((SequenceNode) node).dist;
938 if (dist > maxDistValue)
940 maxdist = (SequenceNode) node;
946 findMaxDist((SequenceNode) node.left());
947 findMaxDist((SequenceNode) node.right());
954 * @return DOCUMENT ME!
956 public Vector getGroups()
964 * @return DOCUMENT ME!
966 public float getMaxHeight()
// Walks the subtree below `node` and adds to `groups` each node whose
// height, as a fraction of maxheight, is greater than `threshold`;
// otherwise both children are examined.
// NOTE(review): the null guard and the else keyword separating the two
// branches are not visible in this view.
979 public void groupNodes(SequenceNode node, float threshold)
986 if ((node.height / maxheight) > threshold)
988 groups.addElement(node);
992 groupNodes((SequenceNode) node.left(), threshold);
993 groupNodes((SequenceNode) node.right(), threshold);
1003 * @return DOCUMENT ME!
// Assigns node.height for every node below `node` as the parent's height
// plus the node's own dist (0.0 for a node without a parent) and tracks the
// largest leaf height in maxheight.
// NOTE(review): the maxheight assignment inside the leaf branch and the
// return statement are not visible in this view.
1005 public float findHeight(SequenceNode node)
1012 if ((node.left() == null) && (node.right() == null))
1014 node.height = ((SequenceNode) node.parent()).height + node.dist;
1016 if (node.height > maxheight)
1027 if (node.parent() != null)
1029 node.height = ((SequenceNode) node.parent()).height + node.dist;
// A node with no parent is the root, so its height is zero.
1034 node.height = (float) 0.0;
1037 maxheight = findHeight((SequenceNode) (node.left()));
1038 maxheight = findHeight((SequenceNode) (node.right()));
1047 * @return DOCUMENT ME!
// Re-roots the tree on the branch leading to the `maxdist` node (no-op path
// when maxdist is null): a new root node sn is created, the links from
// maxdist's former parent upwards are reversed with changeDirection(), the
// original branch length is split evenly between maxdist and its former
// parent, and maxdist becomes the left child of the new root.
// NOTE(review): the right-child wiring, the assignment of top and the return
// statement are not visible in this view.
1049 public SequenceNode reRoot()
1051 if (maxdist != null)
1055 float tmpdist = maxdist.dist;
1058 SequenceNode sn = new SequenceNode();
1061 // New right hand of top
1062 SequenceNode snr = (SequenceNode) maxdist.parent();
1063 changeDirection(snr, maxdist);
1064 System.out.println("Printing reversed tree");
1066 snr.dist = tmpdist / 2;
1067 maxdist.dist = tmpdist / 2;
1070 maxdist.setParent(sn);
1073 sn.setLeft(maxdist);
1087 * @return true if original sequence data can be recovered
// True when the seqData field (the alignment view captured for this tree)
// is non-null.
1089 public boolean hasOriginalSequenceData()
1091 return seqData != null;
1095 * Returns original alignment data used for calculation - or null where not
1098 * @return null or cut'n'pasteable alignment
// Formats the stored alignment data as text, one line per sequence: a
// left-justified 15-character field built from sequence[i], followed by a
// space and the sequence string rendered with gapChar.
// NOTE(review): the value returned when seqData is null and the argument
// taken from sequence[i] are on lines not visible in this view.
1100 public String printOriginalSequenceData(char gapChar)
1102 if (seqData == null)
1107 StringBuffer sb = new StringBuffer();
1108 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1109 for (int i = 0; i < seqdatas.length; i++)
1111 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1113 sb.append(" " + seqdatas[i] + "\n");
1115 return sb.toString();
// Debug dump to System.out: recurses into both children of a node that has
// two children, prints the sequence name of a node's element, and prints
// dist, count and height.
// NOTE(review): the else keyword and null guard are not visible, so exactly
// which nodes get the name line cannot be confirmed from this view.
1124 public void printN(SequenceNode node)
1131 if ((node.left() != null) && (node.right() != null))
1133 printN((SequenceNode) node.left());
1134 printN((SequenceNode) node.right());
1138 System.out.println(" name = "
1139 + ((SequenceI) node.element()).getName());
1142 System.out.println(" dist = " + ((SequenceNode) node).dist + " "
1143 + ((SequenceNode) node).count + " "
1144 + ((SequenceNode) node).height);
1153 public void reCount(SequenceNode node)
1157 // _lylimit = this.node.size();
1160 private long _lycount=0,_lylimit=0;
// Recursive worker that recomputes count and ycount below `node`.
// A node with two children gets count = sum of the children's counts and
// ycount = mean of the children's ycounts (children are processed first).
// Any other node gets count = 1 and the next value of the running `ycount`
// counter.
// NOTE(review): the null guard and else keyword are not visible in this
// view.
1167 public void _reCount(SequenceNode node)
1169 // if (_lycount<_lylimit)
1171 // System.err.println("Warning: depth of _recount greater than number of nodes.");
1179 if ((node.left() != null) && (node.right() != null))
1182 _reCount((SequenceNode) node.left());
1183 _reCount((SequenceNode) node.right());
1185 SequenceNode l = (SequenceNode) node.left();
1186 SequenceNode r = (SequenceNode) node.right();
1188 ((SequenceNode) node).count = l.count + r.count;
1189 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
1193 ((SequenceNode) node).count = 1;
1194 ((SequenceNode) node).ycount = ycount++;
// Exchanges the children of `node`: the old left child is saved in tmp and
// the right child is moved to the left.
// NOTE(review): the line that puts tmp on the right is not visible in this
// view.
1205 public void swapNodes(SequenceNode node)
1212 SequenceNode tmp = (SequenceNode) node.left();
1214 node.setLeft(node.right());
// Reverses parent/child links on the path from `node` up to `top`; called
// from reRoot(). `dir` is the neighbour of `node` that becomes its new
// parent.
// When node's parent is not top, the parent is processed first
// (recursively) and then node is re-parented to dir. When node's parent is
// top, node takes top's other child in the slot that pointed at dir.
// NOTE(review): the lines that move the old parent (tmp) into a child slot,
// and the else keywords, are not visible in this view.
1226 public void changeDirection(SequenceNode node, SequenceNode dir)
1233 if (node.parent() != top)
1235 changeDirection((SequenceNode) node.parent(), node);
1237 SequenceNode tmp = (SequenceNode) node.parent();
1239 if (dir == node.left())
1241 node.setParent(dir);
1244 else if (dir == node.right())
1246 node.setParent(dir);
// node is a direct child of top: splice in top's other child.
1252 if (dir == node.left())
1254 node.setParent(node.left());
1256 if (top.left() == node)
1258 node.setRight(top.right());
1262 node.setRight(top.left());
1267 node.setParent(node.right());
1269 if (top.left() == node)
1271 node.setLeft(top.right());
1275 node.setLeft(top.left());
1284 * @return DOCUMENT ME!
1286 public SequenceNode getMaxDist()
1294 * @return DOCUMENT ME!
1296 public SequenceNode getTopNode()
1303 * @return true if tree has real distances
// Accessor for the hasDistances flag.
1305 public boolean isHasDistances()
1307 return hasDistances;
1312 * @return true if tree has real bootstrap values
// Accessor for the hasBootstrap flag.
1314 public boolean isHasBootstrap()
1316 return hasBootstrap;
// Accessor for the hasRootDistance flag, which the Newick-import
// constructor sets from NewickFile.HasRootDistance().
1319 public boolean isHasRootDistance()
1321 return hasRootDistance;
1324 * apply the given transform to all the nodes in the tree.
1325 * @param nodeTransformI
// Calls nodeTransformI.transform() on every element of the `node` vector.
// The transform call sits in the for statement's update clause, so each
// iteration's work happens there.
// NOTE(review): the loop body is not visible in this view; presumably empty.
1327 public void applyToNodes(NodeTransformI nodeTransformI)
1329 for (Enumeration nodes = node.elements(); nodes.hasMoreElements();
1330 nodeTransformI.transform((BinaryNode)nodes.nextElement()))
1339 * @version $Revision$
1346 * Creates a new Cluster object.
1351 public Cluster(int[] value)