2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.analysis.ScoreModelI;
24 import jalview.datamodel.AlignmentView;
25 import jalview.datamodel.BinaryNode;
26 import jalview.datamodel.CigarArray;
27 import jalview.datamodel.NodeTransformI;
28 import jalview.datamodel.SeqCigar;
29 import jalview.datamodel.Sequence;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.SequenceNode;
32 import jalview.io.NewickFile;
33 import jalview.schemes.ResidueProperties;
35 import java.util.Enumeration;
36 import java.util.List;
37 import java.util.Vector;
51 // SequenceData is a string representation of what the user
52 // sees. The display may contain hidden columns.
53 public AlignmentView seqData = null;
71 Vector groups = new Vector();
93 boolean hasDistances = true; // normal case for jalview trees
95 boolean hasBootstrap = false; // normal case for jalview trees
97 private boolean hasRootDistance = true;
100 * Create a new NJTree object with leaves associated with sequences in seqs,
101 * and original alignment data represented by Cigar strings.
// Delegates tree parsing and leaf/sequence association to the
// (seqs, treefile) constructor.
110 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
112 this(seqs, treefile);
118 * sequenceString = new String[odata.length]; char gapChar =
119 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
120 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
121 * sequenceString[i] = oseq_aligned.getSequence(); }
126 * Creates a new NJTree object from a tree from an external source
129 * SequenceI which should be associated with leaves of treefile
// Wraps an imported tree: the root comes from treefile, and each leaf name
// is looked up among seqs with a SequenceIdMatcher.
133 public NJTree(SequenceI[] seqs, NewickFile treefile)
135 this.sequence = seqs;
136 top = treefile.getTree();
139 * There is no dependent alignment to be recovered from an imported tree.
141 * if (sequenceString == null) { sequenceString = new String[seqs.length];
142 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
143 * seqs[i].getSequence(); } }
// Copy the flags reported by the parsed Newick file (distances, bootstrap
// values, root distance).
146 hasDistances = treefile.HasDistances();
147 hasBootstrap = treefile.HasBootstrap();
148 hasRootDistance = treefile.HasRootDistance();
// Node heights are computed from the root; the result is kept as maxheight.
150 maxheight = findHeight(top);
// Matcher used to resolve leaf names against the supplied sequences.
152 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
154 Vector leaves = new Vector();
155 findLeaves(top, leaves);
158 int namesleft = seqs.length;
// one2many collects matched sequences already seen, so a sequence matched
// by more than one leaf can be detected.
163 Vector one2many = new Vector();
164 int countOne2Many = 0;
// Visit every leaf and try to resolve its name to one of seqs.
165 while (i < leaves.size())
167 j = (SequenceNode) leaves.elementAt(i++);
168 realnam = j.getName();
173 nam = algnIds.findIdMatch(realnam);
179 if (one2many.contains(nam))
182 // if (jalview.bin.Cache.log.isDebugEnabled())
183 // jalview.bin.Cache.log.debug("One 2 many relationship for
188 one2many.addElement(nam);
// NOTE(review): presumably reached when no matching sequence was found -
// the leaf gets a dummy Sequence and is flagged as a placeholder; confirm.
194 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
195 j.setPlaceholder(true);
198 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
199 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
200 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
207 * Creates a new NJTree object.
// Builds a tree from sequence data: records the inputs, prepares the
// alignment view, computes pairwise distances and sets up the clusters.
220 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
221 String pwtype, ScoreModelI sm, int start, int end)
223 this.sequence = sequence;
224 this.node = new Vector();
226 this.pwtype = pwtype;
229 this.seqData = seqData;
// Build an AlignmentView from the start..end range of every sequence.
// NOTE(review): presumably only used when the caller passed no seqData -
// confirm against the surrounding condition.
233 SeqCigar[] seqs = new SeqCigar[sequence.length];
234 for (int i = 0; i < sequence.length; i++)
236 seqs[i] = new SeqCigar(sequence[i], start, end);
238 CigarArray sdata = new CigarArray(seqs);
// A single M operation spanning end - start + 1 positions.
239 sdata.addOperation(CigarArray.M, end - start + 1);
240 this.seqData = new AlignmentView(sdata, start);
242 // System.err.println("Made seqData");// dbg
243 if (!(type.equals("NJ")))
// With no explicit score model and a pairwise type other than "PID", check
// whether a score matrix named pwtype is registered.
248 if (sm == null && !(pwtype.equals("PID")))
250 if (ResidueProperties.getScoreMatrix(pwtype) == null)
// One flag per sequence; findr and the minimum-distance searches skip
// entries whose flag equals 1.
258 done = new int[sequence.length];
// Walk the leading run of non-null sequences.
260 while ((i < sequence.length) && (sequence[i] != null))
// sm may be null here; findDistances resolves a model in that case.
268 distance = findDistances(sm);
269 // System.err.println("Made distances");// dbg
271 // System.err.println("Made leaves");// dbg
273 noClus = cluster.size();
276 // System.err.println("Made clusters");// dbg
281 * Generate a string representation of the Tree
283 * @return Newick File with all tree data available
// Wraps the top node in a NewickFile and prints it using this tree's
// bootstrap, distance and root-distance flags.
285 public String toString()
287 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
289 return fout.print(isHasBootstrap(), isHasDistances(),
290 isHasRootDistance()); // output all data available for tree
295 * Used when the alignment associated with a tree has changed.
298 * Sequence set to be associated with tree nodes
// Re-synchronises leaf/sequence links with the given list: leaves whose
// element is in the list are marked as real, the others are looked up by
// name.
300 public void UpdatePlaceHolders(List<SequenceI> list)
302 Vector leaves = new Vector();
303 findLeaves(top, leaves);
305 int sz = leaves.size();
306 SequenceIdMatcher seqmatcher = null;
311 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
// Leaf already refers to a sequence in the list: clear its placeholder flag.
313 if (list.contains(leaf.element()))
315 leaf.setPlaceholder(false);
319 if (seqmatcher == null)
321 // Only create this the first time we need it
322 SequenceI[] seqs = new SequenceI[list.size()];
324 for (int j = 0; j < seqs.length; j++)
326 seqs[j] = list.get(j);
329 seqmatcher = new SequenceIdMatcher(seqs);
// Try to find a sequence in the list whose id matches the leaf name.
332 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
336 if (!leaf.isPlaceholder())
338 // remapping the node to a new sequenceI - should remove any refs to
340 // TODO - make many sequenceI to one leaf mappings possible!
343 leaf.setPlaceholder(false);
344 leaf.setElement(nam);
// NOTE(review): presumably the no-match branch - a leaf that is not yet a
// placeholder is given a fresh placeholder sequence; confirm.
348 if (!leaf.isPlaceholder())
350 // Construct a new placeholder sequence object for this leaf
351 leaf.setElement(new Sequence(leaf.getName(),
352 "THISISAPLACEHLDER"));
354 leaf.setPlaceholder(true);
362 * Rename any nodes according to their associated sequence. This will modify
363 * the tree's metadata! (i.e. the original NewickFile or newly generated
364 * BinaryTree's label data)
// Applies a transform to every node that copies the name of its SequenceI
// element (when it has one) onto the node itself.
366 public void renameAssociatedNodes()
368 applyToNodes(new NodeTransformI()
372 public void transform(BinaryNode node)
374 Object el = node.element();
// Nodes without an element, or with a non-sequence element, keep their name.
375 if (el != null && el instanceof SequenceI)
377 node.setName(((SequenceI) el).getName());
// Merges cluster pairs (mini, minj) and finally joins the two remaining
// entries, whose node becomes the root (top).
// NOTE(review): the pair selection presumably differs between "NJ" and
// other types (findMinNJDistance vs findMinDistance) - confirm.
386 public void cluster()
390 if (type.equals("NJ"))
399 Cluster c = joinClusters(mini, minj);
// The merged cluster takes slot mini; slot minj is emptied.
403 cluster.setElementAt(null, minj);
404 cluster.setElementAt(c, mini);
// NOTE(review): presumably scans for the two entries still to be joined
// (one, two) - confirm.
409 boolean onefound = false;
414 for (int i = 0; i < noseqs; i++)
418 if (onefound == false)
// joinClusters stores the new parent node at index 'one'; it becomes the root.
430 joinClusters(one, two);
431 top = (SequenceNode) (node.elementAt(one));
446 * @return DOCUMENT ME!
// Merges clusters i and j: concatenates their member arrays, updates the
// distance matrix, and replaces node i with a new parent of nodes i and j.
448 public Cluster joinClusters(int i, int j)
// Remember the i-j distance before the matrix rows are rewritten below.
450 float dist = distance[i][j];
452 int noi = ((Cluster) cluster.elementAt(i)).value.length;
453 int noj = ((Cluster) cluster.elementAt(j)).value.length;
// Members of i first, followed by members of j.
455 int[] value = new int[noi + noj];
457 for (int ii = 0; ii < noi; ii++)
459 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
462 for (int ii = noi; ii < (noi + noj); ii++)
464 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
467 Cluster c = new Cluster(value);
// Recompute distances from the merged cluster to all others; "NJ" uses its
// own update rule.
472 if (type.equals("NJ"))
474 findClusterNJDistance(i, j);
478 findClusterDistance(i, j);
// New internal node with the two merged nodes as children.
481 SequenceNode sn = new SequenceNode();
483 sn.setLeft((SequenceNode) (node.elementAt(i)));
484 sn.setRight((SequenceNode) (node.elementAt(j)));
486 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
487 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
// Assign branch lengths to the two children from the saved distance.
489 if (type.equals("NJ"))
491 findNewNJDistances(tmpi, tmpj, dist);
495 findNewDistances(tmpi, tmpj, dist);
// The parent node takes over slot i in the node vector.
501 node.setElementAt(sn, i);
// Splits dist between the two branches: tmpi receives (dist + ri - rj) / 2
// and tmpj receives the remainder.
516 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
520 tmpi.dist = ((dist + ri) - rj) / 2;
521 tmpj.dist = (dist - tmpi.dist);
// Gives each branch half of dist minus an offset (ih for tmpi, jh for tmpj).
// NOTE(review): ih and jh are presumably accumulated while walking down the
// left children of each subtree - confirm.
544 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
550 SequenceNode sni = tmpi;
551 SequenceNode snj = tmpj;
556 sni = (SequenceNode) sni.left();
562 snj = (SequenceNode) snj.left();
565 tmpi.dist = ((dist / 2) - ih);
566 tmpj.dist = ((dist / 2) - jh);
// Recomputes the distances from the merged cluster (stored at index i) to
// every other entry, weighting the old i and j distances by cluster size.
577 public void findClusterDistance(int i, int j)
579 int noi = ((Cluster) cluster.elementAt(i)).value.length;
580 int noj = ((Cluster) cluster.elementAt(j)).value.length;
582 // New distances from cluster to others
583 float[] newdist = new float[noseqs];
585 for (int l = 0; l < noseqs; l++)
587 if ((l != i) && (l != j))
// Size-weighted sum of the two old distances.
// NOTE(review): presumably divided by noi + noj on the continuation line.
589 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
// Write the new values into both row i and column i to keep the matrix
// symmetric.
598 for (int ii = 0; ii < noseqs; ii++)
600 distance[i][ii] = newdist[ii];
601 distance[ii][i] = newdist[ii];
// Recomputes the distances from the merged cluster (stored at index i) to
// every other entry l as (d[i][l] + d[j][l] - d[i][j]) / 2.
613 public void findClusterNJDistance(int i, int j)
616 // New distances from cluster to others
617 float[] newdist = new float[noseqs];
619 for (int l = 0; l < noseqs; l++)
621 if ((l != i) && (l != j))
623 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
// Write the new values into both row i and column i to keep the matrix
// symmetric.
631 for (int ii = 0; ii < noseqs; ii++)
633 distance[i][ii] = newdist[ii];
634 distance[ii][i] = newdist[ii];
646 * @return DOCUMENT ME!
// Sums the distances from i to every entry other than i and j that is not
// flagged done, then scales the sum by 1 / (noClus - 2).
648 public float findr(int i, int j)
652 for (int k = 0; k < noseqs; k++)
654 if ((k != i) && (k != j) && (done[k] != 1))
656 tmp = tmp + distance[i][k];
// NOTE(review): confirm noClus > 2 is guaranteed (or guarded) at this point.
662 tmp = tmp / (noClus - 2);
671 * @return DOCUMENT ME!
// Evaluates distance[i][j] - (findr(i, j) + findr(j, i)) for every pair
// i < j in which neither entry is flagged done.
673 public float findMinNJDistance()
677 for (int i = 0; i < (noseqs - 1); i++)
679 for (int j = i + 1; j < noseqs; j++)
681 if ((done[i] != 1) && (done[j] != 1))
683 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
702 * @return DOCUMENT ME!
// Scans every pair i < j in which neither entry is flagged done and keeps
// the smallest distance seen in min.
704 public float findMinDistance()
708 for (int i = 0; i < (noseqs - 1); i++)
710 for (int j = i + 1; j < noseqs; j++)
712 if ((done[i] != 1) && (done[j] != 1))
714 if (distance[i][j] < min)
719 min = distance[i][j];
729 * Calculate a distance matrix given the sequence input data and score model
731 * @return distance matrix used to compute tree
// Computes the pairwise distance matrix for seqData with the given score
// model, resolving a model from pwtype (then "BLOSUM62") when none is given.
733 public float[][] findDistances(ScoreModelI _pwmatrix)
// NOTE(review): this allocation appears to be replaced by the result
// assigned from _pwmatrix.findDistances below - confirm it is still needed.
736 float[][] distance = new float[noseqs][noseqs];
737 if (_pwmatrix == null)
739 // Resolve substitution model
740 _pwmatrix = ResidueProperties.getScoreModel(pwtype);
// Fall back to BLOSUM62 when pwtype does not name a known score model.
741 if (_pwmatrix == null)
743 _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
746 distance = _pwmatrix.findDistances(seqData);
// Creates, for each of the noseqs sequences, a SequenceNode carrying the
// sequence and its name, plus a single-element Cluster.
754 public void makeLeaves()
756 cluster = new Vector();
758 for (int i = 0; i < noseqs; i++)
760 SequenceNode sn = new SequenceNode();
762 sn.setElement(sequence[i]);
763 sn.setName(sequence[i].getName());
// Each initial cluster holds exactly one member.
766 int[] value = new int[1];
769 Cluster c = new Cluster(value);
770 cluster.addElement(c);
775 * Search for leaf nodes.
778 * root node to search from
780 * Vector of leaves to add leaf node objects to.
782 * @return Vector of leaf nodes on binary tree
// Recursively collects into 'leaves' every node below 'node' that has
// neither a left nor a right child.
784 public Vector findLeaves(SequenceNode node, Vector leaves)
791 if ((node.left() == null) && (node.right() == null)) // leaf: no children
794 leaves.addElement(node);
801 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
802 * leaves.addElement(node); }
804 findLeaves((SequenceNode) node.left(), leaves);
805 findLeaves((SequenceNode) node.right(), leaves);
812 * Find the leaf node with a particular ycount
815 * initial point on tree to search from
817 * value to search for
819 * @return null or the node with ycount=count
// Delegates the search to _findLeaf and stores its result in 'found'.
821 public Object findLeaf(SequenceNode node, int count)
823 found = _findLeaf(node, count);
829 * @see #findLeaf(SequenceNode, int)
// Recursive search: a node whose ycount equals count has its element
// recorded in 'found'; both subtrees are searched as well.
831 public Object _findLeaf(SequenceNode node, int count)
838 if (node.ycount == count)
840 found = node.element();
// The return values of the recursive calls are not used here; the result is
// communicated through 'found'.
846 _findLeaf((SequenceNode) node.left(), count);
847 _findLeaf((SequenceNode) node.right(), count);
854 * printNode is mainly for debugging purposes.
// Dumps the subtree rooted at node to System.out.
859 public void printNode(SequenceNode node)
// Leaf: print the sequence name, branch length and bootstrap value.
866 if ((node.left() == null) && (node.right() == null))
869 .println("Leaf = " + ((SequenceI) node.element()).getName());
870 System.out.println("Dist " + node.dist);
871 System.out.println("Boot " + node.getBootstrap());
// NOTE(review): presumably the non-leaf branch - print the branch length,
// then recurse into both children.
875 System.out.println("Dist " + node.dist);
876 printNode((SequenceNode) node.left());
877 printNode((SequenceNode) node.right());
// Walks the subtree below node; at each leaf the branch length is compared
// with maxDistValue, and both children are visited recursively.
887 public void findMaxDist(SequenceNode node)
894 if ((node.left() == null) && (node.right() == null))
896 float dist = node.dist;
898 if (dist > maxDistValue)
906 findMaxDist((SequenceNode) node.left());
907 findMaxDist((SequenceNode) node.right());
914 * @return DOCUMENT ME!
916 public Vector getGroups()
924 * @return DOCUMENT ME!
926 public float getMaxHeight()
// Adds to 'groups' each node whose height, as a fraction of maxheight,
// exceeds threshold.
// NOTE(review): recursion into the children presumably happens only for
// nodes that do not exceed the threshold - confirm.
939 public void groupNodes(SequenceNode node, float threshold)
946 if ((node.height / maxheight) > threshold)
948 groups.addElement(node);
952 groupNodes((SequenceNode) node.left(), threshold);
953 groupNodes((SequenceNode) node.right(), threshold);
963 * @return DOCUMENT ME!
// Sets node.height to the parent's height plus this node's branch length,
// recursing through both children and comparing leaf heights with maxheight.
965 public float findHeight(SequenceNode node)
// Leaf case.
// NOTE(review): parent() is dereferenced without a null check here, unlike
// the internal-node case below - confirm a leaf can never be the root.
972 if ((node.left() == null) && (node.right() == null))
974 node.height = ((SequenceNode) node.parent()).height + node.dist;
976 if (node.height > maxheight)
// Internal node: a node without a parent (the root) starts at height 0.
987 if (node.parent() != null)
989 node.height = ((SequenceNode) node.parent()).height + node.dist;
994 node.height = (float) 0.0;
// NOTE(review): the right-hand result overwrites the left-hand one;
// presumably safe because the running maximum is returned - confirm.
997 maxheight = findHeight((SequenceNode) (node.left()));
998 maxheight = findHeight((SequenceNode) (node.right()));
1007 * @return DOCUMENT ME!
// Re-roots the tree on the branch above maxdist: the links above maxdist's
// parent are reversed and a new node sn is placed between the two halves.
1009 public SequenceNode reRoot()
1011 if (maxdist != null)
1015 float tmpdist = maxdist.dist;
1018 SequenceNode sn = new SequenceNode();
1021 // New right hand of top
1022 SequenceNode snr = (SequenceNode) maxdist.parent();
1023 changeDirection(snr, maxdist);
1024 System.out.println("Printing reversed tree");
// The old branch length is split evenly between maxdist and its former
// parent.
1026 snr.dist = tmpdist / 2;
1027 maxdist.dist = tmpdist / 2;
1030 maxdist.setParent(sn);
1033 sn.setLeft(maxdist);
1047 * @return true if original sequence data can be recovered
// True whenever an AlignmentView has been stored in seqData.
1049 public boolean hasOriginalSequenceData()
1051 return seqData != null;
1055 * Returns original alignment data used for calculation - or null where not
1058 * @return null or cut'n'pasteable alignment
// Builds a text block with one line per sequence string obtained from
// seqData: a "%-15s"-formatted field derived from sequence[i], a space, the
// sequence string and a newline.
1060 public String printOriginalSequenceData(char gapChar)
1062 if (seqData == null)
1067 StringBuffer sb = new StringBuffer();
1068 String[] seqdatas = seqData.getSequenceStrings(gapChar);
// Assumes sequence[] and seqdatas are index-aligned - TODO confirm.
1069 for (int i = 0; i < seqdatas.length; i++)
1071 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1073 sb.append(" " + seqdatas[i] + "\n");
1075 return sb.toString();
// Debug dump to System.out: recurses into both children of nodes that have
// two children, prints the element name, and prints dist and count values.
// NOTE(review): the name line is presumably the leaf branch - confirm.
1084 public void printN(SequenceNode node)
1091 if ((node.left() != null) && (node.right() != null))
1093 printN((SequenceNode) node.left());
1094 printN((SequenceNode) node.right());
1098 System.out.println(" name = "
1099 + ((SequenceI) node.element()).getName());
1102 System.out.println(" dist = " + node.dist + " " + node.count + " "
1112 public void reCount(SequenceNode node)
1116 // _lylimit = this.node.size();
1120 private long _lycount = 0, _lylimit = 0;
// Recomputes count/ycount bottom-up for the subtree rooted at node.
1128 public void _reCount(SequenceNode node)
1130 // if (_lycount<_lylimit)
1132 // System.err.println("Warning: depth of _recount greater than number of nodes.");
// Node with two children: process both subtrees first, then derive this
// node's values from them.
1140 if ((node.left() != null) && (node.right() != null))
1143 _reCount((SequenceNode) node.left());
1144 _reCount((SequenceNode) node.right());
1146 SequenceNode l = (SequenceNode) node.left();
1147 SequenceNode r = (SequenceNode) node.right();
// count is the sum of the children's counts; ycount is the mean of theirs.
1149 node.count = l.count + r.count;
1150 node.ycount = (l.ycount + r.ycount) / 2;
// NOTE(review): presumably the leaf branch - the leaf takes the next value
// of the running ycount counter; confirm.
1155 node.ycount = ycount++;
// Exchanges the children of node: the old left child is held in tmp while
// the right child is moved to the left.
// NOTE(review): tmp is presumably then installed as the right child.
1166 public void swapNodes(SequenceNode node)
1173 SequenceNode tmp = (SequenceNode) node.left();
1175 node.setLeft(node.right());
// Reverses parent/child links along the path from node up towards top so
// that dir (one of node's children) becomes node's parent.
1187 public void changeDirection(SequenceNode node, SequenceNode dir)
// Not yet directly below top: reverse the links further up first.
1194 if (node.parent() != top)
1196 changeDirection((SequenceNode) node.parent(), node);
1198 SequenceNode tmp = (SequenceNode) node.parent();
1200 if (dir == node.left())
1202 node.setParent(dir);
1205 else if (dir == node.right())
1207 node.setParent(dir);
// Directly below top: node's parent becomes dir, and the child slot on the
// other side is filled with top's other child (the one that is not node).
1213 if (dir == node.left())
1215 node.setParent(node.left());
1217 if (top.left() == node)
1219 node.setRight(top.right());
1223 node.setRight(top.left());
1228 node.setParent(node.right());
1230 if (top.left() == node)
1232 node.setLeft(top.right());
1236 node.setLeft(top.left());
1245 * @return DOCUMENT ME!
1247 public SequenceNode getMaxDist()
1255 * @return DOCUMENT ME!
1257 public SequenceNode getTopNode()
1264 * @return true if tree has real distances
// Plain accessor for the hasDistances field.
1266 public boolean isHasDistances()
1268 return hasDistances;
1273 * @return true if tree has real bootstrap values
// Plain accessor for the hasBootstrap field.
1275 public boolean isHasBootstrap()
1277 return hasBootstrap;
// Plain accessor for the hasRootDistance field; the flag is taken from the
// Newick file when a tree is imported.
1280 public boolean isHasRootDistance()
1282 return hasRootDistance;
1286 * apply the given transform to all the nodes in the tree.
1288 * @param nodeTransformI
// Runs the transform over every element of the node vector. The transform
// call sits in the for-loop's update clause, so each iteration consumes and
// transforms one element.
// NOTE(review): in the code visible here 'node' is only assigned by the
// distance-based constructor - confirm it is non-null for imported trees.
1290 public void applyToNodes(NodeTransformI nodeTransformI)
1292 for (Enumeration nodes = node.elements(); nodes.hasMoreElements(); nodeTransformI
1293 .transform((BinaryNode) nodes.nextElement()))
1304 * @version $Revision$
1311 * Creates a new Cluster object.
1316 public Cluster(int[] value)