2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.ScoreModels;
24 import jalview.api.analysis.ScoreModelI;
25 import jalview.datamodel.AlignmentView;
26 import jalview.datamodel.BinaryNode;
27 import jalview.datamodel.CigarArray;
28 import jalview.datamodel.NodeTransformI;
29 import jalview.datamodel.SeqCigar;
30 import jalview.datamodel.Sequence;
31 import jalview.datamodel.SequenceI;
32 import jalview.datamodel.SequenceNode;
33 import jalview.io.NewickFile;
35 import java.util.Enumeration;
36 import java.util.List;
37 import java.util.Vector;
50 public static final String AVERAGE_DISTANCE = "AV";
52 public static final String NEIGHBOUR_JOINING = "NJ";
54 public static final String FROM_FILE = "FromFile";
56 Vector<Cluster> cluster;
60 // SequenceData is a string representation of what the user
61 // sees. The display may contain hidden columns.
62 public AlignmentView seqData = null;
80 Vector<SequenceNode> groups = new Vector<SequenceNode>();
92 Vector<SequenceNode> node;
100 boolean hasDistances = true; // normal case for jalview trees
102 boolean hasBootstrap = false; // normal case for jalview trees
104 private boolean hasRootDistance = true;
107 * Create a new NJTree object with leaves associated with sequences in seqs,
108 * and original alignment data represented by Cigar strings.
117 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
119 this(seqs, treefile);
125 * sequenceString = new String[odata.length]; char gapChar =
126 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
127 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
128 * sequenceString[i] = oseq_aligned.getSequence(); }
133 * Creates a new NJTree object from a tree from an external source
136 * SequenceI which should be associated with leaves of treefile
140 public NJTree(SequenceI[] seqs, NewickFile treefile)
142 this.sequence = seqs;
143 top = treefile.getTree();
146 * There is no dependent alignment to be recovered from an imported tree.
148 * if (sequenceString == null) { sequenceString = new String[seqs.length];
149 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
150 * seqs[i].getSequence(); } }
153 hasDistances = treefile.HasDistances();
154 hasBootstrap = treefile.HasBootstrap();
155 hasRootDistance = treefile.HasRootDistance();
157 maxheight = findHeight(top);
159 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
161 Vector<SequenceNode> leaves = findLeaves(top);
164 int namesleft = seqs.length;
169 Vector<SequenceI> one2many = new Vector<SequenceI>();
170 int countOne2Many = 0;
171 while (i < leaves.size())
173 j = leaves.elementAt(i++);
174 realnam = j.getName();
179 nam = algnIds.findIdMatch(realnam);
185 if (one2many.contains(nam))
188 // if (jalview.bin.Cache.log.isDebugEnabled())
189 // jalview.bin.Cache.log.debug("One 2 many relationship for
194 one2many.addElement(nam);
200 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
201 j.setPlaceholder(true);
204 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
205 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
206 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
213 * Creates a new NJTree object.
226 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
227 String pwtype, ScoreModelI sm, int start, int end)
229 this.sequence = sequence;
230 this.node = new Vector<SequenceNode>();
232 this.pwtype = pwtype;
235 this.seqData = seqData;
239 SeqCigar[] seqs = new SeqCigar[sequence.length];
240 for (int i = 0; i < sequence.length; i++)
242 seqs[i] = new SeqCigar(sequence[i], start, end);
244 CigarArray sdata = new CigarArray(seqs);
245 sdata.addOperation(CigarArray.M, end - start + 1);
246 this.seqData = new AlignmentView(sdata, start);
248 // System.err.println("Made seqData");// dbg
249 if (!(type.equals(NEIGHBOUR_JOINING)))
251 type = AVERAGE_DISTANCE;
254 if (sm == null && !(pwtype.equals("PID")))
256 if (ScoreModels.getInstance().forName(pwtype) == null)
264 done = new int[sequence.length];
266 while ((i < sequence.length) && (sequence[i] != null))
274 distance = findDistances(sm);
275 // System.err.println("Made distances");// dbg
277 // System.err.println("Made leaves");// dbg
279 noClus = cluster.size();
282 // System.err.println("Made clusters");// dbg
287 * Generate a string representation of the Tree
289 * @return Newick File with all tree data available
292 public String toString()
294 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
296 return fout.print(isHasBootstrap(), isHasDistances(),
297 isHasRootDistance()); // output all data available for tree
302 * Used when the alignment associated with a tree has changed.
305 * Sequence set to be associated with tree nodes
307 public void UpdatePlaceHolders(List<SequenceI> list)
309 Vector<SequenceNode> leaves = findLeaves(top);
311 int sz = leaves.size();
312 SequenceIdMatcher seqmatcher = null;
317 SequenceNode leaf = leaves.elementAt(i++);
319 if (list.contains(leaf.element()))
321 leaf.setPlaceholder(false);
325 if (seqmatcher == null)
327 // Only create this the first time we need it
328 SequenceI[] seqs = new SequenceI[list.size()];
330 for (int j = 0; j < seqs.length; j++)
332 seqs[j] = list.get(j);
335 seqmatcher = new SequenceIdMatcher(seqs);
338 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
342 if (!leaf.isPlaceholder())
344 // remapping the node to a new sequenceI - should remove any refs to
346 // TODO - make many sequenceI to one leaf mappings possible!
349 leaf.setPlaceholder(false);
350 leaf.setElement(nam);
354 if (!leaf.isPlaceholder())
356 // Construct a new placeholder sequence object for this leaf
357 leaf.setElement(new Sequence(leaf.getName(),
358 "THISISAPLACEHLDER"));
360 leaf.setPlaceholder(true);
368 * rename any nodes according to their associated sequence. This will modify
369 * the tree's metadata! (ie the original NewickFile or newly generated
370 * BinaryTree's label data)
372 public void renameAssociatedNodes()
374 applyToNodes(new NodeTransformI()
378 public void transform(BinaryNode nd)
380 Object el = nd.element();
381 if (el != null && el instanceof SequenceI)
383 nd.setName(((SequenceI) el).getName());
392 public void cluster()
396 if (type.equals(NEIGHBOUR_JOINING))
405 Cluster c = joinClusters(mini, minj);
409 cluster.setElementAt(null, minj);
410 cluster.setElementAt(c, mini);
415 boolean onefound = false;
420 for (int i = 0; i < noseqs; i++)
424 if (onefound == false)
436 joinClusters(one, two);
437 top = (node.elementAt(one));
452 * @return DOCUMENT ME!
454 public Cluster joinClusters(int i, int j)
456 float dist = distance[i][j];
458 int noi = cluster.elementAt(i).value.length;
459 int noj = cluster.elementAt(j).value.length;
461 int[] value = new int[noi + noj];
463 for (int ii = 0; ii < noi; ii++)
465 value[ii] = cluster.elementAt(i).value[ii];
468 for (int ii = noi; ii < (noi + noj); ii++)
470 value[ii] = cluster.elementAt(j).value[ii - noi];
473 Cluster c = new Cluster(value);
478 if (type.equals(NEIGHBOUR_JOINING))
480 findClusterNJDistance(i, j);
484 findClusterDistance(i, j);
487 SequenceNode sn = new SequenceNode();
489 sn.setLeft((node.elementAt(i)));
490 sn.setRight((node.elementAt(j)));
492 SequenceNode tmpi = (node.elementAt(i));
493 SequenceNode tmpj = (node.elementAt(j));
495 if (type.equals(NEIGHBOUR_JOINING))
497 findNewNJDistances(tmpi, tmpj, dist);
501 findNewDistances(tmpi, tmpj, dist);
507 node.setElementAt(sn, i);
522 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
526 tmpi.dist = ((dist + ri) - rj) / 2;
527 tmpj.dist = (dist - tmpi.dist);
550 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
556 SequenceNode sni = tmpi;
557 SequenceNode snj = tmpj;
562 sni = (SequenceNode) sni.left();
568 snj = (SequenceNode) snj.left();
571 tmpi.dist = ((dist / 2) - ih);
572 tmpj.dist = ((dist / 2) - jh);
583 public void findClusterDistance(int i, int j)
585 int noi = cluster.elementAt(i).value.length;
586 int noj = cluster.elementAt(j).value.length;
588 // New distances from cluster to others
589 float[] newdist = new float[noseqs];
591 for (int l = 0; l < noseqs; l++)
593 if ((l != i) && (l != j))
595 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
604 for (int ii = 0; ii < noseqs; ii++)
606 distance[i][ii] = newdist[ii];
607 distance[ii][i] = newdist[ii];
619 public void findClusterNJDistance(int i, int j)
622 // New distances from cluster to others
623 float[] newdist = new float[noseqs];
625 for (int l = 0; l < noseqs; l++)
627 if ((l != i) && (l != j))
629 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
637 for (int ii = 0; ii < noseqs; ii++)
639 distance[i][ii] = newdist[ii];
640 distance[ii][i] = newdist[ii];
652 * @return DOCUMENT ME!
654 public float findr(int i, int j)
658 for (int k = 0; k < noseqs; k++)
660 if ((k != i) && (k != j) && (done[k] != 1))
662 tmp = tmp + distance[i][k];
668 tmp = tmp / (noClus - 2);
677 * @return DOCUMENT ME!
679 public float findMinNJDistance()
683 for (int i = 0; i < (noseqs - 1); i++)
685 for (int j = i + 1; j < noseqs; j++)
687 if ((done[i] != 1) && (done[j] != 1))
689 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
708 * @return DOCUMENT ME!
710 public float findMinDistance()
714 for (int i = 0; i < (noseqs - 1); i++)
716 for (int j = i + 1; j < noseqs; j++)
718 if ((done[i] != 1) && (done[j] != 1))
720 if (distance[i][j] < min)
725 min = distance[i][j];
735 * Calculate a distance matrix given the sequence input data and score model
737 * @return similarity matrix used to compute tree
739 public float[][] findDistances(ScoreModelI _pwmatrix)
742 float[][] dist = new float[noseqs][noseqs];
743 if (_pwmatrix == null)
745 // Resolve substitution model
746 _pwmatrix = ScoreModels.getInstance().forName(pwtype);
747 if (_pwmatrix == null)
749 _pwmatrix = ScoreModels.getInstance().forName("BLOSUM62");
752 dist = _pwmatrix.findDistances(seqData);
760 public void makeLeaves()
762 cluster = new Vector<Cluster>();
764 for (int i = 0; i < noseqs; i++)
766 SequenceNode sn = new SequenceNode();
768 sn.setElement(sequence[i]);
769 sn.setName(sequence[i].getName());
772 int[] value = new int[1];
775 Cluster c = new Cluster(value);
776 cluster.addElement(c);
781 * Search for leaf nodes below (or at) the given node
784 * root node to search from
788 public Vector<SequenceNode> findLeaves(SequenceNode nd)
790 Vector<SequenceNode> leaves = new Vector<SequenceNode>();
791 findLeaves(nd, leaves);
796 * Search for leaf nodes.
799 * root node to search from
801 * Vector of leaves to add leaf node objects too.
803 * @return Vector of leaf nodes on binary tree
805 Vector<SequenceNode> findLeaves(SequenceNode nd,
806 Vector<SequenceNode> leaves)
813 if ((nd.left() == null) && (nd.right() == null)) // Interior node
816 leaves.addElement(nd);
823 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
824 * leaves.addElement(node); }
826 findLeaves((SequenceNode) nd.left(), leaves);
827 findLeaves((SequenceNode) nd.right(), leaves);
834 * Find the leaf node with a particular ycount
837 * initial point on tree to search from
839 * value to search for
841 * @return null or the node with ycound=count
843 public Object findLeaf(SequenceNode nd, int count)
845 found = _findLeaf(nd, count);
851 * #see findLeaf(SequenceNode node, count)
853 public Object _findLeaf(SequenceNode nd, int count)
860 if (nd.ycount == count)
862 found = nd.element();
868 _findLeaf((SequenceNode) nd.left(), count);
869 _findLeaf((SequenceNode) nd.right(), count);
876 * printNode is mainly for debugging purposes.
881 public void printNode(SequenceNode nd)
888 if ((nd.left() == null) && (nd.right() == null))
890 System.out.println("Leaf = " + ((SequenceI) nd.element()).getName());
891 System.out.println("Dist " + nd.dist);
892 System.out.println("Boot " + nd.getBootstrap());
896 System.out.println("Dist " + nd.dist);
897 printNode((SequenceNode) nd.left());
898 printNode((SequenceNode) nd.right());
908 public void findMaxDist(SequenceNode nd)
915 if ((nd.left() == null) && (nd.right() == null))
917 float dist = nd.dist;
919 if (dist > maxDistValue)
927 findMaxDist((SequenceNode) nd.left());
928 findMaxDist((SequenceNode) nd.right());
935 * @return DOCUMENT ME!
937 public Vector<SequenceNode> getGroups()
945 * @return DOCUMENT ME!
947 public float getMaxHeight()
960 public void groupNodes(SequenceNode nd, float threshold)
967 if ((nd.height / maxheight) > threshold)
969 groups.addElement(nd);
973 groupNodes((SequenceNode) nd.left(), threshold);
974 groupNodes((SequenceNode) nd.right(), threshold);
984 * @return DOCUMENT ME!
986 public float findHeight(SequenceNode nd)
993 if ((nd.left() == null) && (nd.right() == null))
995 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
997 if (nd.height > maxheight)
1008 if (nd.parent() != null)
1010 nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
1015 nd.height = (float) 0.0;
1018 maxheight = findHeight((SequenceNode) (nd.left()));
1019 maxheight = findHeight((SequenceNode) (nd.right()));
1028 * @return DOCUMENT ME!
1030 public SequenceNode reRoot()
1032 if (maxdist != null)
1036 float tmpdist = maxdist.dist;
1039 SequenceNode sn = new SequenceNode();
1042 // New right hand of top
1043 SequenceNode snr = (SequenceNode) maxdist.parent();
1044 changeDirection(snr, maxdist);
1045 System.out.println("Printing reversed tree");
1047 snr.dist = tmpdist / 2;
1048 maxdist.dist = tmpdist / 2;
1051 maxdist.setParent(sn);
1054 sn.setLeft(maxdist);
1068 * @return true if original sequence data can be recovered
1070 public boolean hasOriginalSequenceData()
1072 return seqData != null;
1076 * Returns original alignment data used for calculation - or null where not
1079 * @return null or cut'n'pasteable alignment
1081 public String printOriginalSequenceData(char gapChar)
1083 if (seqData == null)
1088 StringBuffer sb = new StringBuffer();
1089 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1090 for (int i = 0; i < seqdatas.length; i++)
1092 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1094 sb.append(" " + seqdatas[i] + "\n");
1096 return sb.toString();
1105 public void printN(SequenceNode nd)
1112 if ((nd.left() != null) && (nd.right() != null))
1114 printN((SequenceNode) nd.left());
1115 printN((SequenceNode) nd.right());
1119 System.out.println(" name = " + ((SequenceI) nd.element()).getName());
1122 System.out.println(" dist = " + nd.dist + " " + nd.count + " "
1132 public void reCount(SequenceNode nd)
1136 // _lylimit = this.node.size();
1140 private long _lycount = 0, _lylimit = 0;
1148 public void _reCount(SequenceNode nd)
1150 // if (_lycount<_lylimit)
1152 // System.err.println("Warning: depth of _recount greater than number of nodes.");
1160 if ((nd.left() != null) && (nd.right() != null))
1163 _reCount((SequenceNode) nd.left());
1164 _reCount((SequenceNode) nd.right());
1166 SequenceNode l = (SequenceNode) nd.left();
1167 SequenceNode r = (SequenceNode) nd.right();
1169 nd.count = l.count + r.count;
1170 nd.ycount = (l.ycount + r.ycount) / 2;
1175 nd.ycount = ycount++;
1186 public void swapNodes(SequenceNode nd)
1193 SequenceNode tmp = (SequenceNode) nd.left();
1195 nd.setLeft(nd.right());
1207 public void changeDirection(SequenceNode nd, SequenceNode dir)
1214 if (nd.parent() != top)
1216 changeDirection((SequenceNode) nd.parent(), nd);
1218 SequenceNode tmp = (SequenceNode) nd.parent();
1220 if (dir == nd.left())
1225 else if (dir == nd.right())
1233 if (dir == nd.left())
1235 nd.setParent(nd.left());
1237 if (top.left() == nd)
1239 nd.setRight(top.right());
1243 nd.setRight(top.left());
1248 nd.setParent(nd.right());
1250 if (top.left() == nd)
1252 nd.setLeft(top.right());
1256 nd.setLeft(top.left());
1265 * @return DOCUMENT ME!
1267 public SequenceNode getMaxDist()
1275 * @return DOCUMENT ME!
1277 public SequenceNode getTopNode()
1284 * @return true if tree has real distances
1286 public boolean isHasDistances()
1288 return hasDistances;
1293 * @return true if tree has real bootstrap values
1295 public boolean isHasBootstrap()
1297 return hasBootstrap;
1300 public boolean isHasRootDistance()
1302 return hasRootDistance;
1306 * apply the given transform to all the nodes in the tree.
1308 * @param nodeTransformI
1310 public void applyToNodes(NodeTransformI nodeTransformI)
1312 for (Enumeration<SequenceNode> nodes = node.elements(); nodes
1313 .hasMoreElements(); nodeTransformI.transform(nodes
1325 * @version $Revision$
1332 * Creates a new Cluster object.
1337 public Cluster(int[] value)