2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
3 * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
23 import jalview.datamodel.*;
25 import jalview.schemes.*;
26 import jalview.util.*;
40 // SequenceData is a string representation of what the user
41 // sees. The display may contain hidden columns.
42 public AlignmentView seqData = null;
60 Vector groups = new Vector();
82 boolean hasDistances = true; // normal case for jalview trees
84 boolean hasBootstrap = false; // normal case for jalview trees
86 private boolean hasRootDistance = true;
89 * Create a new NJTree object with leaves associated with sequences in seqs,
90 * and original alignment data represented by Cigar strings.
99 public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
101 this(seqs, treefile);
107 * sequenceString = new String[odata.length]; char gapChar =
108 * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
109 * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
110 * sequenceString[i] = oseq_aligned.getSequence(); }
115 * Creates a new NJTree object from a tree from an external source
118 * SequenceI which should be associated with leafs of treefile
122 public NJTree(SequenceI[] seqs, NewickFile treefile)
124 this.sequence = seqs;
125 top = treefile.getTree();
128 * There is no dependent alignment to be recovered from an imported tree.
130 * if (sequenceString == null) { sequenceString = new String[seqs.length];
131 * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
132 * seqs[i].getSequence(); } }
135 hasDistances = treefile.HasDistances();
136 hasBootstrap = treefile.HasBootstrap();
137 hasRootDistance = treefile.HasRootDistance();
139 maxheight = findHeight(top);
141 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
143 Vector leaves = new Vector();
144 findLeaves(top, leaves);
147 int namesleft = seqs.length;
152 Vector one2many = new Vector();
153 int countOne2Many = 0;
154 while (i < leaves.size())
156 j = (SequenceNode) leaves.elementAt(i++);
157 realnam = j.getName();
162 nam = algnIds.findIdMatch(realnam);
168 if (one2many.contains(nam))
171 // if (jalview.bin.Cache.log.isDebugEnabled())
172 // jalview.bin.Cache.log.debug("One 2 many relationship for
177 one2many.addElement(nam);
183 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
184 j.setPlaceholder(true);
187 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
188 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
189 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
196 * Creates a new NJTree object.
209 public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
210 String pwtype, int start, int end)
212 this.sequence = sequence;
213 this.node = new Vector();
215 this.pwtype = pwtype;
218 this.seqData = seqData;
222 SeqCigar[] seqs = new SeqCigar[sequence.length];
223 for (int i = 0; i < sequence.length; i++)
225 seqs[i] = new SeqCigar(sequence[i], start, end);
227 CigarArray sdata = new CigarArray(seqs);
228 sdata.addOperation(CigarArray.M, end - start + 1);
229 this.seqData = new AlignmentView(sdata, start);
232 if (!(type.equals("NJ")))
237 if (!(pwtype.equals("PID")))
239 if (ResidueProperties.getScoreMatrix(pwtype) == null)
247 done = new int[sequence.length];
249 while ((i < sequence.length) && (sequence[i] != null))
257 distance = findDistances(this.seqData
258 .getSequenceStrings(Comparison.GapChars.charAt(0)));
262 noClus = cluster.size();
270 * @return DOCUMENT ME!
272 public String toString()
274 jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
276 return fout.print(false, true); // distances only
281 * used when the alignment associated to a tree has changed.
286 public void UpdatePlaceHolders(Vector alignment)
288 Vector leaves = new Vector();
289 findLeaves(top, leaves);
291 int sz = leaves.size();
292 SequenceIdMatcher seqmatcher = null;
297 SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);
299 if (alignment.contains(leaf.element()))
301 leaf.setPlaceholder(false);
305 if (seqmatcher == null)
307 // Only create this the first time we need it
308 SequenceI[] seqs = new SequenceI[alignment.size()];
310 for (int j = 0; j < seqs.length; j++)
312 seqs[j] = (SequenceI) alignment.elementAt(j);
315 seqmatcher = new SequenceIdMatcher(seqs);
318 SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
322 if (!leaf.isPlaceholder())
324 // remapping the node to a new sequenceI - should remove any refs to
326 // TODO - make many sequenceI to one leaf mappings possible!
329 leaf.setPlaceholder(false);
330 leaf.setElement(nam);
334 if (!leaf.isPlaceholder())
336 // Construct a new placeholder sequence object for this leaf
337 leaf.setElement(new Sequence(leaf.getName(),
338 "THISISAPLACEHLDER"));
340 leaf.setPlaceholder(true);
350 public void cluster()
354 if (type.equals("NJ"))
363 Cluster c = joinClusters(mini, minj);
367 cluster.setElementAt(null, minj);
368 cluster.setElementAt(c, mini);
373 boolean onefound = false;
378 for (int i = 0; i < noseqs; i++)
382 if (onefound == false)
394 joinClusters(one, two);
395 top = (SequenceNode) (node.elementAt(one));
410 * @return DOCUMENT ME!
412 public Cluster joinClusters(int i, int j)
414 float dist = distance[i][j];
416 int noi = ((Cluster) cluster.elementAt(i)).value.length;
417 int noj = ((Cluster) cluster.elementAt(j)).value.length;
419 int[] value = new int[noi + noj];
421 for (int ii = 0; ii < noi; ii++)
423 value[ii] = ((Cluster) cluster.elementAt(i)).value[ii];
426 for (int ii = noi; ii < (noi + noj); ii++)
428 value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi];
431 Cluster c = new Cluster(value);
436 if (type.equals("NJ"))
438 findClusterNJDistance(i, j);
442 findClusterDistance(i, j);
445 SequenceNode sn = new SequenceNode();
447 sn.setLeft((SequenceNode) (node.elementAt(i)));
448 sn.setRight((SequenceNode) (node.elementAt(j)));
450 SequenceNode tmpi = (SequenceNode) (node.elementAt(i));
451 SequenceNode tmpj = (SequenceNode) (node.elementAt(j));
453 if (type.equals("NJ"))
455 findNewNJDistances(tmpi, tmpj, dist);
459 findNewDistances(tmpi, tmpj, dist);
465 node.setElementAt(sn, i);
480 public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
484 tmpi.dist = ((dist + ri) - rj) / 2;
485 tmpj.dist = (dist - tmpi.dist);
508 public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
514 SequenceNode sni = tmpi;
515 SequenceNode snj = tmpj;
520 sni = (SequenceNode) sni.left();
526 snj = (SequenceNode) snj.left();
529 tmpi.dist = ((dist / 2) - ih);
530 tmpj.dist = ((dist / 2) - jh);
541 public void findClusterDistance(int i, int j)
543 int noi = ((Cluster) cluster.elementAt(i)).value.length;
544 int noj = ((Cluster) cluster.elementAt(j)).value.length;
546 // New distances from cluster to others
547 float[] newdist = new float[noseqs];
549 for (int l = 0; l < noseqs; l++)
551 if ((l != i) && (l != j))
553 newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
562 for (int ii = 0; ii < noseqs; ii++)
564 distance[i][ii] = newdist[ii];
565 distance[ii][i] = newdist[ii];
577 public void findClusterNJDistance(int i, int j)
580 // New distances from cluster to others
581 float[] newdist = new float[noseqs];
583 for (int l = 0; l < noseqs; l++)
585 if ((l != i) && (l != j))
587 newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
595 for (int ii = 0; ii < noseqs; ii++)
597 distance[i][ii] = newdist[ii];
598 distance[ii][i] = newdist[ii];
610 * @return DOCUMENT ME!
612 public float findr(int i, int j)
616 for (int k = 0; k < noseqs; k++)
618 if ((k != i) && (k != j) && (done[k] != 1))
620 tmp = tmp + distance[i][k];
626 tmp = tmp / (noClus - 2);
635 * @return DOCUMENT ME!
637 public float findMinNJDistance()
641 for (int i = 0; i < (noseqs - 1); i++)
643 for (int j = i + 1; j < noseqs; j++)
645 if ((done[i] != 1) && (done[j] != 1))
647 float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
666 * @return DOCUMENT ME!
668 public float findMinDistance()
672 for (int i = 0; i < (noseqs - 1); i++)
674 for (int j = i + 1; j < noseqs; j++)
676 if ((done[i] != 1) && (done[j] != 1))
678 if (distance[i][j] < min)
683 min = distance[i][j];
695 * @return DOCUMENT ME!
697 public float[][] findDistances(String[] sequenceString)
699 float[][] distance = new float[noseqs][noseqs];
701 if (pwtype.equals("PID"))
703 for (int i = 0; i < (noseqs - 1); i++)
705 for (int j = i; j < noseqs; j++)
713 distance[i][j] = 100 - Comparison.PID(sequenceString[i],
716 distance[j][i] = distance[i][j];
723 // Pairwise substitution score (with no gap penalties)
724 ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype);
725 if (pwmatrix == null)
727 pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
730 int end = sequenceString[0].length();
731 for (int i = 0; i < (noseqs - 1); i++)
733 for (int j = i; j < noseqs; j++)
737 for (int k = 0; k < end; k++)
741 score += pwmatrix.getPairwiseScore(sequenceString[i]
742 .charAt(k), sequenceString[j].charAt(k));
743 } catch (Exception ex)
745 System.err.println("err creating BLOSUM62 tree");
746 ex.printStackTrace();
750 distance[i][j] = (float) score;
752 if (score > maxscore)
759 for (int i = 0; i < (noseqs - 1); i++)
761 for (int j = i; j < noseqs; j++)
763 distance[i][j] = (float) maxscore - distance[i][j];
764 distance[j][i] = distance[i][j];
773 * else if (pwtype.equals("SW")) { float max = -1;
775 * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) {
776 * AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
777 * as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out);
778 * distance[i][j] = (float) as.maxscore;
780 * if (max < distance[i][j]) { max = distance[i][j]; } } }
782 * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) {
783 * distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j]; } } }/
790 public void makeLeaves()
792 cluster = new Vector();
794 for (int i = 0; i < noseqs; i++)
796 SequenceNode sn = new SequenceNode();
798 sn.setElement(sequence[i]);
799 sn.setName(sequence[i].getName());
802 int[] value = new int[1];
805 Cluster c = new Cluster(value);
806 cluster.addElement(c);
811 * Search for leaf nodes.
814 * root node to search from
816 * Vector of leaves to add leaf node objects too.
818 * @return Vector of leaf nodes on binary tree
820 public Vector findLeaves(SequenceNode node, Vector leaves)
827 if ((node.left() == null) && (node.right() == null)) // Interior node
830 leaves.addElement(node);
837 * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
838 * leaves.addElement(node); }
840 findLeaves((SequenceNode) node.left(), leaves);
841 findLeaves((SequenceNode) node.right(), leaves);
848 * Find the leaf node with a particular ycount
851 * initial point on tree to search from
853 * value to search for
855 * @return null or the node with ycound=count
857 public Object findLeaf(SequenceNode node, int count)
859 found = _findLeaf(node, count);
865 * #see findLeaf(SequenceNode node, count)
868 public Object _findLeaf(SequenceNode node, int count)
875 if (node.ycount == count)
877 found = node.element();
883 _findLeaf((SequenceNode) node.left(), count);
884 _findLeaf((SequenceNode) node.right(), count);
891 * printNode is mainly for debugging purposes.
896 public void printNode(SequenceNode node)
903 if ((node.left() == null) && (node.right() == null))
906 .println("Leaf = " + ((SequenceI) node.element()).getName());
907 System.out.println("Dist " + ((SequenceNode) node).dist);
908 System.out.println("Boot " + node.getBootstrap());
912 System.out.println("Dist " + ((SequenceNode) node).dist);
913 printNode((SequenceNode) node.left());
914 printNode((SequenceNode) node.right());
924 public void findMaxDist(SequenceNode node)
931 if ((node.left() == null) && (node.right() == null))
933 float dist = ((SequenceNode) node).dist;
935 if (dist > maxDistValue)
937 maxdist = (SequenceNode) node;
943 findMaxDist((SequenceNode) node.left());
944 findMaxDist((SequenceNode) node.right());
951 * @return DOCUMENT ME!
953 public Vector getGroups()
961 * @return DOCUMENT ME!
963 public float getMaxHeight()
976 public void groupNodes(SequenceNode node, float threshold)
983 if ((node.height / maxheight) > threshold)
985 groups.addElement(node);
989 groupNodes((SequenceNode) node.left(), threshold);
990 groupNodes((SequenceNode) node.right(), threshold);
1000 * @return DOCUMENT ME!
1002 public float findHeight(SequenceNode node)
1009 if ((node.left() == null) && (node.right() == null))
1011 node.height = ((SequenceNode) node.parent()).height + node.dist;
1013 if (node.height > maxheight)
1024 if (node.parent() != null)
1026 node.height = ((SequenceNode) node.parent()).height + node.dist;
1031 node.height = (float) 0.0;
1034 maxheight = findHeight((SequenceNode) (node.left()));
1035 maxheight = findHeight((SequenceNode) (node.right()));
1044 * @return DOCUMENT ME!
1046 public SequenceNode reRoot()
1048 if (maxdist != null)
1052 float tmpdist = maxdist.dist;
1055 SequenceNode sn = new SequenceNode();
1058 // New right hand of top
1059 SequenceNode snr = (SequenceNode) maxdist.parent();
1060 changeDirection(snr, maxdist);
1061 System.out.println("Printing reversed tree");
1063 snr.dist = tmpdist / 2;
1064 maxdist.dist = tmpdist / 2;
1067 maxdist.setParent(sn);
1070 sn.setLeft(maxdist);
1084 * @return true if original sequence data can be recovered
1086 public boolean hasOriginalSequenceData()
1088 return seqData != null;
1092 * Returns original alignment data used for calculation - or null where not
1095 * @return null or cut'n'pasteable alignment
1097 public String printOriginalSequenceData(char gapChar)
1099 if (seqData == null)
1104 StringBuffer sb = new StringBuffer();
1105 String[] seqdatas = seqData.getSequenceStrings(gapChar);
1106 for (int i = 0; i < seqdatas.length; i++)
1108 sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
1110 sb.append(" " + seqdatas[i] + "\n");
1112 return sb.toString();
1121 public void printN(SequenceNode node)
1128 if ((node.left() != null) && (node.right() != null))
1130 printN((SequenceNode) node.left());
1131 printN((SequenceNode) node.right());
1135 System.out.println(" name = "
1136 + ((SequenceI) node.element()).getName());
1139 System.out.println(" dist = " + ((SequenceNode) node).dist + " "
1140 + ((SequenceNode) node).count + " "
1141 + ((SequenceNode) node).height);
1150 public void reCount(SequenceNode node)
1162 public void _reCount(SequenceNode node)
1169 if ((node.left() != null) && (node.right() != null))
1171 _reCount((SequenceNode) node.left());
1172 _reCount((SequenceNode) node.right());
1174 SequenceNode l = (SequenceNode) node.left();
1175 SequenceNode r = (SequenceNode) node.right();
1177 ((SequenceNode) node).count = l.count + r.count;
1178 ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2;
1182 ((SequenceNode) node).count = 1;
1183 ((SequenceNode) node).ycount = ycount++;
1193 public void swapNodes(SequenceNode node)
1200 SequenceNode tmp = (SequenceNode) node.left();
1202 node.setLeft(node.right());
1214 public void changeDirection(SequenceNode node, SequenceNode dir)
1221 if (node.parent() != top)
1223 changeDirection((SequenceNode) node.parent(), node);
1225 SequenceNode tmp = (SequenceNode) node.parent();
1227 if (dir == node.left())
1229 node.setParent(dir);
1232 else if (dir == node.right())
1234 node.setParent(dir);
1240 if (dir == node.left())
1242 node.setParent(node.left());
1244 if (top.left() == node)
1246 node.setRight(top.right());
1250 node.setRight(top.left());
1255 node.setParent(node.right());
1257 if (top.left() == node)
1259 node.setLeft(top.right());
1263 node.setLeft(top.left());
1272 * @return DOCUMENT ME!
1274 public SequenceNode getMaxDist()
1282 * @return DOCUMENT ME!
1284 public SequenceNode getTopNode()
1291 * @return true if tree has real distances
1293 public boolean isHasDistances()
1295 return hasDistances;
1300 * @return true if tree has real bootstrap values
1302 public boolean isHasBootstrap()
1304 return hasBootstrap;
1307 public boolean isHasRootDistance()
1309 return hasRootDistance;
1312 * apply the given transform to all the nodes in the tree.
1313 * @param nodeTransformI
1315 public void applyToNodes(NodeTransformI nodeTransformI)
1317 for (Enumeration nodes = node.elements(); nodes.hasMoreElements();
1318 nodeTransformI.transform((BinaryNode)nodes.nextElement()))
1327 * @version $Revision$
1334 * Creates a new Cluster object.
1339 public Cluster(int[] value)