X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FNJTree.java;h=e0e50fbf505b18d63dfd8bf9c9942483a3346b20;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=6dca7f8867f84578e99daa3dec89627472ac46b3;hpb=d423f22792e47dbc800ae220a58677f988971d06;p=jalview.git diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java old mode 100755 new mode 100644 index 6dca7f8..e0e50fb --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -1,28 +1,40 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5) - * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.analysis; -import java.util.*; - -import jalview.datamodel.*; -import jalview.io.*; -import jalview.schemes.*; -import jalview.util.*; +import jalview.api.analysis.ScoreModelI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.BinaryNode; +import jalview.datamodel.CigarArray; +import jalview.datamodel.NodeTransformI; +import jalview.datamodel.SeqCigar; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceNode; +import jalview.io.NewickFile; +import jalview.schemes.ResidueProperties; + +import java.util.Enumeration; +import java.util.List; +import java.util.Vector; /** * DOCUMENT ME! @@ -32,7 +44,7 @@ import jalview.util.*; */ public class NJTree { - Vector cluster; + Vector cluster; SequenceI[] sequence; @@ -56,7 +68,7 @@ public class NJTree float rj; - Vector groups = new Vector(); + Vector groups = new Vector(); SequenceNode maxdist; @@ -68,7 +80,7 @@ public class NJTree int ycount; - Vector node; + Vector node; String type; @@ -76,8 +88,6 @@ public class NJTree Object found = null; - Object leaves = null; - boolean hasDistances = true; // normal case for jalview trees boolean hasBootstrap = false; // normal case for jalview trees @@ -139,8 +149,7 @@ public class NJTree SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - Vector leaves = new Vector(); - findLeaves(top, leaves); + Vector leaves = findLeaves(top); int i = 0; int namesleft = seqs.length; @@ -148,11 +157,11 @@ public class NJTree SequenceNode j; SequenceI nam; String realnam; - Vector one2many = new Vector(); + Vector one2many = new Vector(); int countOne2Many = 0; while (i < leaves.size()) { - j = (SequenceNode) leaves.elementAt(i++); + j = leaves.elementAt(i++); realnam = j.getName(); nam = null; @@ -206,10 +215,10 @@ public class NJTree * DOCUMENT ME! */ public NJTree(SequenceI[] sequence, AlignmentView seqData, String type, - String pwtype, int start, int end) + String pwtype, ScoreModelI sm, int start, int end) { this.sequence = sequence; - this.node = new Vector(); + this.node = new Vector(); this.type = type; this.pwtype = pwtype; if (seqData != null) @@ -233,11 +242,11 @@ public class NJTree type = "AV"; } - if (!(pwtype.equals("PID"))) + if (sm == null && !(pwtype.equals("PID"))) { if (ResidueProperties.getScoreMatrix(pwtype) == null) { - type = "BLOSUM62"; + pwtype = "BLOSUM62"; } } @@ -253,8 +262,7 @@ public class NJTree noseqs = i++; - distance = findDistances(this.seqData - .getSequenceStrings(Comparison.GapChars.charAt(0))); + distance = findDistances(sm); // System.err.println("Made distances");// dbg makeLeaves(); // System.err.println("Made leaves");// dbg @@ -267,28 +275,29 @@ public class NJTree } /** - * DOCUMENT ME! + * Generate a string representation of the Tree * - * @return DOCUMENT ME! + * @return Newick File with all tree data available */ + @Override public String toString() { jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode()); - return fout.print(false, true); // distances only + return fout.print(isHasBootstrap(), isHasDistances(), + isHasRootDistance()); // output all data available for tree } /** * * used when the alignment associated to a tree has changed. * - * @param alignment - * Vector + * @param list + * Sequence set to be associated with tree nodes */ - public void UpdatePlaceHolders(Vector alignment) + public void UpdatePlaceHolders(List list) { - Vector leaves = new Vector(); - findLeaves(top, leaves); + Vector leaves = findLeaves(top); int sz = leaves.size(); SequenceIdMatcher seqmatcher = null; @@ -296,9 +305,9 @@ public class NJTree while (i < sz) { - SequenceNode leaf = (SequenceNode) leaves.elementAt(i++); + SequenceNode leaf = leaves.elementAt(i++); - if (alignment.contains(leaf.element())) + if (list.contains(leaf.element())) { leaf.setPlaceholder(false); } @@ -307,11 +316,11 @@ public class NJTree if (seqmatcher == null) { // Only create this the first time we need it - SequenceI[] seqs = new SequenceI[alignment.size()]; + SequenceI[] seqs = new SequenceI[list.size()]; for (int j = 0; j < seqs.length; j++) { - seqs[j] = (SequenceI) alignment.elementAt(j); + seqs[j] = list.get(j); } seqmatcher = new SequenceIdMatcher(seqs); @@ -347,6 +356,28 @@ public class NJTree } /** + * rename any nodes according to their associated sequence. This will modify + * the tree's metadata! (ie the original NewickFile or newly generated + * BinaryTree's label data) + */ + public void renameAssociatedNodes() + { + applyToNodes(new NodeTransformI() + { + + @Override + public void transform(BinaryNode nd) + { + Object el = nd.element(); + if (el != null && el instanceof SequenceI) + { + nd.setName(((SequenceI) el).getName()); + } + } + }); + } + + /** * DOCUMENT ME! */ public void cluster() @@ -394,7 +425,7 @@ public class NJTree } joinClusters(one, two); - top = (SequenceNode) (node.elementAt(one)); + top = (node.elementAt(one)); reCount(top); findHeight(top); @@ -415,19 +446,19 @@ public class NJTree { float dist = distance[i][j]; - int noi = ((Cluster) cluster.elementAt(i)).value.length; - int noj = ((Cluster) cluster.elementAt(j)).value.length; + int noi = cluster.elementAt(i).value.length; + int noj = cluster.elementAt(j).value.length; int[] value = new int[noi + noj]; for (int ii = 0; ii < noi; ii++) { - value[ii] = ((Cluster) cluster.elementAt(i)).value[ii]; + value[ii] = cluster.elementAt(i).value[ii]; } for (int ii = noi; ii < (noi + noj); ii++) { - value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi]; + value[ii] = cluster.elementAt(j).value[ii - noi]; } Cluster c = new Cluster(value); @@ -446,11 +477,11 @@ public class NJTree SequenceNode sn = new SequenceNode(); - sn.setLeft((SequenceNode) (node.elementAt(i))); - sn.setRight((SequenceNode) (node.elementAt(j))); + sn.setLeft((node.elementAt(i))); + sn.setRight((node.elementAt(j))); - SequenceNode tmpi = (SequenceNode) (node.elementAt(i)); - SequenceNode tmpj = (SequenceNode) (node.elementAt(j)); + SequenceNode tmpi = (node.elementAt(i)); + SequenceNode tmpj = (node.elementAt(j)); if (type.equals("NJ")) { @@ -542,8 +573,8 @@ public class NJTree */ public void findClusterDistance(int i, int j) { - int noi = ((Cluster) cluster.elementAt(i)).value.length; - int noj = ((Cluster) cluster.elementAt(j)).value.length; + int noi = cluster.elementAt(i).value.length; + int noj = cluster.elementAt(j).value.length; // New distances from cluster to others float[] newdist = new float[noseqs]; @@ -692,99 +723,26 @@ public class NJTree } /** - * DOCUMENT ME! + * Calculate a distance matrix given the sequence input data and score model * - * @return DOCUMENT ME! + * @return similarity matrix used to compute tree */ - public float[][] findDistances(String[] sequenceString) + public float[][] findDistances(ScoreModelI _pwmatrix) { - float[][] distance = new float[noseqs][noseqs]; - if (pwtype.equals("PID")) + float[][] dist = new float[noseqs][noseqs]; + if (_pwmatrix == null) { - for (int i = 0; i < (noseqs - 1); i++) + // Resolve substitution model + _pwmatrix = ResidueProperties.getScoreModel(pwtype); + if (_pwmatrix == null) { - for (int j = i; j < noseqs; j++) - { - if (j == i) - { - distance[i][i] = 0; - } - else - { - distance[i][j] = 100 - Comparison.PID(sequenceString[i], - sequenceString[j]); - - distance[j][i] = distance[i][j]; - } - } + _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); } } - else - { - // Pairwise substitution score (with no gap penalties) - ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype); - if (pwmatrix == null) - { - pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); - } - int maxscore = 0; - int end = sequenceString[0].length(); - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - int score = 0; + dist = _pwmatrix.findDistances(seqData); + return dist; - for (int k = 0; k < end; k++) - { - try - { - score += pwmatrix.getPairwiseScore(sequenceString[i] - .charAt(k), sequenceString[j].charAt(k)); - } catch (Exception ex) - { - System.err.println("err creating BLOSUM62 tree"); - ex.printStackTrace(); - } - } - - distance[i][j] = (float) score; - - if (score > maxscore) - { - maxscore = score; - } - } - } - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = (float) maxscore - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - - } - return distance; - - // else - /* - * else if (pwtype.equals("SW")) { float max = -1; - * - * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) - * { AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep"); - * as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out); - * distance[i][j] = (float) as.maxscore; - * - * if (max < distance[i][j]) { max = distance[i][j]; } } } - * - * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) - * { distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j]; - * } } }/ - */ } /** @@ -792,7 +750,7 @@ public class NJTree */ public void makeLeaves() { - cluster = new Vector(); + cluster = new Vector(); for (int i = 0; i < noseqs; i++) { @@ -811,26 +769,42 @@ public class NJTree } /** + * Search for leaf nodes below (or at) the given node + * + * @param nd + * root node to search from + * + * @return + */ + public Vector findLeaves(SequenceNode nd) + { + Vector leaves = new Vector(); + findLeaves(nd, leaves); + return leaves; + } + + /** * Search for leaf nodes. * - * @param node + * @param nd * root node to search from * @param leaves * Vector of leaves to add leaf node objects too. * * @return Vector of leaf nodes on binary tree */ - public Vector findLeaves(SequenceNode node, Vector leaves) + Vector findLeaves(SequenceNode nd, + Vector leaves) { - if (node == null) + if (nd == null) { return leaves; } - if ((node.left() == null) && (node.right() == null)) // Interior node + if ((nd.left() == null) && (nd.right() == null)) // Interior node // detection { - leaves.addElement(node); + leaves.addElement(nd); return leaves; } @@ -840,8 +814,8 @@ public class NJTree * TODO: Identify internal nodes... if (node.isSequenceLabel()) { * leaves.addElement(node); } */ - findLeaves((SequenceNode) node.left(), leaves); - findLeaves((SequenceNode) node.right(), leaves); + findLeaves((SequenceNode) nd.left(), leaves); + findLeaves((SequenceNode) nd.right(), leaves); } return leaves; @@ -850,16 +824,16 @@ public class NJTree /** * Find the leaf node with a particular ycount * - * @param node + * @param nd * initial point on tree to search from * @param count * value to search for * * @return null or the node with ycound=count */ - public Object findLeaf(SequenceNode node, int count) + public Object findLeaf(SequenceNode nd, int count) { - found = _findLeaf(node, count); + found = _findLeaf(nd, count); return found; } @@ -867,23 +841,23 @@ public class NJTree /* * #see findLeaf(SequenceNode node, count) */ - public Object _findLeaf(SequenceNode node, int count) + public Object _findLeaf(SequenceNode nd, int count) { - if (node == null) + if (nd == null) { return null; } - if (node.ycount == count) + if (nd.ycount == count) { - found = node.element(); + found = nd.element(); return found; } else { - _findLeaf((SequenceNode) node.left(), count); - _findLeaf((SequenceNode) node.right(), count); + _findLeaf((SequenceNode) nd.left(), count); + _findLeaf((SequenceNode) nd.right(), count); } return found; @@ -892,58 +866,57 @@ public class NJTree /** * printNode is mainly for debugging purposes. * - * @param node + * @param nd * SequenceNode */ - public void printNode(SequenceNode node) + public void printNode(SequenceNode nd) { - if (node == null) + if (nd == null) { return; } - if ((node.left() == null) && (node.right() == null)) + if ((nd.left() == null) && (nd.right() == null)) { - System.out - .println("Leaf = " + ((SequenceI) node.element()).getName()); - System.out.println("Dist " + ((SequenceNode) node).dist); - System.out.println("Boot " + node.getBootstrap()); + System.out.println("Leaf = " + ((SequenceI) nd.element()).getName()); + System.out.println("Dist " + nd.dist); + System.out.println("Boot " + nd.getBootstrap()); } else { - System.out.println("Dist " + ((SequenceNode) node).dist); - printNode((SequenceNode) node.left()); - printNode((SequenceNode) node.right()); + System.out.println("Dist " + nd.dist); + printNode((SequenceNode) nd.left()); + printNode((SequenceNode) nd.right()); } } /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! */ - public void findMaxDist(SequenceNode node) + public void findMaxDist(SequenceNode nd) { - if (node == null) + if (nd == null) { return; } - if ((node.left() == null) && (node.right() == null)) + if ((nd.left() == null) && (nd.right() == null)) { - float dist = ((SequenceNode) node).dist; + float dist = nd.dist; if (dist > maxDistValue) { - maxdist = (SequenceNode) node; + maxdist = nd; maxDistValue = dist; } } else { - findMaxDist((SequenceNode) node.left()); - findMaxDist((SequenceNode) node.right()); + findMaxDist((SequenceNode) nd.left()); + findMaxDist((SequenceNode) nd.right()); } } @@ -952,7 +925,7 @@ public class NJTree * * @return DOCUMENT ME! */ - public Vector getGroups() + public Vector getGroups() { return groups; } @@ -970,51 +943,51 @@ public class NJTree /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! * @param threshold * DOCUMENT ME! */ - public void groupNodes(SequenceNode node, float threshold) + public void groupNodes(SequenceNode nd, float threshold) { - if (node == null) + if (nd == null) { return; } - if ((node.height / maxheight) > threshold) + if ((nd.height / maxheight) > threshold) { - groups.addElement(node); + groups.addElement(nd); } else { - groupNodes((SequenceNode) node.left(), threshold); - groupNodes((SequenceNode) node.right(), threshold); + groupNodes((SequenceNode) nd.left(), threshold); + groupNodes((SequenceNode) nd.right(), threshold); } } /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! * * @return DOCUMENT ME! */ - public float findHeight(SequenceNode node) + public float findHeight(SequenceNode nd) { - if (node == null) + if (nd == null) { return maxheight; } - if ((node.left() == null) && (node.right() == null)) + if ((nd.left() == null) && (nd.right() == null)) { - node.height = ((SequenceNode) node.parent()).height + node.dist; + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; - if (node.height > maxheight) + if (nd.height > maxheight) { - return node.height; + return nd.height; } else { @@ -1023,18 +996,18 @@ public class NJTree } else { - if (node.parent() != null) + if (nd.parent() != null) { - node.height = ((SequenceNode) node.parent()).height + node.dist; + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; } else { maxheight = 0; - node.height = (float) 0.0; + nd.height = (float) 0.0; } - maxheight = findHeight((SequenceNode) (node.left())); - maxheight = findHeight((SequenceNode) (node.right())); + maxheight = findHeight((SequenceNode) (nd.left())); + maxheight = findHeight((SequenceNode) (nd.right())); } return maxheight; @@ -1117,44 +1090,42 @@ public class NJTree /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! */ - public void printN(SequenceNode node) + public void printN(SequenceNode nd) { - if (node == null) + if (nd == null) { return; } - if ((node.left() != null) && (node.right() != null)) + if ((nd.left() != null) && (nd.right() != null)) { - printN((SequenceNode) node.left()); - printN((SequenceNode) node.right()); + printN((SequenceNode) nd.left()); + printN((SequenceNode) nd.right()); } else { - System.out.println(" name = " - + ((SequenceI) node.element()).getName()); + System.out.println(" name = " + ((SequenceI) nd.element()).getName()); } - System.out.println(" dist = " + ((SequenceNode) node).dist + " " - + ((SequenceNode) node).count + " " - + ((SequenceNode) node).height); + System.out.println(" dist = " + nd.dist + " " + nd.count + " " + + nd.height); } /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! */ - public void reCount(SequenceNode node) + public void reCount(SequenceNode nd) { ycount = 0; _lycount = 0; // _lylimit = this.node.size(); - _reCount(node); + _reCount(nd); } private long _lycount = 0, _lylimit = 0; @@ -1162,37 +1133,37 @@ public class NJTree /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! */ - public void _reCount(SequenceNode node) + public void _reCount(SequenceNode nd) { // if (_lycount<_lylimit) // { // System.err.println("Warning: depth of _recount greater than number of nodes."); // } - if (node == null) + if (nd == null) { return; } _lycount++; - if ((node.left() != null) && (node.right() != null)) + if ((nd.left() != null) && (nd.right() != null)) { - _reCount((SequenceNode) node.left()); - _reCount((SequenceNode) node.right()); + _reCount((SequenceNode) nd.left()); + _reCount((SequenceNode) nd.right()); - SequenceNode l = (SequenceNode) node.left(); - SequenceNode r = (SequenceNode) node.right(); + SequenceNode l = (SequenceNode) nd.left(); + SequenceNode r = (SequenceNode) nd.right(); - ((SequenceNode) node).count = l.count + r.count; - ((SequenceNode) node).ycount = (l.ycount + r.ycount) / 2; + nd.count = l.count + r.count; + nd.ycount = (l.ycount + r.ycount) / 2; } else { - ((SequenceNode) node).count = 1; - ((SequenceNode) node).ycount = ycount++; + nd.count = 1; + nd.ycount = ycount++; } _lycount--; } @@ -1200,80 +1171,80 @@ public class NJTree /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! */ - public void swapNodes(SequenceNode node) + public void swapNodes(SequenceNode nd) { - if (node == null) + if (nd == null) { return; } - SequenceNode tmp = (SequenceNode) node.left(); + SequenceNode tmp = (SequenceNode) nd.left(); - node.setLeft(node.right()); - node.setRight(tmp); + nd.setLeft(nd.right()); + nd.setRight(tmp); } /** * DOCUMENT ME! * - * @param node + * @param nd * DOCUMENT ME! * @param dir * DOCUMENT ME! */ - public void changeDirection(SequenceNode node, SequenceNode dir) + public void changeDirection(SequenceNode nd, SequenceNode dir) { - if (node == null) + if (nd == null) { return; } - if (node.parent() != top) + if (nd.parent() != top) { - changeDirection((SequenceNode) node.parent(), node); + changeDirection((SequenceNode) nd.parent(), nd); - SequenceNode tmp = (SequenceNode) node.parent(); + SequenceNode tmp = (SequenceNode) nd.parent(); - if (dir == node.left()) + if (dir == nd.left()) { - node.setParent(dir); - node.setLeft(tmp); + nd.setParent(dir); + nd.setLeft(tmp); } - else if (dir == node.right()) + else if (dir == nd.right()) { - node.setParent(dir); - node.setRight(tmp); + nd.setParent(dir); + nd.setRight(tmp); } } else { - if (dir == node.left()) + if (dir == nd.left()) { - node.setParent(node.left()); + nd.setParent(nd.left()); - if (top.left() == node) + if (top.left() == nd) { - node.setRight(top.right()); + nd.setRight(top.right()); } else { - node.setRight(top.left()); + nd.setRight(top.left()); } } else { - node.setParent(node.right()); + nd.setParent(nd.right()); - if (top.left() == node) + if (top.left() == nd) { - node.setLeft(top.right()); + nd.setLeft(top.right()); } else { - node.setLeft(top.left()); + nd.setLeft(top.left()); } } } @@ -1329,9 +1300,12 @@ public class NJTree */ public void applyToNodes(NodeTransformI nodeTransformI) { - for (Enumeration nodes = node.elements(); nodes.hasMoreElements(); nodeTransformI - .transform((BinaryNode) nodes.nextElement())) + for (Enumeration nodes = node.elements(); nodes + .hasMoreElements(); nodeTransformI.transform(nodes + .nextElement())) + { ; + } } }