X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FNJTree.java;h=dd2dbbf46dcd9c5ea053cfdde77462676daa4c09;hb=49685e6426d5ac136dce4907196751680c667670;hp=20d28f7e2fcbe49dd0deb690b65dc9d3de28260e;hpb=ddf4180014d2316824c4ecd4bcbbe20be4033d43;p=jalview.git diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index 20d28f7..dd2dbbf 100755 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -1,1208 +1,1295 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -package jalview.analysis; - -import jalview.datamodel.*; - -import jalview.io.NewickFile; - -import jalview.schemes.ResidueProperties; - -import jalview.util.*; - -import java.util.*; - - -/** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ - */ -public class NJTree -{ - Vector cluster; - SequenceI[] sequence; - - //SequenceData is a string representation of what the user - //sees. The display may contain hidden columns. - String [] sequenceString; - - int[] done; - int noseqs; - int noClus; - float[][] distance; - int mini; - int minj; - float ri; - float rj; - Vector groups = new Vector(); - SequenceNode maxdist; - SequenceNode top; - float maxDistValue; - float maxheight; - int ycount; - Vector node; - String type; - String pwtype; - Object found = null; - Object leaves = null; - - boolean hasDistances = true; // normal case for jalview trees - boolean hasBootstrap = false; // normal case for jalview trees - - private boolean hasRootDistance = true; - - - /** - * Creates a new NJTree object. - * - * @param seqs DOCUMENT ME! - * @param treefile DOCUMENT ME! - */ - public NJTree(SequenceI[] seqs, NewickFile treefile) - { - top = treefile.getTree(); - - if (sequenceString == null) - { - sequenceString = new String[sequence.length]; - for (int i = 0; i < sequence.length; i++) - { - sequenceString[i] = sequence[i].getSequence(); - } - } - - - hasDistances = treefile.HasDistances(); - hasBootstrap = treefile.HasBootstrap(); - hasRootDistance = treefile.HasRootDistance(); - - maxheight = findHeight(top); - - SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - - Vector leaves = new Vector(); - findLeaves(top, leaves); - - int i = 0; - int namesleft = seqs.length; - - SequenceNode j; - SequenceI nam; - String realnam; - - while (i < leaves.size()) - { - j = (SequenceNode) leaves.elementAt(i++); - realnam = j.getName(); - nam = null; - - if (namesleft > -1) - { - nam = algnIds.findIdMatch(realnam); - } - - if (nam != null) - { - j.setElement(nam); - namesleft--; - } - else - { - j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); - j.setPlaceholder(true); - } - } - } - - /** - * Creates a new NJTree object. - * - * @param sequence DOCUMENT ME! - * @param type DOCUMENT ME! - * @param pwtype DOCUMENT ME! - * @param start DOCUMENT ME! - * @param end DOCUMENT ME! - */ - public NJTree(SequenceI[] sequence, - String [] sequenceString, - String type, - String pwtype, - int start, int end) - { - this.sequence = sequence; - this.node = new Vector(); - this.type = type; - this.pwtype = pwtype; - - if (sequenceString == null) - { - this.sequenceString = new String[sequence.length]; - for(int i=0; i 2) - { - if (type.equals("NJ")) - { - findMinNJDistance(); - } - else - { - findMinDistance(); - } - - Cluster c = joinClusters(mini, minj); - - done[minj] = 1; - - cluster.setElementAt(null, minj); - cluster.setElementAt(c, mini); - - noClus--; - } - - boolean onefound = false; - - int one = -1; - int two = -1; - - for (int i = 0; i < noseqs; i++) - { - if (done[i] != 1) - { - if (onefound == false) - { - two = i; - onefound = true; - } - else - { - one = i; - } - } - } - - joinClusters(one, two); - top = (SequenceNode) (node.elementAt(one)); - - reCount(top); - findHeight(top); - findMaxDist(top); - } - - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Cluster joinClusters(int i, int j) - { - float dist = distance[i][j]; - - int noi = ((Cluster) cluster.elementAt(i)).value.length; - int noj = ((Cluster) cluster.elementAt(j)).value.length; - - int[] value = new int[noi + noj]; - - for (int ii = 0; ii < noi; ii++) - { - value[ii] = ((Cluster) cluster.elementAt(i)).value[ii]; - } - - for (int ii = noi; ii < (noi + noj); ii++) - { - value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi]; - } - - Cluster c = new Cluster(value); - - ri = findr(i, j); - rj = findr(j, i); - - if (type.equals("NJ")) - { - findClusterNJDistance(i, j); - } - else - { - findClusterDistance(i, j); - } - - SequenceNode sn = new SequenceNode(); - - sn.setLeft((SequenceNode) (node.elementAt(i))); - sn.setRight((SequenceNode) (node.elementAt(j))); - - SequenceNode tmpi = (SequenceNode) (node.elementAt(i)); - SequenceNode tmpj = (SequenceNode) (node.elementAt(j)); - - if (type.equals("NJ")) - { - findNewNJDistances(tmpi, tmpj, dist); - } - else - { - findNewDistances(tmpi, tmpj, dist); - } - - tmpi.setParent(sn); - tmpj.setParent(sn); - - node.setElementAt(sn, i); - - return c; - } - - /** - * DOCUMENT ME! - * - * @param tmpi DOCUMENT ME! - * @param tmpj DOCUMENT ME! - * @param dist DOCUMENT ME! - */ - public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, - float dist) - { - - tmpi.dist = ((dist + ri) - rj) / 2; - tmpj.dist = (dist - tmpi.dist); - - if (tmpi.dist < 0) - { - tmpi.dist = 0; - } - - if (tmpj.dist < 0) - { - tmpj.dist = 0; - } - } - - /** - * DOCUMENT ME! - * - * @param tmpi DOCUMENT ME! - * @param tmpj DOCUMENT ME! - * @param dist DOCUMENT ME! - */ - public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, - float dist) - { - float ih = 0; - float jh = 0; - - SequenceNode sni = tmpi; - SequenceNode snj = tmpj; - - while (sni != null) - { - ih = ih + sni.dist; - sni = (SequenceNode) sni.left(); - } - - while (snj != null) - { - jh = jh + snj.dist; - snj = (SequenceNode) snj.left(); - } - - tmpi.dist = ((dist / 2) - ih); - tmpj.dist = ((dist / 2) - jh); - } - - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - */ - public void findClusterDistance(int i, int j) - { - int noi = ((Cluster) cluster.elementAt(i)).value.length; - int noj = ((Cluster) cluster.elementAt(j)).value.length; - - // New distances from cluster to others - float[] newdist = new float[noseqs]; - - for (int l = 0; l < noseqs; l++) - { - if ((l != i) && (l != j)) - { - newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi + - noj); - } - else - { - newdist[l] = 0; - } - } - - for (int ii = 0; ii < noseqs; ii++) - { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } - } - - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - */ - public void findClusterNJDistance(int i, int j) - { - - // New distances from cluster to others - float[] newdist = new float[noseqs]; - - for (int l = 0; l < noseqs; l++) - { - if ((l != i) && (l != j)) - { - newdist[l] = ((distance[i][l] + distance[j][l]) - - distance[i][j]) / 2; - } - else - { - newdist[l] = 0; - } - } - - for (int ii = 0; ii < noseqs; ii++) - { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } - } - - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findr(int i, int j) - { - float tmp = 1; - - for (int k = 0; k < noseqs; k++) - { - if ((k != i) && (k != j) && (done[k] != 1)) - { - tmp = tmp + distance[i][k]; - } - } - - if (noClus > 2) - { - tmp = tmp / (noClus - 2); - } - - return tmp; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findMinNJDistance() - { - float min = 100000; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if ((done[i] != 1) && (done[j] != 1)) - { - float tmp = distance[i][j] - (findr(i, j) + findr(j, i)); - - if (tmp < min) - { - mini = i; - minj = j; - - min = tmp; - } - } - } - } - - return min; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findMinDistance() - { - float min = 100000; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if ((done[i] != 1) && (done[j] != 1)) - { - if (distance[i][j] < min) - { - mini = i; - minj = j; - - min = distance[i][j]; - } - } - } - } - - return min; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float[][] findDistances() - { - float[][] distance = new float[noseqs][noseqs]; - - if (pwtype.equals("PID")) - { - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - if (j == i) - { - distance[i][i] = 0; - } - else - { - distance[i][j] = 100 - - Comparison.PID(sequenceString[i], sequenceString[j]); - - distance[j][i] = distance[i][j]; - } - } - } - } - else if (pwtype.equals("BL")) - { - int maxscore = 0; - int end = sequenceString[0].length(); - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - int score = 0; - - for (int k = 0; k < end; k++) - { - try - { - score += ResidueProperties.getBLOSUM62( - sequenceString[i].substring(k, k + 1), - sequenceString[j].substring(k, k + 1)); - } - catch (Exception ex) - { - System.err.println("err creating BLOSUM62 tree"); - ex.printStackTrace(); - } - } - - distance[i][j] = (float) score; - - if (score > maxscore) - { - maxscore = score; - } - } - } - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = (float) maxscore - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - } - /* else if (pwtype.equals("SW")) - { - float max = -1; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep"); - as.calcScoreMatrix(); - as.traceAlignment(); - as.printAlignment(System.out); - distance[i][j] = (float) as.maxscore; - - if (max < distance[i][j]) - { - max = distance[i][j]; - } - } - } - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = max - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - }/*/ - - return distance; - } - - /** - * DOCUMENT ME! - */ - public void makeLeaves() - { - cluster = new Vector(); - - for (int i = 0; i < noseqs; i++) - { - SequenceNode sn = new SequenceNode(); - - sn.setElement(sequence[i]); - sn.setName(sequence[i].getName()); - node.addElement(sn); - - int[] value = new int[1]; - value[0] = i; - - Cluster c = new Cluster(value); - cluster.addElement(c); - } - } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param leaves DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector findLeaves(SequenceNode node, Vector leaves) - { - if (node == null) - { - return leaves; - } - - if ((node.left() == null) && (node.right() == null)) - { - leaves.addElement(node); - - return leaves; - } - else - { - findLeaves((SequenceNode) node.left(), leaves); - findLeaves((SequenceNode) node.right(), leaves); - } - - return leaves; - } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param count DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Object findLeaf(SequenceNode node, int count) - { - found = _findLeaf(node, count); - - return found; - } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param count DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Object _findLeaf(SequenceNode node, int count) - { - if (node == null) - { - return null; - } - - if (node.ycount == count) - { - found = node.element(); - - return found; - } - else - { - _findLeaf((SequenceNode) node.left(), count); - _findLeaf((SequenceNode) node.right(), count); - } - - return found; - } - - /** - * printNode is mainly for debugging purposes. - * - * @param node SequenceNode - */ - public void printNode(SequenceNode node) - { - if (node == null) - { - return; - } - - if ((node.left() == null) && (node.right() == null)) - { - System.out.println("Leaf = " + - ((SequenceI) node.element()).getName()); - System.out.println("Dist " + ((SequenceNode) node).dist); - System.out.println("Boot " + node.getBootstrap()); - } - else - { - System.out.println("Dist " + ((SequenceNode) node).dist); - printNode((SequenceNode) node.left()); - printNode((SequenceNode) node.right()); - } - } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - */ - public void findMaxDist(SequenceNode node) - { - if (node == null) - { - return; - } - - if ((node.left() == null) && (node.right() == null)) - { - float dist = ((SequenceNode) node).dist; - - if (dist > maxDistValue) - { - maxdist = (SequenceNode) node; - maxDistValue = dist; - } - } - else - { - findMaxDist((SequenceNode) node.left()); - findMaxDist((SequenceNode) node.right()); - } - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getGroups() - { - return groups; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float getMaxHeight() - { - return maxheight; - } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param threshold DOCUMENT ME! - */ - public void groupNodes(SequenceNode node, float threshold) - { - if (node == null) - { - return; - } - - if ((node.height / maxheight) > threshold) - { - groups.addElement(node); - } - else - { - groupNodes((SequenceNode) node.left(), threshold); - groupNodes((SequenceNode) node.right(), threshold); - } - } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findHeight(SequenceNode node) - { - if (node == null) - { - return maxheight; - } - - if ((node.left() == null) && (node.right() == null)) - { - node.height = ((SequenceNode) node.parent()).height + node.dist; - - if (node.height > maxheight) - { - return node.height; - } - else - { - return maxheight; - } - } - else - { - if (node.parent() != null) - { - node.height = ((SequenceNode) node.parent()).height + - node.dist; - } - else - { - maxheight = 0; - node.height = (float) 0.0; - } - - maxheight = findHeight((SequenceNode) (node.left())); - maxheight = findHeight((SequenceNode) (node.right())); - } - - return maxheight; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceNode reRoot() - { - if (maxdist != null) - { - ycount = 0; - - float tmpdist = maxdist.dist; - - // New top - SequenceNode sn = new SequenceNode(); - sn.setParent(null); - - // New right hand of top - SequenceNode snr = (SequenceNode) maxdist.parent(); - changeDirection(snr, maxdist); - System.out.println("Printing reversed tree"); - printN(snr); - snr.dist = tmpdist / 2; - maxdist.dist = tmpdist / 2; - - snr.setParent(sn); - maxdist.setParent(sn); - - sn.setRight(snr); - sn.setLeft(maxdist); - - top = sn; - - ycount = 0; - reCount(top); - findHeight(top); - } - - return top; - } - - public String printOriginalSequenceData() - { - StringBuffer sb = new StringBuffer(); - for(int i=0; i