X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FNJTree.java;h=82e028456f693c4cbf427a463838f3779dbc9fb7;hb=567c2595554096f10feab130153f97286f3f7d80;hp=7f4850e495833cfa99593f0f69c146a8aea9d241;hpb=5a6113ce9fef6f59b895a40a69b55b120735c533;p=jalview.git diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java old mode 100755 new mode 100644 index 7f4850e..82e0284 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -1,699 +1,152 @@ -package jalview.analysis; - -import jalview.datamodel.*; -import jalview.util.*; -import jalview.schemes.ResidueProperties; -import java.util.*; - -import jalview.io.NewickFile; - -public class NJTree { - - Vector cluster; - SequenceI[] sequence; - - int done[]; - int noseqs; - int noClus; - - float distance[][]; - - int mini; - int minj; - float ri; - float rj; - - Vector groups = new Vector(); - SequenceNode maxdist; - SequenceNode top; - - float maxDistValue; - float maxheight; - - int ycount; - - Vector node; - - String type; - String pwtype; - - Object found = null; - Object leaves = null; - - int start; - int end; - - public NJTree(SequenceNode node) { - top = node; - maxheight = findHeight(top); - - } - - public NJTree(SequenceI[] seqs, NewickFile treefile) { - top = treefile.getTree(); - maxheight = findHeight(top); - SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - - Vector leaves = new Vector(); - findLeaves(top, leaves); - - int i = 0; - int namesleft = seqs.length; - - SequenceNode j; - SequenceI nam; - String realnam; - while (i < leaves.size()) - { - j = (SequenceNode) leaves.elementAt(i++); - realnam = j.getName(); - nam = null; - if (namesleft>-1) - nam = algnIds.findIdMatch(realnam); - if (nam != null) { - j.setElement(nam); - namesleft--; - } else { - j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); - } - } - } - - public NJTree(SequenceI[] sequence,int start, int end) { - this(sequence,"NJ","BL",start,end); - } - - public NJTree(SequenceI[] sequence,String type,String pwtype,int start, int end ) { - - this.sequence = sequence; - this.node = new Vector(); - this.type = type; - this.pwtype = pwtype; - this.start = start; - this.end = end; - - if (!(type.equals("NJ"))) { - type = "AV"; - } - - if (!(pwtype.equals("PID"))) { - type = "BL"; - } - - int i=0; - - done = new int[sequence.length]; - - - while (i < sequence.length && sequence[i] != null) { - done[i] = 0; - i++; - } - - noseqs = i++; - - distance = findDistances(); - - makeLeaves(); - - noClus = cluster.size(); - - cluster(); - - } - - - public void cluster() { - - while (noClus > 2) { - if (type.equals("NJ")) { - float mind = findMinNJDistance(); - } else { - float mind = findMinDistance(); - } - - Cluster c = joinClusters(mini,minj); - - - done[minj] = 1; - - cluster.setElementAt(null,minj); - cluster.setElementAt(c,mini); - - noClus--; - } - - boolean onefound = false; - - int one = -1; - int two = -1; - - for (int i=0; i < noseqs; i++) { - if (done[i] != 1) { - if (onefound == false) { - two = i; - onefound = true; - } else { - one = i; - } - } - } - - Cluster c = joinClusters(one,two); - top = (SequenceNode)(node.elementAt(one)); - - reCount(top); - findHeight(top); - findMaxDist(top); - - } - - public Cluster joinClusters(int i, int j) { - - float dist = distance[i][j]; - - int noi = ((Cluster)cluster.elementAt(i)).value.length; - int noj = ((Cluster)cluster.elementAt(j)).value.length; - - int[] value = new int[noi + noj]; - - for (int ii = 0; ii < noi;ii++) { - value[ii] = ((Cluster)cluster.elementAt(i)).value[ii]; - } - - for (int ii = noi; ii < noi+ noj;ii++) { - value[ii] = ((Cluster)cluster.elementAt(j)).value[ii-noi]; - } - - Cluster c = new Cluster(value); - - ri = findr(i,j); - rj = findr(j,i); - - if (type.equals("NJ")) { - findClusterNJDistance(i,j); - } else { - findClusterDistance(i,j); - } - - SequenceNode sn = new SequenceNode(); - - sn.setLeft((SequenceNode)(node.elementAt(i))); - sn.setRight((SequenceNode)(node.elementAt(j))); - - SequenceNode tmpi = (SequenceNode)(node.elementAt(i)); - SequenceNode tmpj = (SequenceNode)(node.elementAt(j)); - - if (type.equals("NJ")) { - findNewNJDistances(tmpi,tmpj,dist); - } else { - findNewDistances(tmpi,tmpj,dist); - } - - tmpi.setParent(sn); - tmpj.setParent(sn); - - node.setElementAt(sn,i); - return c; - } - - public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, float dist) { - - float ih = 0; - float jh = 0; - - SequenceNode sni = tmpi; - SequenceNode snj = tmpj; - - tmpi.dist = (dist + ri - rj)/2; - tmpj.dist = (dist - tmpi.dist); - - if (tmpi.dist < 0) { - tmpi.dist = 0; - } - if (tmpj.dist < 0) { - tmpj.dist = 0; - } - } - - public void findNewDistances(SequenceNode tmpi,SequenceNode tmpj,float dist) { - - float ih = 0; - float jh = 0; - - SequenceNode sni = tmpi; - SequenceNode snj = tmpj; - - while (sni != null) { - ih = ih + sni.dist; - sni = (SequenceNode)sni.left(); - } - - while (snj != null) { - jh = jh + snj.dist; - snj = (SequenceNode)snj.left(); - } - - tmpi.dist = (dist/2 - ih); - tmpj.dist = (dist/2 - jh); - } - - - - public void findClusterDistance(int i, int j) { - - int noi = ((Cluster)cluster.elementAt(i)).value.length; - int noj = ((Cluster)cluster.elementAt(j)).value.length; - - // New distances from cluster to others - float[] newdist = new float[noseqs]; - - for (int l = 0; l < noseqs; l++) { - if ( l != i && l != j) { - newdist[l] = (distance[i][l] * noi + distance[j][l] * noj)/(noi + noj); - } else { - newdist[l] = 0; - } - } - - for (int ii=0; ii < noseqs;ii++) { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } - } - - public void findClusterNJDistance(int i, int j) { - - int noi = ((Cluster)cluster.elementAt(i)).value.length; - int noj = ((Cluster)cluster.elementAt(j)).value.length; - - // New distances from cluster to others - float[] newdist = new float[noseqs]; - - for (int l = 0; l < noseqs; l++) { - if ( l != i && l != j) { - newdist[l] = (distance[i][l] + distance[j][l] - distance[i][j])/2; - } else { - newdist[l] = 0; - } - } - - for (int ii=0; ii < noseqs;ii++) { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } - } - - public float findr(int i, int j) { - - float tmp = 1; - for (int k=0; k < noseqs;k++) { - if (k!= i && k!= j && done[k] != 1) { - tmp = tmp + distance[i][k]; - } - } - - if (noClus > 2) { - tmp = tmp/(noClus - 2); - } - - return tmp; - } - - public float findMinNJDistance() { - - float min = 100000; - - for (int i=0; i < noseqs-1; i++) { - for (int j=i+1;j < noseqs;j++) { - if (done[i] != 1 && done[j] != 1) { - float tmp = distance[i][j] - (findr(i,j) + findr(j,i)); - if (tmp < min) { - - mini = i; - minj = j; - - min = tmp; - - } - } - } - } - return min; - } - - public float findMinDistance() { - - float min = 100000; - - for (int i=0; i < noseqs-1;i++) { - for (int j = i+1; j < noseqs;j++) { - if (done[i] != 1 && done[j] != 1) { - if (distance[i][j] < min) { - mini = i; - minj = j; - - min = distance[i][j]; - } - } - } - } - return min; - } - - public float[][] findDistances() { - - float[][] distance = new float[noseqs][noseqs]; - if (pwtype.equals("PID")) { - for (int i = 0; i < noseqs-1; i++) { - for (int j = i; j < noseqs; j++) { - if (j==i) { - distance[i][i] = 0; - } else { - distance[i][j] = 100-Comparison.PID(sequence[i], sequence[j]); - distance[j][i] = distance[i][j]; - } - } - } - } else if (pwtype.equals("BL")) { - int maxscore = 0; - - for (int i = 0; i < noseqs-1; i++) { - for (int j = i; j < noseqs; j++) { - int score = 0; - for (int k=0; k < sequence[i].getLength(); k++) { - try{ - score += - ResidueProperties.getBLOSUM62(sequence[i].getSequence(k, - k + 1), - sequence[j].getSequence(k, - k + 1)); - }catch(Exception ex){System.out.println("err creating BLOSUM62 tree");} - } - distance[i][j] = (float)score; - if (score > maxscore) { - maxscore = score; - } - } - } - for (int i = 0; i < noseqs-1; i++) { - for (int j = i; j < noseqs; j++) { - distance[i][j] = (float)maxscore - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - } else if (pwtype.equals("SW")) { - float max = -1; - for (int i = 0; i < noseqs-1; i++) { - for (int j = i; j < noseqs; j++) { - AlignSeq as = new AlignSeq(sequence[i],sequence[j],"pep"); - as.calcScoreMatrix(); - as.traceAlignment(); - as.printAlignment(); - distance[i][j] = (float)as.maxscore; - if (max < distance[i][j]) { - max = distance[i][j]; - } - } - } - for (int i = 0; i < noseqs-1; i++) { - for (int j = i; j < noseqs; j++) { - distance[i][j] = max - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - } - - return distance; - } - - public void makeLeaves() { - cluster = new Vector(); - - for (int i=0; i < noseqs; i++) { - SequenceNode sn = new SequenceNode(); - - sn.setElement(sequence[i]); - sn.setName(sequence[i].getName()); - node.addElement(sn); - - int[] value = new int[1]; - value[0] = i; - - Cluster c = new Cluster(value); - cluster.addElement(c); - } - } - - public Vector findLeaves(SequenceNode node, Vector leaves) { - if (node == null) { - return leaves; - } - - if (node.left() == null && node.right() == null) { - leaves.addElement(node); - return leaves; - } else { - findLeaves((SequenceNode)node.left(),leaves); - findLeaves((SequenceNode)node.right(),leaves); - } - return leaves; - } - - public Object findLeaf(SequenceNode node, int count) { - found = _findLeaf(node,count); - - return found; - } - public Object _findLeaf(SequenceNode node,int count) { - if (node == null) { - return null; - } - if (node.ycount == count) { - found = node.element(); - return found; - } else { - _findLeaf((SequenceNode)node.left(),count); - _findLeaf((SequenceNode)node.right(),count); - } - - return found; - } - - public void printNode(SequenceNode node) { - if (node == null) { - return; - } - if (node.left() == null && node.right() == null) { - System.out.println("Leaf = " + ((SequenceI)node.element()).getName()); - System.out.println("Dist " + ((SequenceNode)node).dist); - System.out.println("Boot " + node.getBootstrap()); - } else { - System.out.println("Dist " + ((SequenceNode)node).dist); - printNode((SequenceNode)node.left()); - printNode((SequenceNode)node.right()); - } - } - public void findMaxDist(SequenceNode node) { - if (node == null) { - return; - } - if (node.left() == null && node.right() == null) { - - float dist = ((SequenceNode)node).dist; - if (dist > maxDistValue) { - maxdist = (SequenceNode)node; - maxDistValue = dist; - } - } else { - findMaxDist((SequenceNode)node.left()); - findMaxDist((SequenceNode)node.right()); - } - } - public Vector getGroups() { - return groups; - } - public float getMaxHeight() { - return maxheight; - } - public void groupNodes(SequenceNode node, float threshold) { - if (node == null) { - return; - } - - if (node.height/maxheight > threshold) { - groups.addElement(node); - } else { - groupNodes((SequenceNode)node.left(),threshold); - groupNodes((SequenceNode)node.right(),threshold); - } - } - - public float findHeight(SequenceNode node) { - - if (node == null) { - return maxheight; - } - - if (node.left() == null && node.right() == null) { - node.height = ((SequenceNode)node.parent()).height + node.dist; - - if (node.height > maxheight) { - return node.height; - } else { - return maxheight; - } - } else { - if (node.parent() != null) { - node.height = ((SequenceNode)node.parent()).height + node.dist; - } else { - maxheight = 0; - node.height = (float)0.0; - } - - maxheight = findHeight((SequenceNode)(node.left())); - maxheight = findHeight((SequenceNode)(node.right())); - } - return maxheight; - } - public SequenceNode reRoot() { - if (maxdist != null) { - ycount = 0; - float tmpdist = maxdist.dist; - - // New top - SequenceNode sn = new SequenceNode(); - sn.setParent(null); - - // New right hand of top - SequenceNode snr = (SequenceNode)maxdist.parent(); - changeDirection(snr,maxdist); - System.out.println("Printing reversed tree"); - printN(snr); - snr.dist = tmpdist/2; - maxdist.dist = tmpdist/2; - - snr.setParent(sn); - maxdist.setParent(sn); - - sn.setRight(snr); - sn.setLeft(maxdist); - - top = sn; - - ycount = 0; - reCount(top); - findHeight(top); - - } - return top; - } - public static void printN(SequenceNode node) { - if (node == null) { - return; - } - - if (node.left() != null && node.right() != null) { - printN((SequenceNode)node.left()); - printN((SequenceNode)node.right()); - } else { - System.out.println(" name = " + ((SequenceI)node.element()).getName()); - } - System.out.println(" dist = " + ((SequenceNode)node).dist + " " + ((SequenceNode)node).count + " " + ((SequenceNode)node).height); - } - - public void reCount(SequenceNode node) { - ycount = 0; - _reCount(node); - } - public void _reCount(SequenceNode node) { - if (node == null) { - return; - } - - if (node.left() != null && node.right() != null) { - _reCount((SequenceNode)node.left()); - _reCount((SequenceNode)node.right()); - - SequenceNode l = (SequenceNode)node.left(); - SequenceNode r = (SequenceNode)node.right(); - - ((SequenceNode)node).count = l.count + r.count; - ((SequenceNode)node).ycount = (l.ycount + r.ycount)/2; - - } else { - ((SequenceNode)node).count = 1; - ((SequenceNode)node).ycount = ycount++; - } - - } - public void swapNodes(SequenceNode node) { - if (node == null) { - return; - } - SequenceNode tmp = (SequenceNode)node.left(); - - node.setLeft(node.right()); - node.setRight(tmp); - } - public void changeDirection(SequenceNode node, SequenceNode dir) { - if (node == null) { - return; - } - if (node.parent() != top) { - changeDirection((SequenceNode)node.parent(), node); - - SequenceNode tmp = (SequenceNode)node.parent(); - - if (dir == node.left()) { - node.setParent(dir); - node.setLeft(tmp); - } else if (dir == node.right()) { - node.setParent(dir); - node.setRight(tmp); - } - - } else { - if (dir == node.left()) { - node.setParent(node.left()); - - if (top.left() == node) { - node.setRight(top.right()); - } else { - node.setRight(top.left()); - } - } else { - node.setParent(node.right()); - - if (top.left() == node) { - node.setLeft(top.right()); - } else { - node.setLeft(top.left()); - } - } - } - } - public void setMaxDist(SequenceNode node) { - this.maxdist = maxdist; - } - public SequenceNode getMaxDist() { - return maxdist; - } - public SequenceNode getTopNode() { - return top; - } - -} - - - -class Cluster { - - int[] value; - - public Cluster(int[] value) { - this.value = value; - } - -} - +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.SequenceNode; +import jalview.viewmodel.AlignmentViewport; + +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class NJTree extends TreeBuilder +{ + /** + * Constructor given a viewport, tree type and score model + * + * @param av + * the current alignment viewport + * @param sm + * a distance or similarity score model to use to compute the tree + * @param scoreParameters + */ + public NJTree(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI scoreParameters) + { + super(av, sm, scoreParameters); + } + // private long _lycount = 0, _lylimit = 0; + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + @Override + protected + double findMinDistance() + { + double min = Double.MAX_VALUE; + + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i + 1; j < noseqs; j++) + { + if (!done.get(i) && !done.get(j)) + { + double tmp = distances.getValue(i, j) + - (findr(i, j) + findr(j, i)); + + if (tmp < min) + { + mini = i; + minj = j; + + min = tmp; + } + } + } + } + + return min; + } + + /** + * DOCUMENT ME! + * + * @param tmpi + * DOCUMENT ME! + * @param tmpj + * DOCUMENT ME! + * @param dist + * DOCUMENT ME! + */ + @Override + protected + void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, double dist) + { + + tmpi.dist = ((dist + ri) - rj) / 2; + tmpj.dist = (dist - tmpi.dist); + + if (tmpi.dist < 0) + { + tmpi.dist = 0; + } + + if (tmpj.dist < 0) + { + tmpj.dist = 0; + } + } + + + + /** + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. The new distance to cluster k is + * calculated as the average of the distances from i to k and from j to k, + * less half the distance from i to j. + * + * @param i + * @param j + */ + @Override + protected + void findClusterDistance(int i, int j) + { + // New distances from cluster i to others + double[] newdist = new double[noseqs]; + + double ijDistance = distances.getValue(i, j); + for (int l = 0; l < noseqs; l++) + { + if ((l != i) && (l != j)) + { + newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - ijDistance) / 2; + } + else + { + newdist[l] = 0; + } + } + + for (int ii = 0; ii < noseqs; ii++) + { + distances.setValue(i, ii, newdist[ii]); + distances.setValue(ii, i, newdist[ii]); + } + } +}