X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FNJTree.java;h=4d0bb9352d7b17fafe38a118b1a755f1eed1bd6c;hb=7bc226b58110fa26d9dbd3f0c78095d06909ffc3;hp=6f897543a8cba901cf3422aeaf0d6a34bf4733d3;hpb=6c4bc68ae8b1c5005e79661bb2d8246515cb787d;p=jalview.git diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index 6f89754..4d0bb93 100755 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -1,33 +1,29 @@ /* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.analysis; -import jalview.datamodel.*; - -import jalview.io.NewickFile; - -import jalview.schemes.ResidueProperties; - -import jalview.util.*; - import java.util.*; +import jalview.datamodel.*; +import jalview.io.*; +import jalview.schemes.*; +import jalview.util.*; /** * DOCUMENT ME! @@ -37,1206 +33,1239 @@ import java.util.*; */ public class NJTree { - Vector cluster; - SequenceI[] sequence; - - //SequenceData is a string representation of what the user - //sees. The display may contain hidden columns. - public AlignmentView seqData=null; - - int[] done; - int noseqs; - int noClus; - float[][] distance; - int mini; - int minj; - float ri; - float rj; - Vector groups = new Vector(); - SequenceNode maxdist; - SequenceNode top; - float maxDistValue; - float maxheight; - int ycount; - Vector node; - String type; - String pwtype; - Object found = null; - Object leaves = null; - - boolean hasDistances = true; // normal case for jalview trees - boolean hasBootstrap = false; // normal case for jalview trees - - private boolean hasRootDistance = true; - - /** - * Create a new NJTree object with leaves associated with sequences in seqs, - * and original alignment data represented by Cigar strings. - * @param seqs SequenceI[] - * @param odata Cigar[] - * @param treefile NewickFile - */ - public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) { - this(seqs, treefile); - if (odata!=null) - seqData = odata; - /* - sequenceString = new String[odata.length]; - char gapChar = jalview.util.Comparison.GapChars.charAt(0); - for (int i = 0; i < odata.length; i++) - { - SequenceI oseq_aligned = odata[i].getSeq(gapChar); - sequenceString[i] = oseq_aligned.getSequence(); - } */ + Vector cluster; + SequenceI[] sequence; + + //SequenceData is a string representation of what the user + //sees. The display may contain hidden columns. + public AlignmentView seqData = null; + + int[] done; + int noseqs; + int noClus; + float[][] distance; + int mini; + int minj; + float ri; + float rj; + Vector groups = new Vector(); + SequenceNode maxdist; + SequenceNode top; + float maxDistValue; + float maxheight; + int ycount; + Vector node; + String type; + String pwtype; + Object found = null; + Object leaves = null; + + boolean hasDistances = true; // normal case for jalview trees + boolean hasBootstrap = false; // normal case for jalview trees + + private boolean hasRootDistance = true; + + /** + * Create a new NJTree object with leaves associated with sequences in seqs, + * and original alignment data represented by Cigar strings. + * @param seqs SequenceI[] + * @param odata Cigar[] + * @param treefile NewickFile + */ + public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) + { + this(seqs, treefile); + if (odata != null) + { + seqData = odata; } + /* + sequenceString = new String[odata.length]; + char gapChar = jalview.util.Comparison.GapChars.charAt(0); + for (int i = 0; i < odata.length; i++) + { + SequenceI oseq_aligned = odata[i].getSeq(gapChar); + sequenceString[i] = oseq_aligned.getSequence(); + } */ + } + + /** + * Creates a new NJTree object from a tree from an external source + * + * @param seqs SequenceI which should be associated with leafs of treefile + * @param treefile A parsed tree + */ + public NJTree(SequenceI[] seqs, NewickFile treefile) + { + this.sequence = seqs; + top = treefile.getTree(); /** - * Creates a new NJTree object from a tree from an external source + * There is no dependent alignment to be recovered from an + * imported tree. * - * @param seqs SequenceI which should be associated with leafs of treefile - * @param treefile A parsed tree + if (sequenceString == null) + { + sequenceString = new String[seqs.length]; + for (int i = 0; i < seqs.length; i++) + { + sequenceString[i] = seqs[i].getSequence(); + } + } */ - public NJTree(SequenceI[] seqs, NewickFile treefile) - { - this.sequence = seqs; - top = treefile.getTree(); - - /** - * There is no dependent alignment to be recovered from an - * imported tree. - * - if (sequenceString == null) - { - sequenceString = new String[seqs.length]; - for (int i = 0; i < seqs.length; i++) - { - sequenceString[i] = seqs[i].getSequence(); - } - } - */ - hasDistances = treefile.HasDistances(); - hasBootstrap = treefile.HasBootstrap(); - hasRootDistance = treefile.HasRootDistance(); + hasDistances = treefile.HasDistances(); + hasBootstrap = treefile.HasBootstrap(); + hasRootDistance = treefile.HasRootDistance(); - maxheight = findHeight(top); + maxheight = findHeight(top); - SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - - Vector leaves = new Vector(); - findLeaves(top, leaves); - - int i = 0; - int namesleft = seqs.length; - - SequenceNode j; - SequenceI nam; - String realnam; - Vector one2many=new Vector(); - int countOne2Many=0; - while (i < leaves.size()) - { - j = (SequenceNode) leaves.elementAt(i++); - realnam = j.getName(); - nam = null; + SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - if (namesleft > -1) - { - nam = algnIds.findIdMatch(realnam); - } + Vector leaves = new Vector(); + findLeaves(top, leaves); - if (nam != null) - { - j.setElement(nam); - if (one2many.contains(nam)) { - countOne2Many++; - // if (jalview.bin.Cache.log.isDebugEnabled()) - // jalview.bin.Cache.log.debug("One 2 many relationship for "+nam.getName()); - } else { - one2many.addElement(nam); - namesleft--; - } - } - else - { - j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); - j.setPlaceholder(true); - } - } - // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { - // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment sequence ids (out of "+one2many.size()+" unique ids) linked to two or more leaves."); - // } - // one2many.clear(); - } + int i = 0; + int namesleft = seqs.length; - /** - * Creates a new NJTree object. - * - * @param sequence DOCUMENT ME! - * @param type DOCUMENT ME! - * @param pwtype DOCUMENT ME! - * @param start DOCUMENT ME! - * @param end DOCUMENT ME! - */ - public NJTree(SequenceI[] sequence, - AlignmentView seqData, - String type, - String pwtype, - int start, int end) + SequenceNode j; + SequenceI nam; + String realnam; + Vector one2many = new Vector(); + int countOne2Many = 0; + while (i < leaves.size()) { - this.sequence = sequence; - this.node = new Vector(); - this.type = type; - this.pwtype = pwtype; - if (seqData!=null) { - this.seqData = seqData; - } else { - SeqCigar[] seqs = new SeqCigar[sequence.length]; - for(int i=0; i -1) + { + nam = algnIds.findIdMatch(realnam); + } - if (!(pwtype.equals("PID"))) + if (nam != null) + { + j.setElement(nam); + if (one2many.contains(nam)) { - type = "BL"; + countOne2Many++; + // if (jalview.bin.Cache.log.isDebugEnabled()) + // jalview.bin.Cache.log.debug("One 2 many relationship for "+nam.getName()); } - - int i = 0; - - done = new int[sequence.length]; - - while ((i < sequence.length) && (sequence[i] != null)) + else { - done[i] = 0; - i++; + one2many.addElement(nam); + namesleft--; } - - noseqs = i++; - - distance = findDistances(this.seqData.getSequenceStrings(Comparison.GapChars.charAt(0))); - - makeLeaves(); - - noClus = cluster.size(); - - cluster(); + } + else + { + j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); + j.setPlaceholder(true); + } } + // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { + // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment sequence ids (out of "+one2many.size()+" unique ids) linked to two or more leaves."); + // } + // one2many.clear(); + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public String toString() + /** + * Creates a new NJTree object. + * + * @param sequence DOCUMENT ME! + * @param type DOCUMENT ME! + * @param pwtype DOCUMENT ME! + * @param start DOCUMENT ME! + * @param end DOCUMENT ME! + */ + public NJTree(SequenceI[] sequence, + AlignmentView seqData, + String type, + String pwtype, + int start, int end) + { + this.sequence = sequence; + this.node = new Vector(); + this.type = type; + this.pwtype = pwtype; + if (seqData != null) { - jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode()); - - return fout.print(false, true); // distances only + this.seqData = seqData; + } + else + { + SeqCigar[] seqs = new SeqCigar[sequence.length]; + for (int i = 0; i < sequence.length; i++) + { + seqs[i] = new SeqCigar(sequence[i], start, end); + } + CigarArray sdata = new CigarArray(seqs); + sdata.addOperation(CigarArray.M, end - start + 1); + this.seqData = new AlignmentView(sdata, start); } - /** - * - * used when the alignment associated to a tree has changed. - * - * @param alignment Vector - */ - public void UpdatePlaceHolders(Vector alignment) + if (! (type.equals("NJ"))) { - Vector leaves = new Vector(); - findLeaves(top, leaves); + type = "AV"; + } - int sz = leaves.size(); - SequenceIdMatcher seqmatcher = null; - int i = 0; + if (! (pwtype.equals("PID"))) + { + if (ResidueProperties.getScoreMatrix(pwtype) == null) + { + type = "BLOSUM62"; + } + } - while (i < sz) - { - SequenceNode leaf = (SequenceNode) leaves.elementAt(i++); + int i = 0; - if (alignment.contains(leaf.element())) - { - leaf.setPlaceholder(false); - } - else - { - if (seqmatcher == null) - { - // Only create this the first time we need it - SequenceI[] seqs = new SequenceI[alignment.size()]; - - for (int j = 0; j < seqs.length; j++) - seqs[j] = (SequenceI) alignment.elementAt(j); - - seqmatcher = new SequenceIdMatcher(seqs); - } - - SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); - - if (nam != null) - { - if (!leaf.isPlaceholder()) { - // remapping the node to a new sequenceI - should remove any refs to old one. - // TODO - make many sequenceI to one leaf mappings possible! (JBPNote) - } - leaf.setPlaceholder(false); - leaf.setElement(nam); - } - else - { - if (!leaf.isPlaceholder()) { - // Construct a new placeholder sequence object for this leaf - leaf.setElement(new Sequence(leaf.getName(), "THISISAPLACEHLDER")); - } - leaf.setPlaceholder(true); - - } - } - } - } + done = new int[sequence.length]; - /** - * DOCUMENT ME! - */ - public void cluster() + while ( (i < sequence.length) && (sequence[i] != null)) { - while (noClus > 2) - { - if (type.equals("NJ")) - { - findMinNJDistance(); - } - else - { - findMinDistance(); - } + done[i] = 0; + i++; + } - Cluster c = joinClusters(mini, minj); + noseqs = i++; - done[minj] = 1; + distance = findDistances(this.seqData.getSequenceStrings(Comparison. + GapChars.charAt(0))); - cluster.setElementAt(null, minj); - cluster.setElementAt(c, mini); + makeLeaves(); - noClus--; - } + noClus = cluster.size(); - boolean onefound = false; + cluster(); + } - int one = -1; - int two = -1; + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String toString() + { + jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode()); - for (int i = 0; i < noseqs; i++) - { - if (done[i] != 1) - { - if (onefound == false) - { - two = i; - onefound = true; - } - else - { - one = i; - } - } - } + return fout.print(false, true); // distances only + } - joinClusters(one, two); - top = (SequenceNode) (node.elementAt(one)); + /** + * + * used when the alignment associated to a tree has changed. + * + * @param alignment Vector + */ + public void UpdatePlaceHolders(Vector alignment) + { + Vector leaves = new Vector(); + findLeaves(top, leaves); - reCount(top); - findHeight(top); - findMaxDist(top); - } + int sz = leaves.size(); + SequenceIdMatcher seqmatcher = null; + int i = 0; - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Cluster joinClusters(int i, int j) + while (i < sz) { - float dist = distance[i][j]; - - int noi = ((Cluster) cluster.elementAt(i)).value.length; - int noj = ((Cluster) cluster.elementAt(j)).value.length; + SequenceNode leaf = (SequenceNode) leaves.elementAt(i++); - int[] value = new int[noi + noj]; - - for (int ii = 0; ii < noi; ii++) + if (alignment.contains(leaf.element())) + { + leaf.setPlaceholder(false); + } + else + { + if (seqmatcher == null) { - value[ii] = ((Cluster) cluster.elementAt(i)).value[ii]; - } + // Only create this the first time we need it + SequenceI[] seqs = new SequenceI[alignment.size()]; - for (int ii = noi; ii < (noi + noj); ii++) - { - value[ii] = ((Cluster) cluster.elementAt(j)).value[ii - noi]; - } + for (int j = 0; j < seqs.length; j++) + { + seqs[j] = (SequenceI) alignment.elementAt(j); + } - Cluster c = new Cluster(value); + seqmatcher = new SequenceIdMatcher(seqs); + } - ri = findr(i, j); - rj = findr(j, i); + SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); - if (type.equals("NJ")) + if (nam != null) { - findClusterNJDistance(i, j); + if (!leaf.isPlaceholder()) + { + // remapping the node to a new sequenceI - should remove any refs to old one. + // TODO - make many sequenceI to one leaf mappings possible! (JBPNote) + } + leaf.setPlaceholder(false); + leaf.setElement(nam); } else { - findClusterDistance(i, j); + if (!leaf.isPlaceholder()) + { + // Construct a new placeholder sequence object for this leaf + leaf.setElement(new Sequence(leaf.getName(), "THISISAPLACEHLDER")); + } + leaf.setPlaceholder(true); + } + } + } + } + + /** + * DOCUMENT ME! + */ + public void cluster() + { + while (noClus > 2) + { + if (type.equals("NJ")) + { + findMinNJDistance(); + } + else + { + findMinDistance(); + } + + Cluster c = joinClusters(mini, minj); + + done[minj] = 1; + + cluster.setElementAt(null, minj); + cluster.setElementAt(c, mini); - SequenceNode sn = new SequenceNode(); + noClus--; + } - sn.setLeft((SequenceNode) (node.elementAt(i))); - sn.setRight((SequenceNode) (node.elementAt(j))); + boolean onefound = false; - SequenceNode tmpi = (SequenceNode) (node.elementAt(i)); - SequenceNode tmpj = (SequenceNode) (node.elementAt(j)); + int one = -1; + int two = -1; - if (type.equals("NJ")) + for (int i = 0; i < noseqs; i++) + { + if (done[i] != 1) + { + if (onefound == false) { - findNewNJDistances(tmpi, tmpj, dist); + two = i; + onefound = true; } else { - findNewDistances(tmpi, tmpj, dist); + one = i; } + } + } + + joinClusters(one, two); + top = (SequenceNode) (node.elementAt(one)); + + reCount(top); + findHeight(top); + findMaxDist(top); + } - tmpi.setParent(sn); - tmpj.setParent(sn); + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + * @param j DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Cluster joinClusters(int i, int j) + { + float dist = distance[i][j]; + + int noi = ( (Cluster) cluster.elementAt(i)).value.length; + int noj = ( (Cluster) cluster.elementAt(j)).value.length; - node.setElementAt(sn, i); + int[] value = new int[noi + noj]; - return c; + for (int ii = 0; ii < noi; ii++) + { + value[ii] = ( (Cluster) cluster.elementAt(i)).value[ii]; } - /** - * DOCUMENT ME! - * - * @param tmpi DOCUMENT ME! - * @param tmpj DOCUMENT ME! - * @param dist DOCUMENT ME! - */ - public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, - float dist) + for (int ii = noi; ii < (noi + noj); ii++) { + value[ii] = ( (Cluster) cluster.elementAt(j)).value[ii - noi]; + } - tmpi.dist = ((dist + ri) - rj) / 2; - tmpj.dist = (dist - tmpi.dist); + Cluster c = new Cluster(value); - if (tmpi.dist < 0) - { - tmpi.dist = 0; - } + ri = findr(i, j); + rj = findr(j, i); - if (tmpj.dist < 0) - { - tmpj.dist = 0; - } + if (type.equals("NJ")) + { + findClusterNJDistance(i, j); } - - /** - * DOCUMENT ME! - * - * @param tmpi DOCUMENT ME! - * @param tmpj DOCUMENT ME! - * @param dist DOCUMENT ME! - */ - public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, - float dist) + else { - float ih = 0; - float jh = 0; + findClusterDistance(i, j); + } - SequenceNode sni = tmpi; - SequenceNode snj = tmpj; + SequenceNode sn = new SequenceNode(); - while (sni != null) - { - ih = ih + sni.dist; - sni = (SequenceNode) sni.left(); - } + sn.setLeft( (SequenceNode) (node.elementAt(i))); + sn.setRight( (SequenceNode) (node.elementAt(j))); - while (snj != null) - { - jh = jh + snj.dist; - snj = (SequenceNode) snj.left(); - } + SequenceNode tmpi = (SequenceNode) (node.elementAt(i)); + SequenceNode tmpj = (SequenceNode) (node.elementAt(j)); - tmpi.dist = ((dist / 2) - ih); - tmpj.dist = ((dist / 2) - jh); + if (type.equals("NJ")) + { + findNewNJDistances(tmpi, tmpj, dist); } - - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - */ - public void findClusterDistance(int i, int j) + else { - int noi = ((Cluster) cluster.elementAt(i)).value.length; - int noj = ((Cluster) cluster.elementAt(j)).value.length; + findNewDistances(tmpi, tmpj, dist); + } - // New distances from cluster to others - float[] newdist = new float[noseqs]; + tmpi.setParent(sn); + tmpj.setParent(sn); - for (int l = 0; l < noseqs; l++) - { - if ((l != i) && (l != j)) - { - newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) / (noi + - noj); - } - else - { - newdist[l] = 0; - } - } + node.setElementAt(sn, i); - for (int ii = 0; ii < noseqs; ii++) - { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } + return c; + } + + /** + * DOCUMENT ME! + * + * @param tmpi DOCUMENT ME! + * @param tmpj DOCUMENT ME! + * @param dist DOCUMENT ME! + */ + public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, + float dist) + { + + tmpi.dist = ( (dist + ri) - rj) / 2; + tmpj.dist = (dist - tmpi.dist); + + if (tmpi.dist < 0) + { + tmpi.dist = 0; } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - */ - public void findClusterNJDistance(int i, int j) + if (tmpj.dist < 0) { + tmpj.dist = 0; + } + } - // New distances from cluster to others - float[] newdist = new float[noseqs]; + /** + * DOCUMENT ME! + * + * @param tmpi DOCUMENT ME! + * @param tmpj DOCUMENT ME! + * @param dist DOCUMENT ME! + */ + public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, + float dist) + { + float ih = 0; + float jh = 0; - for (int l = 0; l < noseqs; l++) - { - if ((l != i) && (l != j)) - { - newdist[l] = ((distance[i][l] + distance[j][l]) - - distance[i][j]) / 2; - } - else - { - newdist[l] = 0; - } - } + SequenceNode sni = tmpi; + SequenceNode snj = tmpj; - for (int ii = 0; ii < noseqs; ii++) - { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } + while (sni != null) + { + ih = ih + sni.dist; + sni = (SequenceNode) sni.left(); } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * @param j DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findr(int i, int j) + while (snj != null) { - float tmp = 1; + jh = jh + snj.dist; + snj = (SequenceNode) snj.left(); + } - for (int k = 0; k < noseqs; k++) - { - if ((k != i) && (k != j) && (done[k] != 1)) - { - tmp = tmp + distance[i][k]; - } - } + tmpi.dist = ( (dist / 2) - ih); + tmpj.dist = ( (dist / 2) - jh); + } - if (noClus > 2) - { - tmp = tmp / (noClus - 2); - } + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + * @param j DOCUMENT ME! + */ + public void findClusterDistance(int i, int j) + { + int noi = ( (Cluster) cluster.elementAt(i)).value.length; + int noj = ( (Cluster) cluster.elementAt(j)).value.length; + + // New distances from cluster to others + float[] newdist = new float[noseqs]; - return tmp; + for (int l = 0; l < noseqs; l++) + { + if ( (l != i) && (l != j)) + { + newdist[l] = ( (distance[i][l] * noi) + (distance[j][l] * noj)) / (noi + + noj); + } + else + { + newdist[l] = 0; + } } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findMinNJDistance() + for (int ii = 0; ii < noseqs; ii++) { - float min = 100000; + distance[i][ii] = newdist[ii]; + distance[ii][i] = newdist[ii]; + } + } - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if ((done[i] != 1) && (done[j] != 1)) - { - float tmp = distance[i][j] - (findr(i, j) + findr(j, i)); - - if (tmp < min) - { - mini = i; - minj = j; - - min = tmp; - } - } - } - } + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + * @param j DOCUMENT ME! + */ + public void findClusterNJDistance(int i, int j) + { + + // New distances from cluster to others + float[] newdist = new float[noseqs]; - return min; + for (int l = 0; l < noseqs; l++) + { + if ( (l != i) && (l != j)) + { + newdist[l] = ( (distance[i][l] + distance[j][l]) - + distance[i][j]) / 2; + } + else + { + newdist[l] = 0; + } } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findMinDistance() + for (int ii = 0; ii < noseqs; ii++) { - float min = 100000; + distance[i][ii] = newdist[ii]; + distance[ii][i] = newdist[ii]; + } + } - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if ((done[i] != 1) && (done[j] != 1)) - { - if (distance[i][j] < min) - { - mini = i; - minj = j; - - min = distance[i][j]; - } - } - } - } + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + * @param j DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public float findr(int i, int j) + { + float tmp = 1; - return min; + for (int k = 0; k < noseqs; k++) + { + if ( (k != i) && (k != j) && (done[k] != 1)) + { + tmp = tmp + distance[i][k]; + } } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float[][] findDistances(String[] sequenceString) + if (noClus > 2) { - float[][] distance = new float[noseqs][noseqs]; + tmp = tmp / (noClus - 2); + } - if (pwtype.equals("PID")) + return tmp; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public float findMinNJDistance() + { + float min = 100000; + + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i + 1; j < noseqs; j++) + { + if ( (done[i] != 1) && (done[j] != 1)) { - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - if (j == i) - { - distance[i][i] = 0; - } - else - { - distance[i][j] = 100 - - Comparison.PID(sequenceString[i], sequenceString[j]); - - distance[j][i] = distance[i][j]; - } - } - } + float tmp = distance[i][j] - (findr(i, j) + findr(j, i)); + + if (tmp < min) + { + mini = i; + minj = j; + + min = tmp; + } } - else if (pwtype.equals("BL")) + } + } + + return min; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public float findMinDistance() + { + float min = 100000; + + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i + 1; j < noseqs; j++) + { + if ( (done[i] != 1) && (done[j] != 1)) { - int maxscore = 0; - int end = sequenceString[0].length(); - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - int score = 0; - - for (int k = 0; k < end; k++) - { - try - { - score += ResidueProperties.getBLOSUM62( - sequenceString[i].charAt(k), - sequenceString[j].charAt(k)); - } - catch (Exception ex) - { - System.err.println("err creating BLOSUM62 tree"); - ex.printStackTrace(); - } - } - - distance[i][j] = (float) score; - - if (score > maxscore) - { - maxscore = score; - } - } - } + if (distance[i][j] < min) + { + mini = i; + minj = j; - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = (float) maxscore - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } + min = distance[i][j]; + } } - /* else if (pwtype.equals("SW")) + } + } + + return min; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public float[][] findDistances(String[] sequenceString) + { + float[][] distance = new float[noseqs][noseqs]; + + if (pwtype.equals("PID")) + { + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) { - float max = -1; + if (j == i) + { + distance[i][i] = 0; + } + else + { + distance[i][j] = 100 - + Comparison.PID(sequenceString[i], sequenceString[j]); - for (int i = 0; i < (noseqs - 1); i++) + distance[j][i] = distance[i][j]; + } + } + } + } + else + { + // Pairwise substitution score (with no gap penalties) + ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype); + if (pwmatrix == null) + { + pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); + } + int maxscore = 0; + int end = sequenceString[0].length(); + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) + { + int score = 0; + + for (int k = 0; k < end; k++) + { + try { - for (int j = i; j < noseqs; j++) - { - AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep"); - as.calcScoreMatrix(); - as.traceAlignment(); - as.printAlignment(System.out); - distance[i][j] = (float) as.maxscore; - - if (max < distance[i][j]) - { - max = distance[i][j]; - } - } + score += pwmatrix.getPairwiseScore(sequenceString[i].charAt(k), + sequenceString[j].charAt(k)); } - - for (int i = 0; i < (noseqs - 1); i++) + catch (Exception ex) { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = max - distance[i][j]; - distance[j][i] = distance[i][j]; - } + System.err.println("err creating BLOSUM62 tree"); + ex.printStackTrace(); } - }/*/ + } - return distance; - } + distance[i][j] = (float) score; - /** - * DOCUMENT ME! - */ - public void makeLeaves() - { - cluster = new Vector(); + if (score > maxscore) + { + maxscore = score; + } + } + } - for (int i = 0; i < noseqs; i++) + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) { - SequenceNode sn = new SequenceNode(); + distance[i][j] = (float) maxscore - distance[i][j]; + distance[j][i] = distance[i][j]; + } + } - sn.setElement(sequence[i]); - sn.setName(sequence[i].getName()); - node.addElement(sn); + } + return distance; - int[] value = new int[1]; - value[0] = i; + // else + /* else if (pwtype.equals("SW")) + { + float max = -1; - Cluster c = new Cluster(value); - cluster.addElement(c); - } - } + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) + { + AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep"); + as.calcScoreMatrix(); + as.traceAlignment(); + as.printAlignment(System.out); + distance[i][j] = (float) as.maxscore; + + if (max < distance[i][j]) + { + max = distance[i][j]; + } + } + } - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param leaves DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector findLeaves(SequenceNode node, Vector leaves) + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) + { + distance[i][j] = max - distance[i][j]; + distance[j][i] = distance[i][j]; + } + } + }/*/ + } + + /** + * DOCUMENT ME! + */ + public void makeLeaves() + { + cluster = new Vector(); + + for (int i = 0; i < noseqs; i++) { - if (node == null) - { - return leaves; - } + SequenceNode sn = new SequenceNode(); - if ((node.left() == null) && (node.right() == null)) - { - leaves.addElement(node); + sn.setElement(sequence[i]); + sn.setName(sequence[i].getName()); + node.addElement(sn); - return leaves; - } - else - { - findLeaves((SequenceNode) node.left(), leaves); - findLeaves((SequenceNode) node.right(), leaves); - } + int[] value = new int[1]; + value[0] = i; - return leaves; + Cluster c = new Cluster(value); + cluster.addElement(c); } + } - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param count DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Object findLeaf(SequenceNode node, int count) + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + * @param leaves DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector findLeaves(SequenceNode node, Vector leaves) + { + if (node == null) { - found = _findLeaf(node, count); - - return found; + return leaves; } - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param count DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Object _findLeaf(SequenceNode node, int count) + if ( (node.left() == null) && (node.right() == null)) { - if (node == null) - { - return null; - } + leaves.addElement(node); - if (node.ycount == count) - { - found = node.element(); + return leaves; + } + else + { + findLeaves( (SequenceNode) node.left(), leaves); + findLeaves( (SequenceNode) node.right(), leaves); + } - return found; - } - else - { - _findLeaf((SequenceNode) node.left(), count); - _findLeaf((SequenceNode) node.right(), count); - } + return leaves; + } + + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + * @param count DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Object findLeaf(SequenceNode node, int count) + { + found = _findLeaf(node, count); + + return found; + } - return found; + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + * @param count DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Object _findLeaf(SequenceNode node, int count) + { + if (node == null) + { + return null; } - /** - * printNode is mainly for debugging purposes. - * - * @param node SequenceNode - */ - public void printNode(SequenceNode node) + if (node.ycount == count) { - if (node == null) - { - return; - } + found = node.element(); - if ((node.left() == null) && (node.right() == null)) - { - System.out.println("Leaf = " + - ((SequenceI) node.element()).getName()); - System.out.println("Dist " + ((SequenceNode) node).dist); - System.out.println("Boot " + node.getBootstrap()); - } - else - { - System.out.println("Dist " + ((SequenceNode) node).dist); - printNode((SequenceNode) node.left()); - printNode((SequenceNode) node.right()); - } + return found; } - - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - */ - public void findMaxDist(SequenceNode node) + else { - if (node == null) - { - return; - } + _findLeaf( (SequenceNode) node.left(), count); + _findLeaf( (SequenceNode) node.right(), count); + } - if ((node.left() == null) && (node.right() == null)) - { - float dist = ((SequenceNode) node).dist; + return found; + } - if (dist > maxDistValue) - { - maxdist = (SequenceNode) node; - maxDistValue = dist; - } - } - else - { - findMaxDist((SequenceNode) node.left()); - findMaxDist((SequenceNode) node.right()); - } + /** + * printNode is mainly for debugging purposes. + * + * @param node SequenceNode + */ + public void printNode(SequenceNode node) + { + if (node == null) + { + return; } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getGroups() + if ( (node.left() == null) && (node.right() == null)) { - return groups; + System.out.println("Leaf = " + + ( (SequenceI) node.element()).getName()); + System.out.println("Dist " + ( (SequenceNode) node).dist); + System.out.println("Boot " + node.getBootstrap()); } + else + { + System.out.println("Dist " + ( (SequenceNode) node).dist); + printNode( (SequenceNode) node.left()); + printNode( (SequenceNode) node.right()); + } + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float getMaxHeight() + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + */ + public void findMaxDist(SequenceNode node) + { + if (node == null) { - return maxheight; + return; } - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param threshold DOCUMENT ME! - */ - public void groupNodes(SequenceNode node, float threshold) + if ( (node.left() == null) && (node.right() == null)) { - if (node == null) - { - return; - } + float dist = ( (SequenceNode) node).dist; - if ((node.height / maxheight) > threshold) - { - groups.addElement(node); - } - else - { - groupNodes((SequenceNode) node.left(), threshold); - groupNodes((SequenceNode) node.right(), threshold); - } + if (dist > maxDistValue) + { + maxdist = (SequenceNode) node; + maxDistValue = dist; + } + } + else + { + findMaxDist( (SequenceNode) node.left()); + findMaxDist( (SequenceNode) node.right()); } + } - /** - * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findHeight(SequenceNode node) + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getGroups() + { + return groups; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public float getMaxHeight() + { + return maxheight; + } + + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + * @param threshold DOCUMENT ME! + */ + public void groupNodes(SequenceNode node, float threshold) + { + if (node == null) { - if (node == null) - { - return maxheight; - } + return; + } - if ((node.left() == null) && (node.right() == null)) - { - node.height = ((SequenceNode) node.parent()).height + node.dist; + if ( (node.height / maxheight) > threshold) + { + groups.addElement(node); + } + else + { + groupNodes( (SequenceNode) node.left(), threshold); + groupNodes( (SequenceNode) node.right(), threshold); + } + } - if (node.height > maxheight) - { - return node.height; - } - else - { - return maxheight; - } - } - else - { - if (node.parent() != null) - { - node.height = ((SequenceNode) node.parent()).height + - node.dist; - } - else - { - maxheight = 0; - node.height = (float) 0.0; - } + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public float findHeight(SequenceNode node) + { + if (node == null) + { + return maxheight; + } - maxheight = findHeight((SequenceNode) (node.left())); - maxheight = findHeight((SequenceNode) (node.right())); - } + if ( (node.left() == null) && (node.right() == null)) + { + node.height = ( (SequenceNode) node.parent()).height + node.dist; + if (node.height > maxheight) + { + return node.height; + } + else + { return maxheight; + } + } + else + { + if (node.parent() != null) + { + node.height = ( (SequenceNode) node.parent()).height + + node.dist; + } + else + { + maxheight = 0; + node.height = (float) 0.0; + } + + maxheight = findHeight( (SequenceNode) (node.left())); + maxheight = findHeight( (SequenceNode) (node.right())); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceNode reRoot() + return maxheight; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceNode reRoot() + { + if (maxdist != null) { - if (maxdist != null) - { - ycount = 0; + ycount = 0; - float tmpdist = maxdist.dist; + float tmpdist = maxdist.dist; - // New top - SequenceNode sn = new SequenceNode(); - sn.setParent(null); + // New top + SequenceNode sn = new SequenceNode(); + sn.setParent(null); - // New right hand of top - SequenceNode snr = (SequenceNode) maxdist.parent(); - changeDirection(snr, maxdist); - System.out.println("Printing reversed tree"); - printN(snr); - snr.dist = tmpdist / 2; - maxdist.dist = tmpdist / 2; + // New right hand of top + SequenceNode snr = (SequenceNode) maxdist.parent(); + changeDirection(snr, maxdist); + System.out.println("Printing reversed tree"); + printN(snr); + snr.dist = tmpdist / 2; + maxdist.dist = tmpdist / 2; - snr.setParent(sn); - maxdist.setParent(sn); + snr.setParent(sn); + maxdist.setParent(sn); - sn.setRight(snr); - sn.setLeft(maxdist); + sn.setRight(snr); + sn.setLeft(maxdist); - top = sn; + top = sn; - ycount = 0; - reCount(top); - findHeight(top); - } + ycount = 0; + reCount(top); + findHeight(top); + } + + return top; + } - return top; + /** + * + * @return true if original sequence data can be recovered + */ + public boolean hasOriginalSequenceData() + { + return seqData != null; + } + + /** + * Returns original alignment data used for calculation - or null where + * not available. + * + * @return null or cut'n'pasteable alignment + */ + public String printOriginalSequenceData(char gapChar) + { + if (seqData == null) + { + return null; } - /** - * - * @return true if original sequence data can be recovered - */ - public boolean hasOriginalSequenceData() { - return seqData!=null; + + StringBuffer sb = new StringBuffer(); + String[] seqdatas = seqData.getSequenceStrings(gapChar); + for (int i = 0; i < seqdatas.length; i++) + { + sb.append(new jalview.util.Format("%-" + 15 + "s").form( + sequence[i].getName())); + sb.append(" " + seqdatas[i] + "\n"); } - /** - * Returns original alignment data used for calculation - or null where - * not available. - * - * @return null or cut'n'pasteable alignment - */ - public String printOriginalSequenceData(char gapChar) + return sb.toString(); + } + + /** + * DOCUMENT ME! + * + * @param node DOCUMENT ME! + */ + public void printN(SequenceNode node) + { + if (node == null) { - if (seqData==null) - return null; + return; + } - StringBuffer sb = new StringBuffer(); - String[] seqdatas = seqData.getSequenceStrings(gapChar); - for(int i=0; i