/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Vector;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.BinaryNode;
import jalview.datamodel.ContactListI;
import jalview.datamodel.ContactMatrixI;
import jalview.math.Matrix;
import jalview.viewmodel.AlignmentViewport;
/**
* This class implements distance calculations used in constructing a Average
* Distance tree (also known as UPGMA)
*/
public class AverageDistanceEngine extends TreeEngine
{
ContactMatrixI cm;
AlignmentViewport av;
AlignmentAnnotation aa;
// 0 - normalised dot product
// 1 - L1 - ie (abs(v_1-v_2)/dim(v))
// L1 is more rational - since can reason about value of difference,
// normalised dot product might give cleaner clusters, but more difficult to
// understand.
int mode = 1;
/**
* compute cosine distance matrix for a given contact matrix and create a
* UPGMA tree
*
* @param cm
* @param cosineOrDifference
* false - dot product : true - L1
*/
public AverageDistanceEngine(AlignmentViewport av, AlignmentAnnotation aa,
ContactMatrixI cm, boolean cosineOrDifference)
{
this.av = av;
this.aa = aa;
this.cm = cm;
mode = (cosineOrDifference) ? 1 : 0;
calculate(cm);
}
public void calculate(ContactMatrixI cm)
{
this.cm = cm;
node = new Vector();
clusters = new Vector();
distances = new Matrix(new double[cm.getWidth()][cm.getWidth()]);
noseqs = cm.getWidth();
done = new BitSet();
double moduli[] = new double[cm.getWidth()];
double max;
if (mode == 0)
{
max = 1;
}
else
{
max = cm.getMax() * cm.getMax();
}
for (int i = 0; i < cm.getWidth(); i++)
{
// init the tree engine node for this column
BinaryNode cnode = new BinaryNode();
cnode.setElement(Integer.valueOf(i));
cnode.setName("c" + i);
node.addElement(cnode);
BitSet bs = new BitSet();
bs.set(i);
clusters.addElement(bs);
// compute distance matrix element
ContactListI ith = cm.getContactList(i);
distances.setValue(i, i, 0);
if (ith == null)
{
continue;
}
for (int j = 0; j < i; j++)
{
ContactListI jth = cm.getContactList(j);
if (jth == null)
{
break;
}
double prd = 0;
for (int indx = 0; indx < cm.getHeight(); indx++)
{
if (mode == 0)
{
if (j == 0)
{
moduli[i] += ith.getContactAt(indx) * ith.getContactAt(indx);
}
prd += ith.getContactAt(indx) * jth.getContactAt(indx);
}
else
{
prd += Math
.abs(ith.getContactAt(indx) - jth.getContactAt(indx));
}
}
if (mode == 0)
{
if (j == 0)
{
moduli[i] = Math.sqrt(moduli[i]);
}
prd = (moduli[i] != 0 && moduli[j] != 0)
? prd / (moduli[i] * moduli[j])
: 0;
prd = 1 - prd;
}
else
{
prd /= cm.getHeight();
}
distances.setValue(i, j, prd);
distances.setValue(j, i, prd);
}
}
noClus = clusters.size();
cluster();
}
/**
* Calculates and saves the distance between the combination of cluster(i) and
* cluster(j) and all other clusters. An average of the distances from
* cluster(i) and cluster(j) is calculated, weighted by the sizes of each
* cluster.
*
* @param i
* @param j
*/
@Override
protected void findClusterDistance(int i, int j)
{
int noi = clusters.elementAt(i).cardinality();
int noj = clusters.elementAt(j).cardinality();
// New distances from cluster i to others
double[] newdist = new double[noseqs];
for (int l = 0; l < noseqs; l++)
{
if ((l != i) && (l != j))
{
newdist[l] = ((distances.getValue(i, l) * noi)
+ (distances.getValue(j, l) * noj)) / (noi + noj);
}
else
{
newdist[l] = 0;
}
}
for (int ii = 0; ii < noseqs; ii++)
{
distances.setValue(i, ii, newdist[ii]);
distances.setValue(ii, i, newdist[ii]);
}
}
/**
* {@inheritDoc}
*/
@Override
protected double findMinDistance()
{
double min = Double.MAX_VALUE;
for (int i = 0; i < (noseqs - 1); i++)
{
for (int j = i + 1; j < noseqs; j++)
{
if (!done.get(i) && !done.get(j))
{
if (distances.getValue(i, j) < min)
{
mini = i;
minj = j;
min = distances.getValue(i, j);
}
}
}
}
return min;
}
/**
* {@inheritDoc}
*/
@Override
protected void findNewDistances(BinaryNode nodei, BinaryNode nodej,
double dist)
{
double ih = 0;
double jh = 0;
BinaryNode sni = nodei;
BinaryNode snj = nodej;
while (sni != null)
{
ih = ih + sni.dist;
sni = (BinaryNode) sni.left();
}
while (snj != null)
{
jh = jh + snj.dist;
snj = (BinaryNode) snj.left();
}
nodei.dist = ((dist / 2) - ih);
nodej.dist = ((dist / 2) - jh);
}
/***
* not the right place - OH WELL!
*/
/**
* Makes a list of groups, where each group is represented by a node whose
* height (distance from the root node), as a fraction of the height of the
* whole tree, is greater than the given threshold. This corresponds to
* selecting the nodes immediately to the right of a vertical line
* partitioning the tree (if the tree is drawn with root to the left). Each
* such node represents a group that contains all of the sequences linked to
* the child leaf nodes.
*
* @param threshold
* @see #getGroups()
*/
public List groupNodes(float threshold)
{
List groups = new ArrayList();
_groupNodes(groups, getTopNode(), threshold);
return groups;
}
protected void _groupNodes(List groups, BinaryNode nd,
float threshold)
{
if (nd == null)
{
return;
}
if ((nd.height / maxheight) > threshold)
{
groups.add(nd);
}
else
{
_groupNodes(groups, nd.left(), threshold);
_groupNodes(groups, nd.right(), threshold);
}
}
/**
* DOCUMENT ME!
*
* @param nd
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
public double findHeight(BinaryNode nd)
{
if (nd == null)
{
return maxheight;
}
if ((nd.left() == null) && (nd.right() == null))
{
nd.height = ((BinaryNode) nd.parent()).height + nd.dist;
if (nd.height > maxheight)
{
return nd.height;
}
else
{
return maxheight;
}
}
else
{
if (nd.parent() != null)
{
nd.height = ((BinaryNode) nd.parent()).height + nd.dist;
}
else
{
maxheight = 0;
nd.height = (float) 0.0;
}
maxheight = findHeight((BinaryNode) (nd.left()));
maxheight = findHeight((BinaryNode) (nd.right()));
}
return maxheight;
}
/**
* Search for leaf nodes below (or at) the given node
*
* @param top2
* root node to search from
*
* @return
*/
public Vector findLeaves(BinaryNode top2)
{
Vector leaves = new Vector();
findLeaves(top2, leaves);
return leaves;
}
/**
* Search for leaf nodes.
*
* @param nd
* root node to search from
* @param leaves
* Vector of leaves to add leaf node objects too.
*
* @return Vector of leaf nodes on binary tree
*/
Vector findLeaves(BinaryNode nd, Vector leaves)
{
if (nd == null)
{
return leaves;
}
if ((nd.left() == null) && (nd.right() == null)) // Interior node
// detection
{
leaves.addElement(nd);
return leaves;
}
else
{
/*
* TODO: Identify internal nodes... if (node.isSequenceLabel()) {
* leaves.addElement(node); }
*/
findLeaves(nd.left(), leaves);
findLeaves(nd.right(), leaves);
}
return leaves;
}
}