/*
* Jalview - A Sequence Alignment Editor and Viewer
* Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package jalview.analysis;
import java.util.*;
import jalview.datamodel.*;
import jalview.util.*;
/**
* Routines for manipulating the order of a multiple sequence alignment
* TODO: this class retains some global states concerning sort-order which should be made attributes for the caller's alignment visualization.
* TODO: refactor to allow a subset of selected sequences to be sorted within the context of a whole alignment.
* Sort method template is: SequenceI[] tobesorted, [ input data mapping to each tobesorted element to use ], Alignment context of tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie seuqence to be sorted w.r.t.])
* sortinplace implies that the sorted vector resulting from applying the operation to tobesorted should be mapped back to the original positions in alignment.
* Otherwise, normal behaviour is to re order alignment so that tobesorted is sorted and grouped together starting from the first tobesorted position in the alignment.
* e.g. (a,tb2,b,tb1,c,tb3 becomes a,tb1,tb2,tb3,b,c)
*/
public class AlignmentSorter
{
static boolean sortIdAscending = true;
static int lastGroupHash = 0;
static boolean sortGroupAscending = true;
static AlignmentOrder lastOrder = null;
static boolean sortOrderAscending = true;
static NJTree lastTree = null;
static boolean sortTreeAscending = true;
private static String lastSortByScore;
/**
* Sort by Percentage Identity w.r.t. s
*
* @param align AlignmentI
* @param s SequenceI
* @param tosort sequences from align that are to be sorted.
*/
public static void sortByPID(AlignmentI align, SequenceI s, SequenceI[] tosort)
{
int nSeq = align.getHeight();
float[] scores = new float[nSeq];
SequenceI[] seqs = new SequenceI[nSeq];
for (int i = 0; i < nSeq; i++)
{
scores[i] = Comparison.PID(align.getSequenceAt(i).getSequenceAsString(),
s.getSequenceAsString());
seqs[i] = align.getSequenceAt(i);
}
QuickSort.sort(scores, 0, scores.length - 1, seqs);
setReverseOrder(align, seqs);
}
/**
* Reverse the order of the sort
*
* @param align DOCUMENT ME!
* @param seqs DOCUMENT ME!
*/
private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
{
int nSeq = seqs.length;
int len = 0;
if ( (nSeq % 2) == 0)
{
len = nSeq / 2;
}
else
{
len = (nSeq + 1) / 2;
}
// NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
for (int i = 0; i < len; i++)
{
//SequenceI tmp = seqs[i];
align.getSequences().setElementAt(seqs[nSeq - i - 1], i);
align.getSequences().setElementAt(seqs[i], nSeq - i - 1);
}
}
/**
* Sets the Alignment object with the given sequences
*
* @param align Alignment object to be updated
* @param tmp sequences as a vector
*/
private static void setOrder(AlignmentI align, Vector tmp)
{
setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
}
/**
* Sets the Alignment object with the given sequences
*
* @param align DOCUMENT ME!
* @param seqs sequences as an array
*/
public static void setOrder(AlignmentI align, SequenceI[] seqs)
{
// NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
Vector algn = align.getSequences();
Vector tmp = new Vector();
for (int i = 0; i < seqs.length; i++)
{
if (algn.contains(seqs[i]))
{
tmp.addElement(seqs[i]);
}
}
algn.removeAllElements();
//User may have hidden seqs, then clicked undo or redo
for (int i = 0; i < tmp.size(); i++)
{
algn.addElement(tmp.elementAt(i));
}
}
/**
* Sorts by ID. Numbers are sorted before letters.
*
* @param align The alignment object to sort
*/
public static void sortByID(AlignmentI align)
{
int nSeq = align.getHeight();
String[] ids = new String[nSeq];
SequenceI[] seqs = new SequenceI[nSeq];
for (int i = 0; i < nSeq; i++)
{
ids[i] = align.getSequenceAt(i).getName();
seqs[i] = align.getSequenceAt(i);
}
QuickSort.sort(ids, seqs);
if (sortIdAscending)
{
setReverseOrder(align, seqs);
}
else
{
setOrder(align, seqs);
}
sortIdAscending = !sortIdAscending;
}
/**
* Sorts the alignment by size of group.
*
Maintains the order of sequences in each group
* by order in given alignment object.
*
* @param align sorts the given alignment object by group
*/
public static void sortByGroup(AlignmentI align)
{
//MAINTAINS ORIGNAL SEQUENCE ORDER,
//ORDERS BY GROUP SIZE
Vector groups = new Vector();
if (groups.hashCode() != lastGroupHash)
{
sortGroupAscending = true;
lastGroupHash = groups.hashCode();
}
else
{
sortGroupAscending = !sortGroupAscending;
}
//SORTS GROUPS BY SIZE
//////////////////////
for (int i = 0; i < align.getGroups().size(); i++)
{
SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i);
for (int j = 0; j < groups.size(); j++)
{
SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
if (sg.getSize() > sg2.getSize())
{
groups.insertElementAt(sg, j);
break;
}
}
if (!groups.contains(sg))
{
groups.addElement(sg);
}
}
//NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
///////////////////////////////////////////////
Vector seqs = new Vector();
for (int i = 0; i < groups.size(); i++)
{
SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
for (int j = 0; j < orderedseqs.length; j++)
{
seqs.addElement(orderedseqs[j]);
}
}
if (sortGroupAscending)
{
setOrder(align, seqs);
}
else
{
setReverseOrder(align,
vectorSubsetToArray(seqs, align.getSequences()));
}
}
/**
* Converts Vector to array.
* java 1.18 does not have Vector.toArray()
*
* @param tmp Vector of SequenceI objects
*
* @return array of Sequence[]
*/
private static SequenceI[] vectorToArray(Vector tmp)
{
SequenceI[] seqs = new SequenceI[tmp.size()];
for (int i = 0; i < tmp.size(); i++)
{
seqs[i] = (SequenceI) tmp.elementAt(i);
}
return seqs;
}
/**
* DOCUMENT ME!
*
* @param tmp DOCUMENT ME!
* @param mask DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask)
{
Vector seqs = new Vector();
int i;
boolean[] tmask = new boolean[mask.size()];
for (i = 0; i < mask.size(); i++)
{
tmask[i] = true;
}
for (i = 0; i < tmp.size(); i++)
{
Object sq = tmp.elementAt(i);
if (mask.contains(sq) && tmask[mask.indexOf(sq)])
{
tmask[mask.indexOf(sq)] = false;
seqs.addElement(sq);
}
}
for (i = 0; i < tmask.length; i++)
{
if (tmask[i])
{
seqs.addElement(mask.elementAt(i));
}
}
return vectorToArray(seqs);
}
/**
* Sorts by a given AlignmentOrder object
*
* @param align Alignment to order
* @param order specified order for alignment
*/
public static void sortBy(AlignmentI align, AlignmentOrder order)
{
// Get an ordered vector of sequences which may also be present in align
Vector tmp = order.getOrder();
if (lastOrder == order)
{
sortOrderAscending = !sortOrderAscending;
}
else
{
sortOrderAscending = true;
}
if (sortOrderAscending)
{
setOrder(align, tmp);
}
else
{
setReverseOrder(align,
vectorSubsetToArray(tmp, align.getSequences()));
}
}
/**
* DOCUMENT ME!
*
* @param align alignment to order
* @param tree tree which has
*
* @return DOCUMENT ME!
*/
private static Vector getOrderByTree(AlignmentI align, NJTree tree)
{
int nSeq = align.getHeight();
Vector tmp = new Vector();
tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
if (tmp.size() != nSeq)
{
// TODO: JBPNote - decide if this is always an error
// (eg. not when a tree is associated to another alignment which has more
// sequences)
if (tmp.size() < nSeq)
{
addStrays(align, tmp);
}
if (tmp.size() != nSeq)
{
System.err.println("ERROR: tmp.size()=" + tmp.size() +
" != nseq=" + nSeq + " in getOrderByTree");
}
}
return tmp;
}
/**
* Sorts the alignment by a given tree
*
* @param align alignment to order
* @param tree tree which has
*/
public static void sortByTree(AlignmentI align, NJTree tree)
{
Vector tmp = getOrderByTree(align, tree);
// tmp should properly permute align with tree.
if (lastTree != tree)
{
sortTreeAscending = true;
lastTree = tree;
}
else
{
sortTreeAscending = !sortTreeAscending;
}
if (sortTreeAscending)
{
setOrder(align, tmp);
}
else
{
setReverseOrder(align,
vectorSubsetToArray(tmp, align.getSequences()));
}
}
/**
* DOCUMENT ME!
*
* @param align DOCUMENT ME!
* @param seqs DOCUMENT ME!
*/
private static void addStrays(AlignmentI align, Vector seqs)
{
int nSeq = align.getHeight();
for (int i = 0; i < nSeq; i++)
{
if (!seqs.contains(align.getSequenceAt(i)))
{
seqs.addElement(align.getSequenceAt(i));
}
}
if (nSeq != seqs.size())
{
System.err.println(
"ERROR: Size still not right even after addStrays");
}
}
/**
* DOCUMENT ME!
*
* @param node DOCUMENT ME!
* @param tmp DOCUMENT ME!
* @param seqset DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private static Vector _sortByTree(SequenceNode node, Vector tmp,
Vector seqset)
{
if (node == null)
{
return tmp;
}
SequenceNode left = (SequenceNode) node.left();
SequenceNode right = (SequenceNode) node.right();
if ( (left == null) && (right == null))
{
if (!node.isPlaceholder() && (node.element() != null))
{
if (node.element() instanceof SequenceI)
{
if (!tmp.contains(node.element()))
{
tmp.addElement( (SequenceI) node.element());
}
}
}
return tmp;
}
else
{
_sortByTree(left, tmp, seqset);
_sortByTree(right, tmp, seqset);
}
return tmp;
}
// Ordering Objects
// Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in appropriate order
//
/**
* recover the order of sequences given by the safe numbering scheme introducd
* SeqsetUtils.uniquify.
*/
public static void recoverOrder(SequenceI[] alignment)
{
float[] ids = new float[alignment.length];
for (int i = 0; i < alignment.length; i++)
{
ids[i] = (new Float(alignment[i].getName().substring(8))).floatValue();
}
jalview.util.QuickSort.sort(ids, alignment);
}
/**
* Sort sequence in order of increasing score attribute for annotation with a particular
* scoreLabel. Or reverse if same label was used previously
* @param scoreLabel exact label for sequence associated AlignmentAnnotation scores to use for sorting.
* @param alignment sequences to be sorted
*/
public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment)
{
SequenceI[] seqs = alignment.getSequencesArray();
boolean[] hasScore = new boolean[seqs.length]; // per sequence score presence
int hasScores=0; // number of scores present on set
double[] scores = new double[seqs.length];
double min=0,max=0;
for (int i = 0; i < seqs.length; i++)
{
AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
if (scoreAnn!=null)
{
hasScores++;
hasScore[i] = true;
scores[i] = scoreAnn[0].getScore(); // take the first instance of this score.
if (hasScores==1)
{
max = min = scores[i];
} else
{
if (maxscores[i])
{
min = scores[i];
}
}
}
else
{
hasScore[i] = false;
}
}
if (hasScores==0)
{
return; // do nothing - no scores present to sort by.
}
if (hasScores