/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
- * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
- *
- * This program is free software; you can redistribute it and/or
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
*/
package jalview.analysis;
*/
public class AlignmentSorter
{
+ /**
+ * todo: refactor searches to follow a basic pattern: (search property, last
+ * search state, current sort direction)
+ */
static boolean sortIdAscending = true;
static int lastGroupHash = 0;
static boolean sortTreeAscending = true;
+ /**
+ * last Annotation Label used by sortByScore
+ */
private static String lastSortByScore;
+ private static boolean sortByScoreAscending = true;
+
+ /**
+ * compact representation of last arguments to SortByFeatureScore
+ */
+ private static String lastSortByFeatureScore;
+
+ private static boolean sortByFeatureScoreAscending = true;
+
+ private static boolean sortLengthAscending;
+
/**
* Sort by Percentage Identity w.r.t. s
- *
+ *
* @param align
- * AlignmentI
+ * AlignmentI
* @param s
- * SequenceI
+ * SequenceI
* @param tosort
- * sequences from align that are to be sorted.
+ * sequences from align that are to be sorted.
*/
public static void sortByPID(AlignmentI align, SequenceI s,
SequenceI[] tosort)
{
+ sortByPID(align, s, tosort, 0, -1);
+ }
+
+ /**
+ * Sort by Percentage Identity w.r.t. s
+ *
+ * @param align
+ * AlignmentI
+ * @param s
+ * SequenceI
+ * @param tosort
+ * sequences from align that are to be sorted.
+ * @param start
+ * start column (0 for beginning)
+ * @param end
+ * end column (the three-argument overload passes -1)
+ */
+ public static void sortByPID(AlignmentI align, SequenceI s,
+ SequenceI[] tosort, int start, int end)
+ {
int nSeq = align.getHeight();
float[] scores = new float[nSeq];
/**
* Reverse the order of the sort
- *
+ *
* @param align
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param seqs
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
{
}
// NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
- for (int i = 0; i < len; i++)
+ List<SequenceI> asq;
+ synchronized (asq = align.getSequences())
{
- // SequenceI tmp = seqs[i];
- align.getSequences().setElementAt(seqs[nSeq - i - 1], i);
- align.getSequences().setElementAt(seqs[i], nSeq - i - 1);
+ for (int i = 0; i < len; i++)
+ {
+ // SequenceI tmp = seqs[i];
+ asq.set(i, seqs[nSeq - i - 1]);
+ asq.set(nSeq - i - 1, seqs[i]);
+ }
}
}
/**
* Sets the Alignment object with the given sequences
- *
+ *
* @param align
- * Alignment object to be updated
+ * Alignment object to be updated
* @param tmp
- * sequences as a vector
+ * sequences as a vector
*/
private static void setOrder(AlignmentI align, Vector tmp)
{
/**
* Sets the Alignment object with the given sequences
- *
+ *
* @param align
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param seqs
- * sequences as an array
+ * sequences as an array
*/
public static void setOrder(AlignmentI align, SequenceI[] seqs)
{
// NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
- Vector algn = align.getSequences();
- Vector tmp = new Vector();
-
- for (int i = 0; i < seqs.length; i++)
+ List<SequenceI> algn;
+ synchronized (algn = align.getSequences())
{
- if (algn.contains(seqs[i]))
+ List<SequenceI> tmp = new ArrayList<SequenceI>();
+
+ for (int i = 0; i < seqs.length; i++)
{
- tmp.addElement(seqs[i]);
+ if (algn.contains(seqs[i]))
+ {
+ tmp.add(seqs[i]);
+ }
}
- }
- algn.removeAllElements();
- // User may have hidden seqs, then clicked undo or redo
- for (int i = 0; i < tmp.size(); i++)
- {
- algn.addElement(tmp.elementAt(i));
+ algn.clear();
+ // User may have hidden seqs, then clicked undo or redo
+ for (int i = 0; i < tmp.size(); i++)
+ {
+ algn.add(tmp.get(i));
+ }
}
-
}
/**
* Sorts by ID. Numbers are sorted before letters.
- *
+ *
* @param align
- * The alignment object to sort
+ * The alignment object to sort
*/
public static void sortByID(AlignmentI align)
{
}
/**
+ * Sorts by sequence length
+ *
+ * @param align
+ * The alignment object to sort
+ */
+ public static void sortByLength(AlignmentI align)
+ {
+   // Snapshot every row of the alignment together with a numeric key:
+   // getEnd() minus getStart() of that row's sequence.
+   final int rowCount = align.getHeight();
+   final SequenceI[] rows = new SequenceI[rowCount];
+   final float[] spans = new float[rowCount];
+
+   int row = 0;
+   while (row < rowCount)
+   {
+     final SequenceI current = align.getSequenceAt(row);
+     rows[row] = current;
+     spans[row] = (current.getEnd() - current.getStart());
+     row++;
+   }
+
+   // Sort on the keys; the sequence array is handed over alongside them.
+   QuickSort.sort(spans, rows);
+
+   // The flag decides which of the two orderings is applied on this call ...
+   if (!sortLengthAscending)
+   {
+     setOrder(align, rows);
+   }
+   else
+   {
+     setReverseOrder(align, rows);
+   }
+
+   // ... and is flipped so the next call applies the other one.
+   sortLengthAscending = !sortLengthAscending;
+ }
+
+ /**
* Sorts the alignment by size of group. <br>
* Maintains the order of sequences in each group by order in given alignment
* object.
- *
+ *
* @param align
- * sorts the given alignment object by group
+ * sorts the given alignment object by group
*/
public static void sortByGroup(AlignmentI align)
{
// SORTS GROUPS BY SIZE
// ////////////////////
- for (int i = 0; i < align.getGroups().size(); i++)
+ for (SequenceGroup sg : align.getGroups())
{
- SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i);
-
for (int j = 0; j < groups.size(); j++)
{
SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
/**
* Converts Vector to array. java 1.18 does not have Vector.toArray()
- *
+ *
* @param tmp
- * Vector of SequenceI objects
- *
+ * Vector of SequenceI objects
+ *
* @return array of Sequence[]
*/
private static SequenceI[] vectorToArray(Vector tmp)
}
/**
- * DOCUMENT ME!
- *
+ * Select the sequences from tmp, in order, that are present in mask,
+ * followed by any remaining sequences in mask that are not in tmp
+ *
* @param tmp
- * DOCUMENT ME!
+ * thread safe collection of sequences
* @param mask
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * thread safe collection of sequences
+ *
+ * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
*/
- private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask)
+ private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
+ List<SequenceI> mask)
{
- Vector seqs = new Vector();
- int i;
+ ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
+ int i, idx;
boolean[] tmask = new boolean[mask.size()];
for (i = 0; i < mask.size(); i++)
for (i = 0; i < tmp.size(); i++)
{
- Object sq = tmp.elementAt(i);
-
- if (mask.contains(sq) && tmask[mask.indexOf(sq)])
+ SequenceI sq = tmp.get(i);
+ idx = mask.indexOf(sq);
+ if (idx > -1 && tmask[idx])
{
- tmask[mask.indexOf(sq)] = false;
- seqs.addElement(sq);
+ tmask[idx] = false;
+ seqs.add(sq);
}
}
{
if (tmask[i])
{
- seqs.addElement(mask.elementAt(i));
+ seqs.add(mask.get(i));
}
}
- return vectorToArray(seqs);
+ return seqs.toArray(new SequenceI[seqs.size()]);
}
/**
* Sorts by a given AlignmentOrder object
- *
+ *
* @param align
- * Alignment to order
+ * Alignment to order
* @param order
- * specified order for alignment
+ * specified order for alignment
*/
public static void sortBy(AlignmentI align, AlignmentOrder order)
{
/**
* DOCUMENT ME!
- *
+ *
* @param align
- * alignment to order
+ * alignment to order
* @param tree
- * tree which has
- *
+ * tree which has
+ *
* @return DOCUMENT ME!
*/
private static Vector getOrderByTree(AlignmentI align, NJTree tree)
// TODO: JBPNote - decide if this is always an error
// (eg. not when a tree is associated to another alignment which has more
// sequences)
- if (tmp.size() < nSeq)
+ if (tmp.size() != nSeq)
{
addStrays(align, tmp);
}
if (tmp.size() != nSeq)
{
- System.err.println("ERROR: tmp.size()=" + tmp.size() + " != nseq="
- + nSeq + " in getOrderByTree");
+ System.err
+ .println("WARNING: tmp.size()="
+ + tmp.size()
+ + " != nseq="
+ + nSeq
+ + " in getOrderByTree - tree contains sequences not in alignment");
}
}
/**
* Sorts the alignment by a given tree
- *
+ *
* @param align
- * alignment to order
+ * alignment to order
* @param tree
- * tree which has
+ * tree which has
*/
public static void sortByTree(AlignmentI align, NJTree tree)
{
/**
* DOCUMENT ME!
- *
+ *
* @param align
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param seqs
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
private static void addStrays(AlignmentI align, Vector seqs)
{
/**
* DOCUMENT ME!
- *
+ *
* @param node
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param tmp
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param seqset
- * DOCUMENT ME!
- *
+ * DOCUMENT ME!
+ *
* @return DOCUMENT ME!
*/
private static Vector _sortByTree(SequenceNode node, Vector tmp,
- Vector seqset)
+ List<SequenceI> seqset)
{
if (node == null)
{
{
if (node.element() instanceof SequenceI)
{
- if (!tmp.contains(node.element()))
+ if (!tmp.contains(node.element())) // && (seqset==null ||
+ // seqset.size()==0 ||
+ // seqset.contains(tmp)))
{
- tmp.addElement((SequenceI) node.element());
+ tmp.addElement(node.element());
}
}
}
/**
* Sort sequence in order of increasing score attribute for annotation with a
* particular scoreLabel. Or reverse if same label was used previously
- *
+ *
* @param scoreLabel
- * exact label for sequence associated AlignmentAnnotation
- * scores to use for sorting.
+ * exact label for sequence associated AlignmentAnnotation scores to
+ * use for sorting.
* @param alignment
- * sequences to be sorted
+ * sequences to be sorted
*/
public static void sortByAnnotationScore(String scoreLabel,
AlignmentI alignment)
{
SequenceI[] seqs = alignment.getSequencesArray();
boolean[] hasScore = new boolean[seqs.length]; // per sequence score
- // presence
+ // presence
int hasScores = 0; // number of scores present on set
double[] scores = new double[seqs.length];
double min = 0, max = 0;
hasScores++;
hasScore[i] = true;
scores[i] = scoreAnn[0].getScore(); // take the first instance of this
- // score.
+ // score.
if (hasScores == 1)
{
max = min = scores[i];
{
if (!hasScore[i])
{
- scores[i] = (max + i);
+ scores[i] = (max + i + 1.0);
}
}
}
setReverseOrder(alignment, seqs);
}
}
+
+ /**
+  * Feature ordering method: sort by score - average score (or total score)
+  * over all features in the region.
+  */
+ public static final String FEATURE_SCORE = "average_score";
+
+ /**
+  * Feature ordering method: sort by feature label text (or, if that is null,
+  * by feature type text) - numerical or alphabetical.
+  */
+ public static final String FEATURE_LABEL = "text";
+
+ /**
+  * Feature ordering method: sort by feature density, based on counts -
+  * ignoring the individual text or scores of each feature.
+  */
+ public static final String FEATURE_DENSITY = "density";
+
+ /**
+ * sort the alignment using the features on each sequence found between start
+ * and stop with the given featureLabel (and optional group qualifier)
+ *
+ * @param featureLabel
+ * (may not be null)
+ * @param groupLabel
+ * (may be null)
+ * @param start
+ * (-1 to include non-positional features)
+ * @param stop
+ * (-1 to only sort on non-positional features)
+ * @param alignment
+ * - aligned sequences containing features
+ * @param method
+ * - one of the string constants FEATURE_SCORE, FEATURE_LABEL,
+ * FEATURE_DENSITY
+ */
+ public static void sortByFeature(String featureLabel, String groupLabel,
+         int start, int stop, AlignmentI alignment, String method)
+ {
+   // A non-null label is wrapped in a one-element array; a null label is
+   // handed on as a null array.
+   String[] featureLabels = null;
+   if (featureLabel != null)
+   {
+     featureLabels = new String[] { featureLabel };
+   }
+
+   String[] groupLabels = null;
+   if (groupLabel != null)
+   {
+     groupLabels = new String[] { groupLabel };
+   }
+
+   // Delegate to the multi-label overload.
+   sortByFeature(featureLabels, groupLabels, start, stop, alignment, method);
+ }
+
+ /**
+  * Case-insensitive membership test of a label against a list of labels.
+  *
+  * @param lab
+  *          label to look for
+  * @param labs
+  *          candidate labels; null entries are skipped
+  * @return true if labs is null, or if lab is non-null and equals (ignoring
+  *         case) some non-null entry of labs; false otherwise
+  */
+ private static boolean containsIgnoreCase(final String lab,
+         final String[] labs)
+ {
+   // No candidate list at all counts as a match.
+   if (labs == null)
+   {
+     return true;
+   }
+
+   boolean found = false;
+   if (lab != null)
+   {
+     for (final String candidate : labs)
+     {
+       if (candidate != null && lab.equalsIgnoreCase(candidate))
+       {
+         found = true;
+         break;
+       }
+     }
+   }
+   return found;
+ }
+
+ /**
+  * Sort the alignment using the features on each sequence found between start
+  * and stop whose type matches one of featureLabels (and, optionally, whose
+  * feature group matches one of groupLabels). Calling again with the same
+  * arguments reverses the ordering applied by the previous call.
+  *
+  * @param featureLabels
+  *          feature types to select (null selects every type)
+  * @param groupLabels
+  *          feature groups to select (null selects every group; a feature
+  *          with no group is never rejected by this filter)
+  * @param start
+  *          (-1 to include non-positional features)
+  * @param stop
+  *          (-1 to only sort on non-positional features)
+  * @param alignment
+  *          aligned sequences containing features
+  * @param method
+  *          one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY (compared
+  *          by value)
+  * @throws Error
+  *           if method is not one of the three constants, or if it is
+  *           FEATURE_LABEL, which is not yet implemented
+  */
+ public static void sortByFeature(String[] featureLabels,
+         String[] groupLabels, int start, int stop, AlignmentI alignment,
+         String method)
+ {
+   // Compare by value: a caller may pass an equal String that is not the
+   // very same instance as the constant.
+   final boolean byScore = FEATURE_SCORE.equals(method);
+   final boolean byLabel = FEATURE_LABEL.equals(method);
+   final boolean byDensity = FEATURE_DENSITY.equals(method);
+   if (!byScore && !byLabel && !byDensity)
+   {
+     throw new Error(
+             "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY.");
+   }
+
+   // Build a key describing this call, used to detect a repeated sort.
+   // Every component is delimited so that different argument sets cannot
+   // collapse onto the same key (e.g. start=1,stop=20 versus start=2,stop=19).
+   // This doesn't quite work yet - we'd like to have a canonical ordering that
+   // can be preserved from call to call
+   final StringBuilder scoreLabel = new StringBuilder();
+   scoreLabel.append(start).append(':').append(stop).append(':')
+           .append(method);
+   if (featureLabels != null)
+   {
+     for (String label : featureLabels)
+     {
+       scoreLabel.append("|f=").append(label); // a null label appends "null"
+     }
+   }
+   if (groupLabels != null)
+   {
+     for (String label : groupLabels)
+     {
+       scoreLabel.append("|g=").append(label);
+     }
+   }
+
+   final SequenceI[] seqs = alignment.getSequencesArray();
+   final int nSeq = seqs.length;
+
+   final boolean[] hasScore = new boolean[nSeq]; // per sequence score presence
+   int hasScores = 0; // number of sequences with at least one score
+   final double[] scores = new double[nSeq]; // score sum, then average
+   final int[] seqScores = new int[nSeq]; // number of scores summed per sequence
+   final SequenceFeature[][] feats = new SequenceFeature[nSeq][];
+   double max = 0;
+
+   for (int i = 0; i < nSeq; i++)
+   {
+     SequenceFeature[] sf = seqs[i].getSequenceFeatures();
+     if (sf == null && seqs[i].getDatasetSequence() != null)
+     {
+       sf = seqs[i].getDatasetSequence().getSequenceFeatures();
+     }
+     // Work on a private copy: rejected entries are nulled out below and the
+     // sequence's own feature array must not be modified.
+     sf = (sf == null) ? new SequenceFeature[0] : sf.clone();
+
+     final int sstart = (start == -1) ? start : seqs[i].findPosition(start);
+     final int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
+
+     int n = sf.length;
+     for (int f = 0; f < sf.length; f++)
+     {
+       final SequenceFeature feature = sf[f];
+       // ignore features outwith alignment start-stop positions.
+       final boolean outsideRange = feature.end < sstart
+               || feature.begin > sstop;
+       // or ignore based on selection criteria
+       final boolean wrongType = featureLabels != null
+               && !containsIgnoreCase(feature.type, featureLabels);
+       // problem here: we cannot eliminate null feature group features
+       final String group = feature.getFeatureGroup();
+       final boolean wrongGroup = groupLabels != null && group != null
+               && !containsIgnoreCase(group, groupLabels);
+
+       if (outsideRange || wrongType || wrongGroup)
+       {
+         // forget about this feature
+         sf[f] = null;
+         n--;
+       }
+       else if (byScore)
+       {
+         // NaN never compares equal to anything, so it has to be tested
+         // with isNaN; features carrying a NaN score are not counted.
+         final double score = feature.getScore();
+         if (!Double.isNaN(score))
+         {
+           if (seqScores[i] == 0)
+           {
+             hasScores++;
+           }
+           seqScores[i]++;
+           hasScore[i] = true;
+           scores[i] += score;
+         }
+       }
+     }
+
+     // Compact the surviving features into an array of exactly the right size.
+     final SequenceFeature[] fs = new SequenceFeature[n];
+     feats[i] = fs;
+     if (n > 0)
+     {
+       int k = 0;
+       for (SequenceFeature feature : sf)
+       {
+         if (feature != null)
+         {
+           fs[k++] = feature;
+         }
+       }
+       if (byLabel)
+       {
+         // order the labels by alphabet
+         final String[] labs = new String[fs.length];
+         for (int l = 0; l < labs.length; l++)
+         {
+           final String description = fs[l].getDescription();
+           labs[l] = (description != null) ? description : fs[l].getType();
+         }
+         jalview.util.QuickSort.sort(labs, fs);
+       }
+     }
+
+     if (hasScore[i])
+     {
+       // compute average score
+       scores[i] /= seqScores[i];
+       // track the largest average seen so far (the first scored sequence
+       // initialises it)
+       if (hasScores == 1 || max < scores[i])
+       {
+         max = scores[i];
+       }
+     }
+   }
+
+   if (byScore)
+   {
+     if (hasScores == 0)
+     {
+       return; // do nothing - no scores present to sort by.
+     }
+     // pad score matrix: unscored sequences get values above the maximum,
+     // increasing with their original index
+     if (hasScores < nSeq)
+     {
+       for (int i = 0; i < nSeq; i++)
+       {
+         if (!hasScore[i])
+         {
+           scores[i] = (max + 1 + i);
+         }
+       }
+     }
+     jalview.util.QuickSort.sort(scores, seqs);
+   }
+   else if (byDensity)
+   {
+     // break ties between equivalent numbers for adjacent sequences by adding
+     // 1/Nseq*i on the original order
+     final double fr = 0.9 / (1.0 * nSeq);
+     for (int i = 0; i < nSeq; i++)
+     {
+       scores[i] = (0.05 + fr * i) + feats[i].length;
+     }
+     jalview.util.QuickSort.sort(scores, seqs);
+   }
+   else
+   {
+     // only FEATURE_LABEL can reach this point
+     throw new Error("Not yet implemented.");
+   }
+
+   // A repeat of the previous call flips the direction; anything else starts
+   // again with the first direction.
+   final String key = scoreLabel.toString();
+   if (key.equals(lastSortByFeatureScore))
+   {
+     sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
+   }
+   else
+   {
+     sortByFeatureScoreAscending = true;
+   }
+
+   if (sortByFeatureScoreAscending)
+   {
+     setOrder(alignment, seqs);
+   }
+   else
+   {
+     setReverseOrder(alignment, seqs);
+   }
+   lastSortByFeatureScore = key;
+ }
+
}