/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
*/
package jalview.analysis;
+import jalview.analysis.scoremodels.PIDModel;
+import jalview.analysis.scoremodels.SimilarityParams;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.datamodel.SequenceNode;
-import jalview.util.Comparison;
-import jalview.util.MessageManager;
import jalview.util.QuickSort;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
-import java.util.Vector;
/**
* Routines for manipulating the order of a multiple sequence alignment TODO:
*/
public class AlignmentSorter
{
- /**
+ /*
* todo: refactor searches to follow a basic pattern: (search property, last
* search state, current sort direction)
*/
static boolean sortOrderAscending = true;
- static NJTree lastTree = null;
+ static TreeModel lastTree = null;
static boolean sortTreeAscending = true;
- /**
- * last Annotation Label used by sortByScore
+ /*
+ * last Annotation Label used for sort by Annotation score
*/
- private static String lastSortByScore;
-
- private static boolean sortByScoreAscending = true;
+ private static String lastSortByAnnotation;
- /**
- * compact representation of last arguments to SortByFeatureScore
+ /*
+ * string hash of last arguments to sortByFeature
+ * (sort order toggles if this is unchanged between sorts)
*/
- private static String lastSortByFeatureScore;
+ private static String sortByFeatureCriteria;
- private static boolean sortByFeatureScoreAscending = true;
+ private static boolean sortByFeatureAscending = true;
private static boolean sortLengthAscending;
/**
- * Sort by Percentage Identity w.r.t. s
- *
- * @param align
- * AlignmentI
- * @param s
- * SequenceI
- * @param tosort
- * sequences from align that are to be sorted.
- */
- public static void sortByPID(AlignmentI align, SequenceI s,
- SequenceI[] tosort)
- {
- sortByPID(align, s, tosort, 0, -1);
- }
-
- /**
- * Sort by Percentage Identity w.r.t. s
+ * Sorts sequences in the alignment by Percentage Identity with the given
+ * reference sequence, sorting the highest identity to the top
*
* @param align
* AlignmentI
* @param s
* SequenceI
- * @param tosort
- * sequences from align that are to be sorted.
- * @param start
- * start column (0 for beginning
- * @param end
*/
- public static void sortByPID(AlignmentI align, SequenceI s,
- SequenceI[] tosort, int start, int end)
+ public static void sortByPID(AlignmentI align, SequenceI s)
{
int nSeq = align.getHeight();
float[] scores = new float[nSeq];
SequenceI[] seqs = new SequenceI[nSeq];
+ String refSeq = s.getSequenceAsString();
+ SimilarityParams pidParams = new SimilarityParams(true, true, true,
+ true);
for (int i = 0; i < nSeq; i++)
{
- scores[i] = Comparison.PID(align.getSequenceAt(i)
- .getSequenceAsString(), s.getSequenceAsString());
+ scores[i] = (float) PIDModel.computePID(align.getSequenceAt(i)
+ .getSequenceAsString(), refSeq, pidParams);
seqs[i] = align.getSequenceAt(i);
}
- QuickSort.sort(scores, 0, scores.length - 1, seqs);
+ QuickSort.sort(scores, seqs);
setReverseOrder(align, seqs);
}
* @param tmp
* sequences as a vector
*/
- private static void setOrder(AlignmentI align, Vector tmp)
+ private static void setOrder(AlignmentI align, List<SequenceI> tmp)
{
setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
}
{
// MAINTAINS ORIGNAL SEQUENCE ORDER,
// ORDERS BY GROUP SIZE
- Vector groups = new Vector();
+ List<SequenceGroup> groups = new ArrayList<SequenceGroup>();
if (groups.hashCode() != lastGroupHash)
{
{
for (int j = 0; j < groups.size(); j++)
{
- SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
+ SequenceGroup sg2 = groups.get(j);
if (sg.getSize() > sg2.getSize())
{
- groups.insertElementAt(sg, j);
+ groups.add(j, sg);
break;
}
if (!groups.contains(sg))
{
- groups.addElement(sg);
+ groups.add(sg);
}
}
// NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
// /////////////////////////////////////////////
- Vector seqs = new Vector();
+ List<SequenceI> seqs = new ArrayList<SequenceI>();
for (int i = 0; i < groups.size(); i++)
{
- SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
+ SequenceGroup sg = groups.get(i);
SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
for (int j = 0; j < orderedseqs.length; j++)
{
- seqs.addElement(orderedseqs[j]);
+ seqs.add(orderedseqs[j]);
}
}
}
/**
- * Converts Vector to array. java 1.18 does not have Vector.toArray()
- *
- * @param tmp
- * Vector of SequenceI objects
- *
- * @return array of Sequence[]
- */
- private static SequenceI[] vectorToArray(Vector tmp)
- {
- SequenceI[] seqs = new SequenceI[tmp.size()];
-
- for (int i = 0; i < tmp.size(); i++)
- {
- seqs[i] = (SequenceI) tmp.elementAt(i);
- }
-
- return seqs;
- }
-
- /**
* Select sequences in order from tmp that is present in mask, and any
- * remaining seqeunces in mask not in tmp
+ * remaining sequences in mask not in tmp
*
* @param tmp
* thread safe collection of sequences
private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
List<SequenceI> mask)
{
+ // or?
+ // tmp2 = tmp.retainAll(mask);
+ // return tmp2.addAll(mask.removeAll(tmp2))
+
ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
int i, idx;
boolean[] tmask = new boolean[mask.size()];
public static void sortBy(AlignmentI align, AlignmentOrder order)
{
// Get an ordered vector of sequences which may also be present in align
- Vector tmp = order.getOrder();
+ List<SequenceI> tmp = order.getOrder();
if (lastOrder == order)
{
*
* @return DOCUMENT ME!
*/
- private static Vector getOrderByTree(AlignmentI align, NJTree tree)
+ private static List<SequenceI> getOrderByTree(AlignmentI align,
+ TreeModel tree)
{
int nSeq = align.getHeight();
- Vector tmp = new Vector();
+ List<SequenceI> tmp = new ArrayList<SequenceI>();
tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
* @param tree
* tree which has
*/
- public static void sortByTree(AlignmentI align, NJTree tree)
+ public static void sortByTree(AlignmentI align, TreeModel tree)
{
- Vector tmp = getOrderByTree(align, tree);
+ List<SequenceI> tmp = getOrderByTree(align, tree);
// tmp should properly permute align with tree.
if (lastTree != tree)
*
* @param align
* DOCUMENT ME!
- * @param seqs
+ * @param tmp
* DOCUMENT ME!
*/
- private static void addStrays(AlignmentI align, Vector seqs)
+ private static void addStrays(AlignmentI align, List<SequenceI> tmp)
{
int nSeq = align.getHeight();
for (int i = 0; i < nSeq; i++)
{
- if (!seqs.contains(align.getSequenceAt(i)))
+ if (!tmp.contains(align.getSequenceAt(i)))
{
- seqs.addElement(align.getSequenceAt(i));
+ tmp.add(align.getSequenceAt(i));
}
}
- if (nSeq != seqs.size())
+ if (nSeq != tmp.size())
{
System.err
.println("ERROR: Size still not right even after addStrays");
*
* @return DOCUMENT ME!
*/
- private static Vector _sortByTree(SequenceNode node, Vector tmp,
- List<SequenceI> seqset)
+ private static List<SequenceI> _sortByTree(SequenceNode node,
+ List<SequenceI> tmp, List<SequenceI> seqset)
{
if (node == null)
{
// seqset.size()==0 ||
// seqset.contains(tmp)))
{
- tmp.addElement(node.element());
+ tmp.add((SequenceI) node.element());
}
}
}
}
jalview.util.QuickSort.sort(scores, seqs);
- if (lastSortByScore != scoreLabel)
+ if (lastSortByAnnotation != scoreLabel)
{
- lastSortByScore = scoreLabel;
+ lastSortByAnnotation = scoreLabel;
setOrder(alignment, seqs);
}
else
public static String FEATURE_DENSITY = "density";
- /**
- * sort the alignment using the features on each sequence found between start
- * and stop with the given featureLabel (and optional group qualifier)
- *
- * @param featureLabel
- * (may not be null)
- * @param groupLabel
- * (may be null)
- * @param start
- * (-1 to include non-positional features)
- * @param stop
- * (-1 to only sort on non-positional features)
- * @param alignment
- * - aligned sequences containing features
- * @param method
- * - one of the string constants FEATURE_SCORE, FEATURE_LABEL,
- * FEATURE_DENSITY
- */
- public static void sortByFeature(String featureLabel, String groupLabel,
- int start, int stop, AlignmentI alignment, String method)
- {
- sortByFeature(featureLabel == null ? null : new String[]
- { featureLabel }, groupLabel == null ? null : new String[]
- { groupLabel }, start, stop, alignment, method);
- }
-
private static boolean containsIgnoreCase(final String lab,
- final String[] labs)
+ final List<String> labs)
{
if (labs == null)
{
{
return false;
}
- for (int q = 0; q < labs.length; q++)
+ for (String label : labs)
{
- if (labs[q] != null && lab.equalsIgnoreCase(labs[q]))
+ if (lab.equalsIgnoreCase(label))
{
return true;
}
return false;
}
- public static void sortByFeature(String[] featureLabels,
- String[] groupLabels, int start, int stop, AlignmentI alignment,
- String method)
+ /**
+ * Sort sequences by feature score or density, optionally restricted by
+ * feature types, feature groups, or alignment start/end positions.
+ * <p>
+ * If the sort is repeated for the same combination of types and groups, sort
+ * order is reversed.
+ *
+ * @param featureTypes
+ * a list of feature types to include (or null for all)
+ * @param groups
+ * a list of feature groups to include (or null for all)
+ * @param startCol
+ * start column position to include (base zero)
+ * @param endCol
+ * end column position to include (base zero)
+ * @param alignment
+ * the alignment to be sorted
+ * @param method
+ * either "average_score" or "density" ("text" not yet implemented)
+ */
+ public static void sortByFeature(List<String> featureTypes,
+ List<String> groups, final int startCol, final int endCol,
+ AlignmentI alignment, String method)
{
if (method != FEATURE_SCORE && method != FEATURE_LABEL
&& method != FEATURE_DENSITY)
{
- throw new Error(MessageManager.getString("error.implementation_error_sortbyfeature"));
- }
- boolean ignoreScore = method != FEATURE_SCORE;
- StringBuffer scoreLabel = new StringBuffer();
- scoreLabel.append(start + stop + method);
- // This doesn't quite work yet - we'd like to have a canonical ordering that
- // can be preserved from call to call
- for (int i = 0; featureLabels != null && i < featureLabels.length; i++)
- {
- scoreLabel.append(featureLabels[i] == null ? "null"
- : featureLabels[i]);
- }
- for (int i = 0; groupLabels != null && i < groupLabels.length; i++)
- {
- scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]);
+ String msg = String
+ .format("Implementation Error - sortByFeature method must be either '%s' or '%s'",
+ FEATURE_SCORE, FEATURE_DENSITY);
+ System.err.println(msg);
+ return;
}
+
+ flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol);
+
SequenceI[] seqs = alignment.getSequencesArray();
boolean[] hasScore = new boolean[seqs.length]; // per sequence score
int hasScores = 0; // number of scores present on set
double[] scores = new double[seqs.length];
int[] seqScores = new int[seqs.length];
- Object[] feats = new Object[seqs.length];
- double min = 0, max = 0;
+ Object[][] feats = new Object[seqs.length][];
+ double min = 0d;
+ double max = 0d;
+
for (int i = 0; i < seqs.length; i++)
{
- SequenceFeature[] sf = seqs[i].getSequenceFeatures();
- if (sf == null)
- {
- sf = new SequenceFeature[0];
- }
- else
- {
- SequenceFeature[] tmp = new SequenceFeature[sf.length];
- for (int s = 0; s < tmp.length; s++)
- {
- tmp[s] = sf[s];
- }
- sf = tmp;
- }
- int sstart = (start == -1) ? start : seqs[i].findPosition(start);
- int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
+ /*
+ * get sequence residues overlapping column region
+ * and features for residue positions and specified types
+ */
+ // TODO new method findPositions(startCol, endCol)? JAL-2544
+ int startResidue = seqs[i].findPosition(startCol);
+ int endResidue = seqs[i].findPosition(endCol);
+ String[] types = featureTypes == null ? null : featureTypes
+ .toArray(new String[featureTypes.size()]);
+ List<SequenceFeature> sfs = seqs[i].getFeatures().findFeatures(
+ startResidue, endResidue, types);
+
seqScores[i] = 0;
scores[i] = 0.0;
- int n = sf.length;
- for (int f = 0; f < sf.length; f++)
+
+ Iterator<SequenceFeature> it = sfs.listIterator();
+ while (it.hasNext())
{
- // filter for selection criteria
- if (
- // ignore features outwith alignment start-stop positions.
- (sf[f].end < sstart || sf[f].begin > sstop) ||
- // or ignore based on selection criteria
- (featureLabels != null && !AlignmentSorter
- .containsIgnoreCase(sf[f].type, featureLabels))
- || (groupLabels != null
- // problem here: we cannot eliminate null feature group features
- && (sf[f].getFeatureGroup() != null && !AlignmentSorter
- .containsIgnoreCase(sf[f].getFeatureGroup(),
- groupLabels))))
+ SequenceFeature sf = it.next();
+
+ /*
+ * double-check feature overlaps columns (JAL-2544)
+ * (could avoid this with a findPositions(fromCol, toCol) method)
+ * findIndex returns base 1 column values, startCol/endCol are base 0
+ */
+ if (seqs[i].findIndex(sf.getBegin()) > endCol + 1
+ || seqs[i].findIndex(sf.getEnd()) < startCol + 1)
+ {
+ it.remove();
+ continue;
+ }
+
+ /*
+ * accept all features with null or empty group, otherwise
+ * check group is one of the currently visible groups
+ */
+ String featureGroup = sf.getFeatureGroup();
+ if (groups != null && featureGroup != null
+ && !"".equals(featureGroup)
+ && !groups.contains(featureGroup))
{
- // forget about this feature
- sf[f] = null;
- n--;
+ it.remove();
}
else
{
- // or, also take a look at the scores if necessary.
- if (!ignoreScore && sf[f].getScore() != Float.NaN)
+ float score = sf.getScore();
+ if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
{
if (seqScores[i] == 0)
{
}
seqScores[i]++;
hasScore[i] = true;
- scores[i] += sf[f].getScore(); // take the first instance of this
- // score.
+ scores[i] += score;
+ // take the first instance of this score // ??
}
}
}
- SequenceFeature[] fs;
- feats[i] = fs = new SequenceFeature[n];
- if (n > 0)
+
+ feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
+ if (!sfs.isEmpty())
{
- n = 0;
- for (int f = 0; f < sf.length; f++)
- {
- if (sf[f] != null)
- {
- ((SequenceFeature[]) feats[i])[n++] = sf[f];
- }
- }
if (method == FEATURE_LABEL)
{
- // order the labels by alphabet
- String[] labs = new String[fs.length];
- for (int l = 0; l < labs.length; l++)
+ // order the labels by alphabet (not yet implemented)
+ String[] labs = new String[sfs.size()];
+ for (int l = 0; l < sfs.size(); l++)
{
- labs[l] = (fs[l].getDescription() != null ? fs[l]
- .getDescription() : fs[l].getType());
+ SequenceFeature sf = sfs.get(l);
+ String description = sf.getDescription();
+ labs[l] = (description != null ? description : sf.getType());
}
- jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
+ QuickSort.sort(labs, feats[i]);
}
}
if (hasScore[i])
// update the score bounds.
if (hasScores == 1)
{
- max = min = scores[i];
+ min = scores[i];
+ max = min;
}
else
{
- if (max < scores[i])
- {
- max = scores[i];
- }
- if (min > scores[i])
- {
- min = scores[i];
- }
+ max = Math.max(max, scores[i]);
+ min = Math.min(min, scores[i]);
}
}
}
- if (method == FEATURE_SCORE)
+ if (FEATURE_SCORE.equals(method))
{
if (hasScores == 0)
{
}
else
{
- int nf = (feats[i] == null) ? 0
- : ((SequenceFeature[]) feats[i]).length;
- // System.err.println("Sorting on Score: seq "+seqs[i].getName()+
- // " Feats: "+nf+" Score : "+scores[i]);
+ // int nf = (feats[i] == null) ? 0
+ // : ((SequenceFeature[]) feats[i]).length;
+ // // System.err.println("Sorting on Score: seq " +
+ // seqs[i].getName()
+ // + " Feats: " + nf + " Score : " + scores[i]);
}
}
}
-
- jalview.util.QuickSort.sort(scores, seqs);
+ QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
}
- else if (method == FEATURE_DENSITY)
+ else if (FEATURE_DENSITY.equals(method))
{
-
- // break ties between equivalent numbers for adjacent sequences by adding
- // 1/Nseq*i on the original order
- double fr = 0.9 / (1.0 * seqs.length);
for (int i = 0; i < seqs.length; i++)
{
- double nf;
- scores[i] = (0.05 + fr * i)
- + (nf = ((feats[i] == null) ? 0.0
- : 1.0 * ((SequenceFeature[]) feats[i]).length));
+ int featureCount = feats[i] == null ? 0
+ : ((SequenceFeature[]) feats[i]).length;
+ scores[i] = featureCount;
// System.err.println("Sorting on Density: seq "+seqs[i].getName()+
- // " Feats: "+nf+" Score : "+scores[i]);
+ // " Feats: "+featureCount+" Score : "+scores[i]);
}
- jalview.util.QuickSort.sort(scores, seqs);
+ QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
}
- else
- {
- if (method == FEATURE_LABEL)
- {
- throw new Error(MessageManager.getString("error.not_yet_implemented"));
- }
- }
- if (lastSortByFeatureScore == null
- || !scoreLabel.toString().equals(lastSortByFeatureScore))
+
+ setOrder(alignment, seqs);
+ }
+
+ /**
+ * Builds a string key from the sort criteria and, if it is unchanged since
+ * the previous call, reverses the sort direction; any change in the criteria
+ * resets the direction to ascending.
+ * <p>
+ * The caller's lists are not modified: the key is built from sorted copies,
+ * so the same types and groups supplied in a different order still count as
+ * unchanged criteria.
+ *
+ * @param method
+ * the sort method name (included in the key)
+ * @param featureTypes
+ * feature types to include in the key (may be null)
+ * @param groups
+ * feature groups to include in the key (may be null)
+ * @param startCol
+ * start column (included in the key)
+ * @param endCol
+ * end column (included in the key)
+ */
+ protected static void flipFeatureSortIfUnchanged(String method,
+ List<String> featureTypes, List<String> groups,
+ final int startCol, final int endCol)
+ {
+ StringBuilder sb = new StringBuilder(64);
+ sb.append(startCol).append(method).append(endCol);
+ if (featureTypes != null)
{
- sortByFeatureScoreAscending = true;
+ // sort a copy, so that the caller's list is neither reordered nor
+ // required to be modifiable
+ List<String> sortedTypes = new ArrayList<String>(featureTypes);
+ Collections.sort(sortedTypes);
+ // label the section so that types-only and groups-only criteria with
+ // the same names do not produce the same key
+ sb.append("types=").append(sortedTypes.toString());
}
- else
+ if (groups != null)
{
- sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
+ List<String> sortedGroups = new ArrayList<String>(groups);
+ Collections.sort(sortedGroups);
+ sb.append("groups=").append(sortedGroups.toString());
}
- if (sortByFeatureScoreAscending)
+ String scoreCriteria = sb.toString();
+
+ /*
+ * if resorting on the same criteria, toggle sort order
+ */
+ if (sortByFeatureCriteria == null
+ || !scoreCriteria.equals(sortByFeatureCriteria))
{
- setOrder(alignment, seqs);
+ sortByFeatureAscending = true;
}
else
{
- setReverseOrder(alignment, seqs);
+ sortByFeatureAscending = !sortByFeatureAscending;
}
- lastSortByFeatureScore = scoreLabel.toString();
+ sortByFeatureCriteria = scoreCriteria;
}
}