X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FAlignmentSorter.java;h=25717d03bcfc77f8842648e3cbe2f6395e56eb4f;hb=797df64fa2a0a30773d0f48f5494d4155e5a8be3;hp=a3493f7d69363c7b7a274b84f3a44aaeb419e584;hpb=198a4921d78d176afc0cb709bdbcb4627afd1e8b;p=jalview.git
diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java
index a3493f7..25717d0 100755
--- a/src/jalview/analysis/AlignmentSorter.java
+++ b/src/jalview/analysis/AlignmentSorter.java
@@ -1,20 +1,19 @@
/*
- * Jalview - A Sequence Alignment Editor and Viewer
- * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
*/
package jalview.analysis;
@@ -23,34 +22,88 @@ import java.util.*;
import jalview.datamodel.*;
import jalview.util.*;
-/**
- * Routines for manipulating the order of a multiple sequence alignment
- * TODO: this class retains some global states concerning sort-order which should be made attributes for the caller's alignment visualization.
- * TODO: refactor to allow a subset of selected sequences to be sorted within the context of a whole alignment.
- * Sort method template is: SequenceI[] tobesorted, [ input data mapping to each tobesorted element to use ], Alignment context of tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie seuqence to be sorted w.r.t.])
- * sortinplace implies that the sorted vector resulting from applying the operation to tobesorted should be mapped back to the original positions in alignment.
- * Otherwise, normal behaviour is to re order alignment so that tobesorted is sorted and grouped together starting from the first tobesorted position in the alignment.
- * e.g. (a,tb2,b,tb1,c,tb3 becomes a,tb1,tb2,tb3,b,c)
+/**
+ * Routines for manipulating the order of a multiple sequence alignment.
+ * TODO: this class retains some global state concerning sort order, which
+ * should be made attributes of the caller's alignment visualization.
+ * TODO: refactor to allow a subset of selected sequences to be sorted within
+ * the context of a whole alignment. The sort method template is: (SequenceI[]
+ * tobesorted, [input data mapping to each tobesorted element to use],
+ * Alignment context of tobesorted that are to be re-ordered, boolean
+ * sortinplace, [special data - i.e. sequence to be sorted w.r.t.]).
+ * sortinplace implies that the sorted vector resulting from applying the
+ * operation to tobesorted should be mapped back to the original positions in
+ * the alignment. Otherwise, the normal behaviour is to re-order the alignment
+ * so that tobesorted is sorted and grouped together, starting from the first
+ * tobesorted position in the alignment, e.g. (a,tb2,b,tb1,c,tb3) becomes
+ * (a,tb1,tb2,tb3,b,c).
*/
public class AlignmentSorter
{
+ /**
+ * todo: refactor searches to follow a basic pattern: (search property, last
+ * search state, current sort direction)
+ */
static boolean sortIdAscending = true;
+
static int lastGroupHash = 0;
+
static boolean sortGroupAscending = true;
+
static AlignmentOrder lastOrder = null;
+
static boolean sortOrderAscending = true;
+
static NJTree lastTree = null;
+
static boolean sortTreeAscending = true;
+
+ /**
+ * last Annotation Label used by sortByScore
+ */
private static String lastSortByScore;
+ private static boolean sortByScoreAscending = true;
+
+ /**
+ * compact representation of last arguments to SortByFeatureScore
+ */
+ private static String lastSortByFeatureScore;
+
+ private static boolean sortByFeatureScoreAscending = true;
+
+ private static boolean sortLengthAscending;
+
/**
* Sort by Percentage Identity w.r.t. s
- *
- * @param align AlignmentI
- * @param s SequenceI
- * @param tosort sequences from align that are to be sorted.
+ *
+ * @param align
+ * AlignmentI
+ * @param s
+ * SequenceI
+ * @param tosort
+ * sequences from align that are to be sorted.
*/
- public static void sortByPID(AlignmentI align, SequenceI s, SequenceI[] tosort)
+  public static void sortByPID(AlignmentI align, SequenceI s,
+          SequenceI[] tosort)
+  {
+    // Delegates to the five-argument overload with start = 0 and end = -1.
+    // NOTE(review): -1 presumably stands for "no end limit" - confirm against
+    // the ranged overload, whose body is not fully visible here.
+    final int start = 0;
+    final int end = -1;
+    sortByPID(align, s, tosort, start, end);
+  }
+
+ /**
+ * Sort by Percentage Identity w.r.t. s
+ *
+ * @param align
+ * AlignmentI
+ * @param s
+ * SequenceI
+ * @param tosort
+ * sequences from align that are to be sorted.
+ * @param start
+ * start column (0 for beginning)
+ * @param end
+ */
+ public static void sortByPID(AlignmentI align, SequenceI s,
+ SequenceI[] tosort, int start, int end)
{
int nSeq = align.getHeight();
@@ -59,8 +112,8 @@ public class AlignmentSorter
for (int i = 0; i < nSeq; i++)
{
- scores[i] = Comparison.PID(align.getSequenceAt(i).getSequenceAsString(),
- s.getSequenceAsString());
+ scores[i] = Comparison.PID(align.getSequenceAt(i)
+ .getSequenceAsString(), s.getSequenceAsString());
seqs[i] = align.getSequenceAt(i);
}
@@ -71,9 +124,11 @@ public class AlignmentSorter
/**
* Reverse the order of the sort
- *
- * @param align DOCUMENT ME!
- * @param seqs DOCUMENT ME!
+ *
+ * @param align
+ * alignment whose sequence list is overwritten
+ * @param seqs
+ * sequences to be written back in reverse order
*/
private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
{
@@ -81,7 +136,7 @@ public class AlignmentSorter
int len = 0;
- if ( (nSeq % 2) == 0)
+ if ((nSeq % 2) == 0)
{
len = nSeq / 2;
}
@@ -93,7 +148,7 @@ public class AlignmentSorter
// NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
for (int i = 0; i < len; i++)
{
- //SequenceI tmp = seqs[i];
+ // SequenceI tmp = seqs[i];
align.getSequences().setElementAt(seqs[nSeq - i - 1], i);
align.getSequences().setElementAt(seqs[i], nSeq - i - 1);
}
@@ -101,9 +156,11 @@ public class AlignmentSorter
/**
* Sets the Alignment object with the given sequences
- *
- * @param align Alignment object to be updated
- * @param tmp sequences as a vector
+ *
+ * @param align
+ * Alignment object to be updated
+ * @param tmp
+ * sequences as a vector
*/
private static void setOrder(AlignmentI align, Vector tmp)
{
@@ -112,9 +169,11 @@ public class AlignmentSorter
/**
* Sets the Alignment object with the given sequences
- *
- * @param align DOCUMENT ME!
- * @param seqs sequences as an array
+ *
+ * @param align
+ * Alignment object to be updated
+ * @param seqs
+ * sequences as an array
*/
public static void setOrder(AlignmentI align, SequenceI[] seqs)
{
@@ -131,7 +190,7 @@ public class AlignmentSorter
}
algn.removeAllElements();
- //User may have hidden seqs, then clicked undo or redo
+ // User may have hidden seqs, then clicked undo or redo
for (int i = 0; i < tmp.size(); i++)
{
algn.addElement(tmp.elementAt(i));
@@ -141,8 +200,9 @@ public class AlignmentSorter
/**
* Sorts by ID. Numbers are sorted before letters.
- *
- * @param align The alignment object to sort
+ *
+ * @param align
+ * The alignment object to sort
*/
public static void sortByID(AlignmentI align)
{
@@ -172,16 +232,50 @@ public class AlignmentSorter
}
/**
- * Sorts the alignment by size of group.
- * <br>Maintains the order of sequences in each group
- * by order in given alignment object.
- *
- * @param align sorts the given alignment object by group
+ * Sorts by sequence length
+ *
+ * @param align
+ * The alignment object to sort
+ */
+  public static void sortByLength(AlignmentI align)
+  {
+    final int height = align.getHeight();
+    final float[] spans = new float[height];
+    final SequenceI[] ordered = new SequenceI[height];
+
+    // The sort key for each sequence is getEnd() - getStart(), stored as a
+    // float because that is the key type handed to QuickSort.sort below.
+    int row = 0;
+    while (row < height)
+    {
+      final SequenceI current = align.getSequenceAt(row);
+      ordered[row] = current;
+      spans[row] = (float) (current.getEnd() - current.getStart());
+      row++;
+    }
+
+    // NOTE(review): presumably reorders 'ordered' by increasing key - confirm
+    // against jalview.util.QuickSort.
+    QuickSort.sort(spans, ordered);
+
+    // The static flag picks the write-back direction: when it is false the
+    // sorted array is applied as-is, otherwise it is applied reversed.
+    if (!sortLengthAscending)
+    {
+      setOrder(align, ordered);
+    }
+    else
+    {
+      setReverseOrder(align, ordered);
+    }
+
+    // Flip the flag so that the next call uses the opposite direction.
+    sortLengthAscending = !sortLengthAscending;
+  }
+
+ /**
+ * Sorts the alignment by size of group.
+ * Maintains the order of sequences in each group by order in given alignment
+ * object.
+ *
+ * @param align
+ * sorts the given alignment object by group
*/
public static void sortByGroup(AlignmentI align)
{
- //MAINTAINS ORIGNAL SEQUENCE ORDER,
- //ORDERS BY GROUP SIZE
+ // MAINTAINS ORIGINAL SEQUENCE ORDER,
+ // ORDERS BY GROUP SIZE
Vector groups = new Vector();
if (groups.hashCode() != lastGroupHash)
@@ -194,8 +288,8 @@ public class AlignmentSorter
sortGroupAscending = !sortGroupAscending;
}
- //SORTS GROUPS BY SIZE
- //////////////////////
+ // SORTS GROUPS BY SIZE
+ // ////////////////////
for (int i = 0; i < align.getGroups().size(); i++)
{
SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i);
@@ -218,8 +312,8 @@ public class AlignmentSorter
}
}
- //NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
- ///////////////////////////////////////////////
+ // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
+ // /////////////////////////////////////////////
Vector seqs = new Vector();
for (int i = 0; i < groups.size(); i++)
@@ -240,16 +334,16 @@ public class AlignmentSorter
else
{
setReverseOrder(align,
- vectorSubsetToArray(seqs, align.getSequences()));
+ vectorSubsetToArray(seqs, align.getSequences()));
}
}
/**
- * Converts Vector to array.
- * java 1.18 does not have Vector.toArray()
- *
- * @param tmp Vector of SequenceI objects
- *
+ * Converts Vector to array. Java 1.1.8 does not have Vector.toArray()
+ *
+ * @param tmp
+ * Vector of SequenceI objects
+ *
* @return array of Sequence[]
*/
private static SequenceI[] vectorToArray(Vector tmp)
@@ -266,16 +360,18 @@ public class AlignmentSorter
/**
* DOCUMENT ME!
- *
- * @param tmp DOCUMENT ME!
- * @param mask DOCUMENT ME!
- *
+ *
+ * @param tmp
+ * DOCUMENT ME!
+ * @param mask
+ * DOCUMENT ME!
+ *
* @return DOCUMENT ME!
*/
private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask)
{
Vector seqs = new Vector();
- int i;
+ int i, idx;
boolean[] tmask = new boolean[mask.size()];
for (i = 0; i < mask.size(); i++)
@@ -286,10 +382,10 @@ public class AlignmentSorter
for (i = 0; i < tmp.size(); i++)
{
Object sq = tmp.elementAt(i);
-
- if (mask.contains(sq) && tmask[mask.indexOf(sq)])
+ idx = mask.indexOf(sq);
+ if (idx > -1 && tmask[idx])
{
- tmask[mask.indexOf(sq)] = false;
+ tmask[idx] = false;
seqs.addElement(sq);
}
}
@@ -307,9 +403,11 @@ public class AlignmentSorter
/**
* Sorts by a given AlignmentOrder object
- *
- * @param align Alignment to order
- * @param order specified order for alignment
+ *
+ * @param align
+ * Alignment to order
+ * @param order
+ * specified order for alignment
*/
public static void sortBy(AlignmentI align, AlignmentOrder order)
{
@@ -331,17 +429,18 @@ public class AlignmentSorter
}
else
{
- setReverseOrder(align,
- vectorSubsetToArray(tmp, align.getSequences()));
+ setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
}
}
/**
* DOCUMENT ME!
- *
- * @param align alignment to order
- * @param tree tree which has
- *
+ *
+ * @param align
+ * alignment to order
+ * @param tree
+ * tree which has
+ *
* @return DOCUMENT ME!
*/
private static Vector getOrderByTree(AlignmentI align, NJTree tree)
@@ -356,16 +455,16 @@ public class AlignmentSorter
{
// TODO: JBPNote - decide if this is always an error
// (eg. not when a tree is associated to another alignment which has more
- // sequences)
- if (tmp.size() < nSeq)
+ // sequences)
+ if (tmp.size() != nSeq)
{
addStrays(align, tmp);
}
if (tmp.size() != nSeq)
{
- System.err.println("ERROR: tmp.size()=" + tmp.size() +
- " != nseq=" + nSeq + " in getOrderByTree");
+ System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
+ + nSeq + " in getOrderByTree - tree contains sequences not in alignment");
}
}
@@ -374,9 +473,11 @@ public class AlignmentSorter
/**
* Sorts the alignment by a given tree
- *
- * @param align alignment to order
- * @param tree tree which has
+ *
+ * @param align
+ * alignment to order
+ * @param tree
+ * tree which has
*/
public static void sortByTree(AlignmentI align, NJTree tree)
{
@@ -399,16 +500,17 @@ public class AlignmentSorter
}
else
{
- setReverseOrder(align,
- vectorSubsetToArray(tmp, align.getSequences()));
+ setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
}
}
/**
* DOCUMENT ME!
- *
- * @param align DOCUMENT ME!
- * @param seqs DOCUMENT ME!
+ *
+ * @param align
+ * DOCUMENT ME!
+ * @param seqs
+ * DOCUMENT ME!
*/
private static void addStrays(AlignmentI align, Vector seqs)
{
@@ -424,22 +526,25 @@ public class AlignmentSorter
if (nSeq != seqs.size())
{
- System.err.println(
- "ERROR: Size still not right even after addStrays");
+ System.err
+ .println("ERROR: Size still not right even after addStrays");
}
}
/**
* DOCUMENT ME!
- *
- * @param node DOCUMENT ME!
- * @param tmp DOCUMENT ME!
- * @param seqset DOCUMENT ME!
- *
+ *
+ * @param node
+ * DOCUMENT ME!
+ * @param tmp
+ * DOCUMENT ME!
+ * @param seqset
+ * DOCUMENT ME!
+ *
* @return DOCUMENT ME!
*/
private static Vector _sortByTree(SequenceNode node, Vector tmp,
- Vector seqset)
+ Vector seqset)
{
if (node == null)
{
@@ -449,15 +554,15 @@ public class AlignmentSorter
SequenceNode left = (SequenceNode) node.left();
SequenceNode right = (SequenceNode) node.right();
- if ( (left == null) && (right == null))
+ if ((left == null) && (right == null))
{
if (!node.isPlaceholder() && (node.element() != null))
{
if (node.element() instanceof SequenceI)
{
- if (!tmp.contains(node.element()))
+ if (!tmp.contains(node.element())) // && (seqset==null || seqset.size()==0 || seqset.contains(tmp)))
{
- tmp.addElement( (SequenceI) node.element());
+ tmp.addElement((SequenceI) node.element());
}
}
}
@@ -474,7 +579,8 @@ public class AlignmentSorter
}
// Ordering Objects
- // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in appropriate order
+ // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
+ // appropriate order
//
/**
@@ -487,42 +593,52 @@ public class AlignmentSorter
for (int i = 0; i < alignment.length; i++)
{
- ids[i] = (new Float(alignment[i].getName().substring(8))).floatValue();
+ ids[i] = (new Float(alignment[i].getName().substring(8)))
+ .floatValue();
}
jalview.util.QuickSort.sort(ids, alignment);
}
+
/**
- * Sort sequence in order of increasing score attribute for annotation with a particular
- * scoreLabel. Or reverse if same label was used previously
- * @param scoreLabel exact label for sequence associated AlignmentAnnotation scores to use for sorting.
- * @param alignment sequences to be sorted
+ * Sort sequence in order of increasing score attribute for annotation with a
+ * particular scoreLabel. Or reverse if same label was used previously
+ *
+ * @param scoreLabel
+ * exact label for sequence associated AlignmentAnnotation scores to
+ * use for sorting.
+ * @param alignment
+ * sequences to be sorted
*/
- public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment)
+ public static void sortByAnnotationScore(String scoreLabel,
+ AlignmentI alignment)
{
SequenceI[] seqs = alignment.getSequencesArray();
- boolean[] hasScore = new boolean[seqs.length]; // per sequence score presence
- int hasScores=0; // number of scores present on set
+ boolean[] hasScore = new boolean[seqs.length]; // per sequence score
+ // presence
+ int hasScores = 0; // number of scores present on set
double[] scores = new double[seqs.length];
- double min=0,max=0;
+ double min = 0, max = 0;
for (int i = 0; i < seqs.length; i++)
{
AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
- if (scoreAnn!=null)
+ if (scoreAnn != null)
{
hasScores++;
hasScore[i] = true;
- scores[i] = scoreAnn[0].getScore(); // take the first instance of this score.
- if (hasScores==1)
+ scores[i] = scoreAnn[0].getScore(); // take the first instance of this
+ // score.
+ if (hasScores == 1)
{
max = min = scores[i];
- } else
+ }
+ else
{
- if (maxscores[i])
+ if (min > scores[i])
{
min = scores[i];
}
@@ -533,28 +649,301 @@ public class AlignmentSorter
hasScore[i] = false;
}
}
- if (hasScores==0)
+ if (hasScores == 0)
{
return; // do nothing - no scores present to sort by.
}
- if (hasScores sstop) ||
+ // or ignore based on selection criteria
+ (featureLabels != null && !AlignmentSorter
+ .containsIgnoreCase(sf[f].type, featureLabels))
+ || (groupLabels != null
+ // problem here: we cannot eliminate null feature group features
+ && (sf[f].getFeatureGroup() != null && !AlignmentSorter
+ .containsIgnoreCase(sf[f].getFeatureGroup(),
+ groupLabels))))
+ {
+ // forget about this feature
+ sf[f] = null;
+ n--;
+ }
+ else
+ {
+ // or, also take a look at the scores if necessary.
+ if (!ignoreScore && sf[f].getScore() != Float.NaN)
+ {
+ if (seqScores[i] == 0)
+ {
+ hasScores++;
+ }
+ seqScores[i]++;
+ hasScore[i] = true;
+ scores[i] += sf[f].getScore(); // accumulate this feature's score;
+ // the sum is averaged per sequence below.
+ }
+ }
+ }
+ SequenceFeature[] fs;
+ feats[i] = fs = new SequenceFeature[n];
+ if (n > 0)
+ {
+ n = 0;
+ for (int f = 0; f < sf.length; f++)
+ {
+ if (sf[f] != null)
+ {
+ ((SequenceFeature[]) feats[i])[n++] = sf[f];
+ }
+ }
+ if (method == FEATURE_LABEL)
+ {
+ // order the labels by alphabet
+ String[] labs = new String[fs.length];
+ for (int l = 0; l < labs.length; l++)
+ {
+ labs[l] = (fs[l].getDescription() != null ? fs[l]
+ .getDescription() : fs[l].getType());
+ }
+ jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
+ }
+ }
+ if (hasScore[i])
+ {
+ // compute average score
+ scores[i] /= seqScores[i];
+ // update the score bounds.
+ if (hasScores == 1)
+ {
+ max = min = scores[i];
+ }
+ else
+ {
+ if (max < scores[i])
+ {
+ max = scores[i];
+ }
+ if (min > scores[i])
+ {
+ min = scores[i];
+ }
+ }
+ }
+ }
+
+ if (method == FEATURE_SCORE)
+ {
+ if (hasScores == 0)
+ {
+ return; // do nothing - no scores present to sort by.
+ }
+ // pad score matrix
+ if (hasScores < seqs.length)
+ {
+ for (int i = 0; i < seqs.length; i++)
+ {
+ if (!hasScore[i])
+ {
+ scores[i] = (max + 1 + i);
+ }
+ else
+ {
+ int nf = (feats[i] == null) ? 0
+ : ((SequenceFeature[]) feats[i]).length;
+ // System.err.println("Sorting on Score: seq "+seqs[i].getName()+
+ // " Feats: "+nf+" Score : "+scores[i]);
+ }
+ }
+ }
+
+ jalview.util.QuickSort.sort(scores, seqs);
+ }
+ else if (method == FEATURE_DENSITY)
+ {
+
+ // break ties between equivalent numbers for adjacent sequences by adding
+ // 1/Nseq*i on the original order
+ double fr = 0.9 / (1.0 * seqs.length);
+ for (int i = 0; i < seqs.length; i++)
+ {
+ double nf;
+ scores[i] = (0.05 + fr * i)
+ + (nf = ((feats[i] == null) ? 0.0
+ : 1.0 * ((SequenceFeature[]) feats[i]).length));
+ // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
+ // " Feats: "+nf+" Score : "+scores[i]);
+ }
+ jalview.util.QuickSort.sort(scores, seqs);
+ }
+ else
+ {
+ if (method == FEATURE_LABEL)
+ {
+ throw new Error("Not yet implemented.");
+ }
+ }
+ if (lastSortByFeatureScore == null
+ || !scoreLabel.toString().equals(lastSortByFeatureScore))
+ {
+ sortByFeatureScoreAscending = true;
+ }
+ else
+ {
+ sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
+ }
+ if (sortByFeatureScoreAscending)
+ {
+ setOrder(alignment, seqs);
+ }
+ else
+ {
+ setReverseOrder(alignment, seqs);
+ }
+ lastSortByFeatureScore = scoreLabel.toString();
+ }
+
}