2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.datamodel.AlignmentAnnotation;
26 import jalview.datamodel.AlignmentI;
27 import jalview.datamodel.AlignmentOrder;
28 import jalview.datamodel.SequenceFeature;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.SequenceNode;
32 import jalview.util.MessageManager;
33 import jalview.util.QuickSort;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.List;
40 * Routines for manipulating the order of a multiple sequence alignment TODO:
41 * this class retains some global states concerning sort-order which should be
42 * made attributes for the caller's alignment visualization. TODO: refactor to
43 * allow a subset of selected sequences to be sorted within the context of a
44 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
45 * data mapping to each tobesorted element to use ], Alignment context of
46 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
47 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
48 * resulting from applying the operation to tobesorted should be mapped back to
49 * the original positions in alignment. Otherwise, normal behaviour is to re
50 * order alignment so that tobesorted is sorted and grouped together starting
51 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
52 * becomes a,tb1,tb2,tb3,b,c)
// Holder for alignment-sorting routines. Every field below is static, so the
// remembered sort state is shared by all callers rather than kept per view.
54 public class AlignmentSorter
57 * todo: refactor searches to follow a basic pattern: (search property, last
58 * search state, current sort direction)
// Direction flag for sortByID; flipped at the end of every sortByID call.
60 static boolean sortIdAscending = true;
// Hash value remembered by sortByGroup between calls.
62 static int lastGroupHash = 0;
// Direction flag for sortByGroup.
64 static boolean sortGroupAscending = true;
// Compared by identity in sortBy(AlignmentI, AlignmentOrder).
// NOTE(review): no assignment to this field is visible in this listing.
66 static AlignmentOrder lastOrder = null;
// Direction flag for sortBy(AlignmentI, AlignmentOrder).
68 static boolean sortOrderAscending = true;
// Tree compared by identity in sortByTree.
70 static TreeModel lastTree = null;
// Direction flag for sortByTree.
72 static boolean sortTreeAscending = true;
75 * last Annotation Label used by sortByScore
77 private static String lastSortByScore;
// NOTE(review): no read or write of this flag is visible in this listing; the
// visible part of sortByAnnotationScore branches on lastSortByScore only.
79 private static boolean sortByScoreAscending = true;
82 * compact representation of last arguments to SortByFeatureScore
84 private static String lastSortByFeatureScore;
// Direction handed to QuickSort.sortByDouble by sortByFeature.
86 private static boolean sortByFeatureScoreAscending = true;
// Direction flag for sortByLength. It has no initialiser, so it starts as
// false, unlike the other direction flags which start as true.
88 private static boolean sortLengthAscending;
91 * Sorts sequences in the alignment by Percentage Identity with the given
92 * reference sequence, sorting the highest identity to the top
// Orders align by each row's percentage identity to the reference sequence s:
// one PID value is computed per row, the rows are sorted on it, and the sorted
// array is written back through setReverseOrder.
100 public static void sortByPID(AlignmentI align, SequenceI s)
102 int nSeq = align.getHeight();
// parallel arrays: scores[i] is the PID of seqs[i] against refSeq
104 float[] scores = new float[nSeq];
105 SequenceI[] seqs = new SequenceI[nSeq];
106 String refSeq = s.getSequenceAsString();
// NOTE(review): the end of this constructor call is not visible in this
// listing; confirm the meaning of each flag against SimilarityParams.
108 SimilarityParams pidParams = new SimilarityParams(true, true, true,
110 for (int i = 0; i < nSeq; i++)
// the computed PID is cast to float to fit the scores array
112 scores[i] = (float) PIDModel.computePID(align.getSequenceAt(i)
113 .getSequenceAsString(), refSeq, pidParams);
114 seqs[i] = align.getSequenceAt(i);
// both arrays are handed to QuickSort - presumably seqs is reordered by
// ascending score (confirm in QuickSort)
117 QuickSort.sort(scores, seqs);
// the sorted array is applied back to front; with an ascending sort this puts
// the highest identity at the top
119 setReverseOrder(align, seqs);
123 * Reverse the order of the sort
// Writes seqs into the alignment's sequence list back to front, so that
// seqs[0] ends up in the last addressed slot and seqs[nSeq - 1] in the first.
130 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
132 int nSeq = seqs.length;
// number of mirrored pairs to write. NOTE(review): the statements that choose
// between this and an alternative value are not visible in this listing.
142 len = (nSeq + 1) / 2;
145 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
// the list returned by getSequences() is locked while it is rewritten in place
// NOTE(review): set() addresses slots 0..nSeq-1, so this assumes the list
// already holds at least seqs.length entries - confirm callers guarantee it
147 synchronized (asq = align.getSequences())
149 for (int i = 0; i < len; i++)
151 // SequenceI tmp = seqs[i];
// each pass fills slot i and its mirror slot from opposite ends of seqs
152 asq.set(i, seqs[nSeq - i - 1]);
153 asq.set(nSeq - i - 1, seqs[i]);
159 * Sets the Alignment object with the given sequences
162 * Alignment object to be updated
164 * sequences as a vector
// List-based overload: turns tmp into an array with vectorSubsetToArray, using
// the alignment's current sequence list as the mask, then delegates to
// setOrder(AlignmentI, SequenceI[]).
166 private static void setOrder(AlignmentI align, List<SequenceI> tmp)
168 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
172 * Sets the Alignment object with the given sequences
177 * sequences as an array
// Rebuilds the alignment's sequence list in the order given by seqs, working
// directly on the list returned by align.getSequences().
179 public static void setOrder(AlignmentI align, SequenceI[] seqs)
181 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
182 List<SequenceI> algn;
// the list is locked for the duration of the rebuild
183 synchronized (algn = align.getSequences())
// staging list for the new order
185 List<SequenceI> tmp = new ArrayList<SequenceI>();
187 for (int i = 0; i < seqs.length; i++)
// only entries already held by the alignment pass this test (presumably they
// are then staged in tmp - that statement is not visible in this listing)
// NOTE(review): contains() on a List is a linear scan, making this loop
// O(seqs.length * list size)
189 if (algn.contains(seqs[i]))
196 // User may have hidden seqs, then clicked undo or redo
// copy the staged order back into the alignment's list
// NOTE(review): presumably the list is emptied before this refill - the
// statement doing so is not visible in this listing; confirm
197 for (int i = 0; i < tmp.size(); i++)
199 algn.add(tmp.get(i));
205 * Sorts by ID. Numbers are sorted before letters.
208 * The alignment object to sort
// Sorts align on sequence name; the direction flag sortIdAscending is flipped
// after every call so that repeated calls alternate.
210 public static void sortByID(AlignmentI align)
212 int nSeq = align.getHeight();
// parallel arrays: ids[i] is the name of seqs[i]
214 String[] ids = new String[nSeq];
215 SequenceI[] seqs = new SequenceI[nSeq];
217 for (int i = 0; i < nSeq; i++)
219 ids[i] = align.getSequenceAt(i).getName();
220 seqs[i] = align.getSequenceAt(i);
// both arrays are handed to QuickSort - presumably seqs is reordered to follow
// the sorted ids (confirm the string ordering rules in QuickSort)
223 QuickSort.sort(ids, seqs);
// NOTE(review): the condition choosing between the next two calls is not
// visible in this listing; presumably it tests sortIdAscending
227 setReverseOrder(align, seqs);
231 setOrder(align, seqs);
// flip direction for the next call
234 sortIdAscending = !sortIdAscending;
238 * Sorts by sequence length
241 * The alignment object to sort
// Sorts align on getEnd() - getStart() of each sequence; the direction flag
// sortLengthAscending is flipped after every call.
243 public static void sortByLength(AlignmentI align)
245 int nSeq = align.getHeight();
// parallel arrays: length[i] is the sort key of seqs[i]
247 float[] length = new float[nSeq];
248 SequenceI[] seqs = new SequenceI[nSeq];
250 for (int i = 0; i < nSeq; i++)
252 seqs[i] = align.getSequenceAt(i);
// difference between end and start positions, stored as a float key
253 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
256 QuickSort.sort(length, seqs);
// when the flag is set the sorted array is applied back to front
// NOTE(review): sortLengthAscending has no initialiser (false by default), so
// the first call does not take this branch, whereas sortIdAscending starts as
// true - confirm the asymmetry is intended
258 if (sortLengthAscending)
260 setReverseOrder(align, seqs);
264 setOrder(align, seqs);
// flip direction for the next call
267 sortLengthAscending = !sortLengthAscending;
271 * Sorts the alignment by size of group. <br>
272 * Maintains the order of sequences in each group by order in given alignment
276 * sorts the given alignment object by group
// Reorders align group by group: the alignment's groups are arranged by size,
// then each group's sequences are emitted in alignment order.
278 public static void sortByGroup(AlignmentI align)
280 // MAINTAINS ORIGINAL SEQUENCE ORDER,
281 // ORDERS BY GROUP SIZE
282 List<SequenceGroup> groups = new ArrayList<SequenceGroup>();
// NOTE(review): as far as is visible here, groups is still empty when
// hashCode() is taken, so the value is the same constant on every call. The
// check then only separates the very first call from later ones and cannot
// detect that the set of groups has changed - confirm intent.
284 if (groups.hashCode() != lastGroupHash)
286 sortGroupAscending = true;
287 lastGroupHash = groups.hashCode();
291 sortGroupAscending = !sortGroupAscending;
294 // SORTS GROUPS BY SIZE
295 // ////////////////////
296 for (SequenceGroup sg : align.getGroups())
// scan the groups collected so far for the first one smaller than sg
// (presumably sg is inserted at that index - the insertion is not visible)
298 for (int j = 0; j < groups.size(); j++)
300 SequenceGroup sg2 = groups.get(j);
302 if (sg.getSize() > sg2.getSize())
// sg was not placed by the scan above (presumably appended at the end here)
310 if (!groups.contains(sg))
316 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
317 // /////////////////////////////////////////////
318 List<SequenceI> seqs = new ArrayList<SequenceI>();
320 for (int i = 0; i < groups.size(); i++)
322 SequenceGroup sg = groups.get(i);
323 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
325 for (int j = 0; j < orderedseqs.length; j++)
327 seqs.add(orderedseqs[j]);
// forward order uses the List overload of setOrder; otherwise the list is
// converted to an array and applied back to front
331 if (sortGroupAscending)
333 setOrder(align, seqs);
337 setReverseOrder(align,
338 vectorSubsetToArray(seqs, align.getSequences()));
343 * Select sequences in order from tmp that are present in mask, and any
344 * remaining sequences in mask not in tmp
347 * thread safe collection of sequences
349 * thread safe collection of sequences
351 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
// Produces an array drawn from tmp and mask: tmp is walked in order and each
// entry is looked up in mask, then mask itself is walked for the remainder.
// NOTE(review): several guard and body statements of this method are not
// visible in this listing, so only the visible steps are annotated.
353 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
354 List<SequenceI> mask)
357 // tmp2 = tmp.retainAll(mask);
358 // return tmp2.addAll(mask.removeAll(tmp2))
// result accumulator
360 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
// one flag per mask entry
362 boolean[] tmask = new boolean[mask.size()];
// (presumably initialises every flag - the loop body is not visible)
364 for (i = 0; i < mask.size(); i++)
369 for (i = 0; i < tmp.size(); i++)
371 SequenceI sq = tmp.get(i);
// position of sq in mask, or -1 when absent
// NOTE(review): indexOf is a linear scan, so this loop is
// O(tmp.size() * mask.size())
372 idx = mask.indexOf(sq);
// sq is in mask and its flag is still set
373 if (idx > -1 && tmask[idx])
// second pass over the flags, adding mask entries (presumably only those whose
// flag is still set - the guard is not visible)
380 for (i = 0; i < tmask.length; i++)
384 seqs.add(mask.get(i));
388 return seqs.toArray(new SequenceI[seqs.size()]);
392 * Sorts by a given AlignmentOrder object
397 * specified order for alignment
// Applies the sequence order held by an AlignmentOrder to align, forwards or
// back to front depending on sortOrderAscending.
399 public static void sortBy(AlignmentI align, AlignmentOrder order)
401 // Get an ordered vector of sequences which may also be present in align
402 List<SequenceI> tmp = order.getOrder();
// identity comparison: matches only when the very same AlignmentOrder object
// is passed again
// NOTE(review): no assignment to lastOrder is visible in this method, so
// unless it is set elsewhere this test never succeeds and the order is always
// applied forwards - confirm
404 if (lastOrder == order)
406 sortOrderAscending = !sortOrderAscending;
410 sortOrderAscending = true;
413 if (sortOrderAscending)
415 setOrder(align, tmp);
419 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
431 * @return sequences in tree order, then any alignment sequences not in the tree
// Derives a sequence ordering for align from a tree: sequences are collected
// from the tree's top node via _sortByTree, and the result is then reconciled
// against the alignment height.
// NOTE(review): the second line of the signature and the return statement are
// not visible in this listing.
433 private static List<SequenceI> getOrderByTree(AlignmentI align,
436 int nSeq = align.getHeight();
438 List<SequenceI> tmp = new ArrayList<SequenceI>();
440 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
// the tree walk did not yield exactly one entry per alignment row
442 if (tmp.size() != nSeq)
444 // TODO: JBPNote - decide if this is always an error
445 // (eg. not when a tree is associated to another alignment which has more
// same condition as the check above; append alignment sequences that the tree
// walk did not produce
447 if (tmp.size() != nSeq)
449 addStrays(align, tmp);
// still mismatched after addStrays: a warning is printed
452 if (tmp.size() != nSeq)
455 .println("WARNING: tmp.size()="
459 + " in getOrderByTree - tree contains sequences not in alignment");
467 * Sorts the alignment by a given tree
// Sorts align into the order given by tree, forwards or back to front
// depending on sortTreeAscending.
474 public static void sortByTree(AlignmentI align, TreeModel tree)
476 List<SequenceI> tmp = getOrderByTree(align, tree);
478 // tmp should properly permute align with tree.
// identity comparison: a different TreeModel object resets the direction
479 if (lastTree != tree)
481 sortTreeAscending = true;
// NOTE(review): the statement recording tree in lastTree is not visible in
// this listing - confirm it is present, otherwise the toggle below can never
// be reached
486 sortTreeAscending = !sortTreeAscending;
489 if (sortTreeAscending)
491 setOrder(align, tmp);
495 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
// Appends to tmp every sequence of align that tmp does not already contain,
// in alignment order; tmp is modified in place.
507 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
509 int nSeq = align.getHeight();
511 for (int i = 0; i < nSeq; i++)
// NOTE(review): contains() on a List is a linear scan, so this loop is
// O(nSeq * tmp.size())
513 if (!tmp.contains(align.getSequenceAt(i)))
515 tmp.add(align.getSequenceAt(i));
// sizes can still differ when tmp holds entries that are not in align
519 if (nSeq != tmp.size())
522 .println("ERROR: Size still not right even after addStrays");
536 * @return the list receiving the leaf sequences (presumably tmp itself)
// Recursive tree walk that appends the SequenceI stored at each leaf to tmp,
// visiting the left child before the right child.
// NOTE(review): in the visible code seqset is only passed down the recursion;
// the filter that would have used it is commented out below.
538 private static List<SequenceI> _sortByTree(SequenceNode node,
539 List<SequenceI> tmp, List<SequenceI> seqset)
546 SequenceNode left = (SequenceNode) node.left();
547 SequenceNode right = (SequenceNode) node.right();
// a node without children is a leaf
549 if ((left == null) && (right == null))
// placeholder leaves and leaves without an element contribute nothing
551 if (!node.isPlaceholder() && (node.element() != null))
553 if (node.element() instanceof SequenceI)
// each sequence is added once only (linear contains() scan per leaf)
555 if (!tmp.contains(node.element())) // && (seqset==null ||
556 // seqset.size()==0 ||
557 // seqset.contains(tmp)))
559 tmp.add((SequenceI) node.element());
// interior node: results accumulate in tmp, return values are not used here
568 _sortByTree(left, tmp, seqset);
569 _sortByTree(right, tmp, seqset);
576 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
581 * recover the order of sequences given by the safe numbering scheme introduced
582 * by SeqsetUtils.uniquify.
// Sorts the given array on the number that follows the first 8 characters of
// each sequence name (presumably in place - the array is handed to QuickSort
// and nothing is returned).
584 public static void recoverOrder(SequenceI[] alignment)
586 float[] ids = new float[alignment.length];
588 for (int i = 0; i < alignment.length; i++)
// parse the text after the 8-character name prefix as a number
// NOTE(review): substring(8) throws for names shorter than 8 characters and
// the Float constructor throws NumberFormatException for non-numeric text;
// new Float(String) is also deprecated in recent JDKs (Float.parseFloat is the
// usual replacement)
590 ids[i] = (new Float(alignment[i].getName().substring(8)))
594 jalview.util.QuickSort.sort(ids, alignment);
598 * Sort sequence in order of increasing score attribute for annotation with a
599 * particular scoreLabel. Or reverse if same label was used previously
602 * exact label for sequence associated AlignmentAnnotation scores to
605 * sequences to be sorted
// Sorts alignment on the score of the annotation labelled scoreLabel attached
// to each sequence; the order is applied back to front when the label matches
// the one remembered from the previous call.
// NOTE(review): a number of statements in this method are not visible in this
// listing; only the visible steps are annotated.
607 public static void sortByAnnotationScore(String scoreLabel,
608 AlignmentI alignment)
610 SequenceI[] seqs = alignment.getSequencesArray();
611 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
613 int hasScores = 0; // number of scores present on set
614 double[] scores = new double[seqs.length];
615 double min = 0, max = 0;
616 for (int i = 0; i < seqs.length; i++)
// annotations on this sequence that carry the requested label
618 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
619 if (scoreAnn != null)
623 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
// both bounds are seeded from the same score (presumably the first one found -
// the guard is not visible)
627 max = min = scores[i];
648 return; // do nothing - no scores present to sort by.
// at least one sequence has no score
650 if (hasScores < seqs.length)
652 for (int i = 0; i < seqs.length; i++)
// placeholder value above max that grows with the index (presumably assigned
// only to sequences without a score - the guard is not visible)
656 scores[i] = (max + i + 1.0);
661 jalview.util.QuickSort.sort(scores, seqs);
// NOTE(review): != compares String references, not contents, so an equal label
// held in a different String object is treated as a new label; equals() is
// probably what is intended - confirm
662 if (lastSortByScore != scoreLabel)
664 lastSortByScore = scoreLabel;
665 setOrder(alignment, seqs);
669 setReverseOrder(alignment, seqs);
674 * types of feature ordering: Sort by score : average score - or total score -
675 * over all features in region Sort by feature label text: (or if null -
676 * feature type text) - numerical or alphabetical Sort by feature density:
677 * based on counts - ignoring individual text or scores for each feature
// Values accepted for the method argument of sortByFeature.
// NOTE(review): these fields are public and not final, while sortByFeature
// matches its method argument against them with == / != (reference identity).
// Callers therefore have to pass these exact objects, and reassigning a field
// would break the matching - consider making them final.
679 public static String FEATURE_SCORE = "average_score";
681 public static String FEATURE_LABEL = "text";
683 public static String FEATURE_DENSITY = "density";
686 * sort the alignment using the features on each sequence found between start
687 * and stop with the given featureLabel (and optional group qualifier)
689 * @param featureLabel
694 * (-1 to include non-positional features)
696 * (-1 to only sort on non-positional features)
698 * - aligned sequences containing features
700 * - one of the string constants FEATURE_SCORE, FEATURE_LABEL,
// Single-label convenience form: featureLabel and groupLabel are each wrapped
// in a one-element list, or passed as null when null.
// NOTE(review): the head and tail of the call these arguments belong to are
// not visible in this listing; presumably it is the List-based sortByFeature.
703 public static void sortByFeature(String featureLabel, String groupLabel,
704 int start, int stop, AlignmentI alignment, String method)
707 featureLabel == null ? null
708 : Arrays.asList(new String[] { featureLabel }),
709 groupLabel == null ? null : Arrays
710 .asList(new String[] { groupLabel }), start, stop,
// Case-insensitive membership test of lab against the entries of labs.
// NOTE(review): the statements before the loop and the return statements are
// not visible in this listing, so handling of null arguments cannot be
// described from here.
714 private static boolean containsIgnoreCase(final String lab,
715 final List<String> labs)
725 for (String label : labs)
// lab is the receiver of the comparison, so it must be non-null at this point
727 if (lab.equalsIgnoreCase(label))
// Sorts alignment on the sequence features lying between columns start and
// stop, filtered by feature type and feature group, using one of the FEATURE_*
// methods. Repeating an identical request flips the sort direction.
// NOTE(review): a number of statements in this method are not visible in this
// listing; only the visible steps are annotated.
735 public static void sortByFeature(List<String> featureLabels,
736 List<String> groupLabels, int start, int stop,
737 AlignmentI alignment, String method)
// NOTE(review): method is matched against the FEATURE_* fields by reference
// (== / !=) throughout this method, so callers must pass those exact String
// objects; an equal string built at runtime would take the error path here.
739 if (method != FEATURE_SCORE && method != FEATURE_LABEL
740 && method != FEATURE_DENSITY)
744 .getString("error.implementation_error_sortbyfeature"));
// feature scores are only consulted for FEATURE_SCORE
747 boolean ignoreScore = method != FEATURE_SCORE;
// key describing this request; compared with the previous call's key below
748 StringBuffer scoreLabel = new StringBuffer();
// NOTE(review): start + stop is evaluated as integer addition before the
// string concatenation, so different ranges with the same sum (e.g. 1..10 and
// 5..6) yield the same key - confirm whether a separator was intended
749 scoreLabel.append(start + stop + method);
750 // This doesn't quite work yet - we'd like to have a canonical ordering that
751 // can be preserved from call to call
752 if (featureLabels != null)
754 for (String label : featureLabels)
756 scoreLabel.append(label);
759 if (groupLabels != null)
761 for (String label : groupLabels)
763 scoreLabel.append(label);
768 * if resorting the same feature, toggle sort order
770 if (lastSortByFeatureScore == null
771 || !scoreLabel.toString().equals(lastSortByFeatureScore))
773 sortByFeatureScoreAscending = true;
777 sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
// remember this request for the next call
779 lastSortByFeatureScore = scoreLabel.toString();
781 SequenceI[] seqs = alignment.getSequencesArray();
783 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
785 int hasScores = 0; // number of scores present on set
786 double[] scores = new double[seqs.length];
// seqScores[i] is the divisor used for the average of scores[i] further down
787 int[] seqScores = new int[seqs.length];
// feats[i] holds the SequenceFeature[] retained for sequence i
788 Object[] feats = new Object[seqs.length];
789 double min = 0, max = 0;
790 for (int i = 0; i < seqs.length; i++)
792 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
// (presumably substituted when the sequence has no features - the guard is not
// visible)
795 sf = new SequenceFeature[0];
// (presumably a working copy of sf - the loop body is not visible)
799 SequenceFeature[] tmp = new SequenceFeature[sf.length];
800 for (int s = 0; s < tmp.length; s++)
// a bound of -1 is kept as -1; any other column is mapped to a sequence
// position
806 int sstart = (start == -1) ? start : seqs[i].findPosition(start);
807 int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
811 for (int f = 0; f < sf.length; f++)
813 // filter for selection criteria
814 SequenceFeature feature = sf[f];
817 * double-check feature overlaps columns (JAL-2544)
818 * (could avoid this with a findPositions(fromCol, toCol) method)
819 * findIndex returns base 1 column values, startCol/endCol are base 0
821 boolean noOverlap = seqs[i].findIndex(feature.getBegin()) > stop + 1
822 || seqs[i].findIndex(feature.getEnd()) < start + 1;
// the type filter applies only when featureLabels were supplied
823 boolean skipFeatureType = featureLabels != null
824 && !AlignmentSorter.containsIgnoreCase(feature.type,
// the group filter applies only when groupLabels were supplied; a feature
// whose group is null is never excluded by it
826 boolean skipFeatureGroup = groupLabels != null
827 && (feature.getFeatureGroup() != null && !AlignmentSorter
828 .containsIgnoreCase(feature.getFeatureGroup(),
830 if (noOverlap || skipFeatureType || skipFeatureGroup)
832 // forget about this feature
838 // or, also take a look at the scores if necessary.
839 if (!ignoreScore && !Float.isNaN(feature.getScore()))
841 if (seqScores[i] == 0)
// NOTE(review): scores are accumulated with += and averaged later, so the
// trailing "first instance" remark on the next line looks stale
847 scores[i] += feature.getScore(); // take the first instance of this
852 SequenceFeature[] fs;
853 feats[i] = fs = new SequenceFeature[n];
857 for (int f = 0; f < sf.length; f++)
861 ((SequenceFeature[]) feats[i])[n++] = sf[f];
864 if (method == FEATURE_LABEL)
866 // order the labels by alphabet
867 String[] labs = new String[fs.length];
868 for (int l = 0; l < labs.length; l++)
// the description when there is one, otherwise the feature type
870 labs[l] = (fs[l].getDescription() != null ? fs[l]
871 .getDescription() : fs[l].getType());
873 QuickSort.sort(labs, ((Object[]) feats[i]));
878 // compute average score
// NOTE(review): the guard around this division is not visible; with
// seqScores[i] == 0 the result would be NaN or infinite
879 scores[i] /= seqScores[i];
880 // update the score bounds.
883 max = min = scores[i];
899 if (method == FEATURE_SCORE)
903 return; // do nothing - no scores present to sort by.
// at least one sequence has no score
906 if (hasScores < seqs.length)
908 for (int i = 0; i < seqs.length; i++)
// placeholder value above max that grows with the index (presumably assigned
// only to sequences without a score - the guard is not visible)
912 scores[i] = (max + 1 + i);
916 // int nf = (feats[i] == null) ? 0
917 // : ((SequenceFeature[]) feats[i]).length;
918 // // System.err.println("Sorting on Score: seq " +
920 // + " Feats: " + nf + " Score : " + scores[i]);
924 QuickSort.sortByDouble(scores, seqs, sortByFeatureScoreAscending);
926 else if (method == FEATURE_DENSITY)
928 for (int i = 0; i < seqs.length; i++)
// density key = number of features retained for the sequence
930 int featureCount = feats[i] == null ? 0
931 : ((SequenceFeature[]) feats[i]).length;
932 scores[i] = featureCount;
933 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
934 // " Feats: "+featureCount+" Score : "+scores[i]);
936 QuickSort.sortByDouble(scores, seqs, sortByFeatureScoreAscending);
// sorting on label text is reported through the "not yet implemented" message
// key below
940 if (method == FEATURE_LABEL)
943 MessageManager.getString("error.not_yet_implemented"));
// write the reordered array back to the alignment
947 setOrder(alignment, seqs);