2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.datamodel.AlignmentAnnotation;
26 import jalview.datamodel.AlignmentI;
27 import jalview.datamodel.AlignmentOrder;
28 import jalview.datamodel.SequenceFeature;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.SequenceNode;
32 import jalview.util.QuickSort;
34 import java.util.ArrayList;
35 import java.util.Collections;
36 import java.util.Iterator;
37 import java.util.List;
40 * Routines for manipulating the order of a multiple sequence alignment TODO:
41 * this class retains some global states concerning sort-order which should be
42 * made attributes for the caller's alignment visualization. TODO: refactor to
43 * allow a subset of selected sequences to be sorted within the context of a
44 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
45 * data mapping to each tobesorted element to use ], Alignment context of
46 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
47 * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector
48 * resulting from applying the operation to tobesorted should be mapped back to
49 * the original positions in alignment. Otherwise, normal behaviour is to re
50 * order alignment so that tobesorted is sorted and grouped together starting
51 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
52 * becomes a,tb1,tb2,tb3,b,c)
54 public class AlignmentSorter
57 * todo: refactor searches to follow a basic pattern: (search property, last
58 * search state, current sort direction)
60 static boolean sortIdAscending = true;
62 static int lastGroupHash = 0;
64 static boolean sortGroupAscending = true;
66 static AlignmentOrder lastOrder = null;
68 static boolean sortOrderAscending = true;
70 static TreeModel lastTree = null;
72 static boolean sortTreeAscending = true;
75 * last Annotation Label used for sort by Annotation score
77 private static String lastSortByAnnotation;
80 * string hash of last arguments to sortByFeature
81 * (sort order toggles if this is unchanged between sorts)
83 private static String sortByFeatureCriteria;
85 private static boolean sortByFeatureAscending = true;
87 private static boolean sortLengthAscending;
89 private static boolean sortEValueAscending;
91 private static boolean sortBitScoreAscending;
94 * Sorts sequences in the alignment by Percentage Identity with the given
95 * reference sequence, sorting the highest identity to the top
103 public static void sortByPID(AlignmentI align, SequenceI s)
105 int nSeq = align.getHeight();
107 float[] scores = new float[nSeq];
108 SequenceI[] seqs = new SequenceI[nSeq];
109 String refSeq = s.getSequenceAsString();
111 SimilarityParams pidParams = new SimilarityParams(true, true, true,
113 for (int i = 0; i < nSeq; i++)
115 scores[i] = (float) PIDModel.computePID(
116 align.getSequenceAt(i).getSequenceAsString(), refSeq,
118 seqs[i] = align.getSequenceAt(i);
121 QuickSort.sort(scores, seqs);
123 setReverseOrder(align, seqs);
127 * Reverse the order of the sort
134 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
136 int nSeq = seqs.length;
146 len = (nSeq + 1) / 2;
149 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
151 synchronized (asq = align.getSequences())
153 for (int i = 0; i < len; i++)
155 // SequenceI tmp = seqs[i];
156 asq.set(i, seqs[nSeq - i - 1]);
157 asq.set(nSeq - i - 1, seqs[i]);
163 * Sets the Alignment object with the given sequences
166 * Alignment object to be updated
168 * sequences as a vector
170 private static void setOrder(AlignmentI align, List<SequenceI> tmp)
172 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
176 * Sets the Alignment object with the given sequences
181 * sequences as an array
183 public static void setOrder(AlignmentI align, SequenceI[] seqs)
185 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
186 List<SequenceI> algn;
187 synchronized (algn = align.getSequences())
189 List<SequenceI> tmp = new ArrayList<>();
191 for (int i = 0; i < seqs.length; i++)
193 if (algn.contains(seqs[i]))
200 // User may have hidden seqs, then clicked undo or redo
201 for (int i = 0; i < tmp.size(); i++)
203 algn.add(tmp.get(i));
209 * Sorts by ID. Numbers are sorted before letters.
212 * The alignment object to sort
214 public static void sortByID(AlignmentI align)
216 int nSeq = align.getHeight();
218 String[] ids = new String[nSeq];
219 SequenceI[] seqs = new SequenceI[nSeq];
221 for (int i = 0; i < nSeq; i++)
223 ids[i] = align.getSequenceAt(i).getName();
224 seqs[i] = align.getSequenceAt(i);
227 QuickSort.sort(ids, seqs);
231 setReverseOrder(align, seqs);
235 setOrder(align, seqs);
238 sortIdAscending = !sortIdAscending;
242 * Sorts by sequence length
245 * The alignment object to sort
247 public static void sortByLength(AlignmentI align)
249 int nSeq = align.getHeight();
251 float[] length = new float[nSeq];
252 SequenceI[] seqs = new SequenceI[nSeq];
254 for (int i = 0; i < nSeq; i++)
256 seqs[i] = align.getSequenceAt(i);
257 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
260 QuickSort.sort(length, seqs);
262 if (sortLengthAscending)
264 setReverseOrder(align, seqs);
268 setOrder(align, seqs);
271 sortLengthAscending = !sortLengthAscending;
275 * Sorts by sequence evalue. Currently moves all sequences without an evalue to
276 * the top of the alignment.
279 * The alignment object to sort
281 public static void sortByEValue(AlignmentI align)
283 int nSeq = align.getHeight();
285 double[] evalue = new double[nSeq];
286 SequenceI[] seqs = new SequenceI[nSeq];
288 for (int i = 0; i < nSeq; i++)
290 seqs[i] = align.getSequenceAt(i);
291 AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores");
294 evalue[i] = ann[0].getEValue();
302 QuickSort.sort(evalue, seqs);
304 if (sortEValueAscending)
306 setReverseOrder(align, seqs);
310 setOrder(align, seqs);
313 sortEValueAscending = !sortEValueAscending;
317 * Sorts by sequence bit score. Currently moves all sequences without a bit
318 * score to the top of the alignment
321 * The alignment object to sort
323 public static void sortByBitScore(AlignmentI align)
325 int nSeq = align.getHeight();
327 double[] score = new double[nSeq];
328 SequenceI[] seqs = new SequenceI[nSeq];
330 for (int i = 0; i < nSeq; i++)
332 seqs[i] = align.getSequenceAt(i);
333 AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores");
336 score[i] = ann[0].getEValue();
344 QuickSort.sort(score, seqs);
346 if (sortBitScoreAscending)
348 setReverseOrder(align, seqs);
352 setOrder(align, seqs);
355 sortBitScoreAscending = !sortBitScoreAscending;
359 * Sorts the alignment by size of group. <br>
360 * Maintains the order of sequences in each group by order in given alignment
364 * sorts the given alignment object by group
366 public static void sortByGroup(AlignmentI align)
368 // MAINTAINS ORIGNAL SEQUENCE ORDER,
369 // ORDERS BY GROUP SIZE
370 List<SequenceGroup> groups = new ArrayList<>();
372 if (groups.hashCode() != lastGroupHash)
374 sortGroupAscending = true;
375 lastGroupHash = groups.hashCode();
379 sortGroupAscending = !sortGroupAscending;
382 // SORTS GROUPS BY SIZE
383 // ////////////////////
384 for (SequenceGroup sg : align.getGroups())
386 for (int j = 0; j < groups.size(); j++)
388 SequenceGroup sg2 = groups.get(j);
390 if (sg.getSize() > sg2.getSize())
398 if (!groups.contains(sg))
404 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
405 // /////////////////////////////////////////////
406 List<SequenceI> seqs = new ArrayList<>();
408 for (int i = 0; i < groups.size(); i++)
410 SequenceGroup sg = groups.get(i);
411 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
413 for (int j = 0; j < orderedseqs.length; j++)
415 seqs.add(orderedseqs[j]);
419 if (sortGroupAscending)
421 setOrder(align, seqs);
425 setReverseOrder(align,
426 vectorSubsetToArray(seqs, align.getSequences()));
431 * Select sequences in order from tmp that is present in mask, and any
432 * remaining sequences in mask not in tmp
435 * thread safe collection of sequences
437 * thread safe collection of sequences
439 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
441 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
442 List<SequenceI> mask)
445 // tmp2 = tmp.retainAll(mask);
446 // return tmp2.addAll(mask.removeAll(tmp2))
448 ArrayList<SequenceI> seqs = new ArrayList<>();
450 boolean[] tmask = new boolean[mask.size()];
452 for (i = 0; i < mask.size(); i++)
457 for (i = 0; i < tmp.size(); i++)
459 SequenceI sq = tmp.get(i);
460 idx = mask.indexOf(sq);
461 if (idx > -1 && tmask[idx])
468 for (i = 0; i < tmask.length; i++)
472 seqs.add(mask.get(i));
476 return seqs.toArray(new SequenceI[seqs.size()]);
480 * Sorts by a given AlignmentOrder object
485 * specified order for alignment
487 public static void sortBy(AlignmentI align, AlignmentOrder order)
489 // Get an ordered vector of sequences which may also be present in align
490 List<SequenceI> tmp = order.getOrder();
492 if (lastOrder == order)
494 sortOrderAscending = !sortOrderAscending;
498 sortOrderAscending = true;
501 if (sortOrderAscending)
503 setOrder(align, tmp);
507 setReverseOrder(align,
508 vectorSubsetToArray(tmp, align.getSequences()));
520 * @return DOCUMENT ME!
522 private static List<SequenceI> getOrderByTree(AlignmentI align,
525 int nSeq = align.getHeight();
527 List<SequenceI> tmp = new ArrayList<>();
529 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
531 if (tmp.size() != nSeq)
533 // TODO: JBPNote - decide if this is always an error
534 // (eg. not when a tree is associated to another alignment which has more
536 if (tmp.size() != nSeq)
538 addStrays(align, tmp);
541 if (tmp.size() != nSeq)
543 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
545 + " in getOrderByTree - tree contains sequences not in alignment");
553 * Sorts the alignment by a given tree
560 public static void sortByTree(AlignmentI align, TreeModel tree)
562 List<SequenceI> tmp = getOrderByTree(align, tree);
564 // tmp should properly permute align with tree.
565 if (lastTree != tree)
567 sortTreeAscending = true;
572 sortTreeAscending = !sortTreeAscending;
575 if (sortTreeAscending)
577 setOrder(align, tmp);
581 setReverseOrder(align,
582 vectorSubsetToArray(tmp, align.getSequences()));
594 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
596 int nSeq = align.getHeight();
598 for (int i = 0; i < nSeq; i++)
600 if (!tmp.contains(align.getSequenceAt(i)))
602 tmp.add(align.getSequenceAt(i));
606 if (nSeq != tmp.size())
609 .println("ERROR: Size still not right even after addStrays");
623 * @return DOCUMENT ME!
625 private static List<SequenceI> _sortByTree(SequenceNode node,
626 List<SequenceI> tmp, List<SequenceI> seqset)
633 SequenceNode left = (SequenceNode) node.left();
634 SequenceNode right = (SequenceNode) node.right();
636 if ((left == null) && (right == null))
638 if (!node.isPlaceholder() && (node.element() != null))
640 if (node.element() instanceof SequenceI)
642 if (!tmp.contains(node.element())) // && (seqset==null ||
643 // seqset.size()==0 ||
644 // seqset.contains(tmp)))
646 tmp.add((SequenceI) node.element());
655 _sortByTree(left, tmp, seqset);
656 _sortByTree(right, tmp, seqset);
663 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
668 * recover the order of sequences given by the safe numbering scheme introducd
669 * SeqsetUtils.uniquify.
671 public static void recoverOrder(SequenceI[] alignment)
673 float[] ids = new float[alignment.length];
675 for (int i = 0; i < alignment.length; i++)
677 ids[i] = (Float.valueOf(alignment[i].getName().substring(8)))
681 jalview.util.QuickSort.sort(ids, alignment);
685 * Sort sequence in order of increasing score attribute for annotation with a
686 * particular scoreLabel. Or reverse if same label was used previously
689 * exact label for sequence associated AlignmentAnnotation scores to
692 * sequences to be sorted
694 public static void sortByAnnotationScore(String scoreLabel,
695 AlignmentI alignment)
697 SequenceI[] seqs = alignment.getSequencesArray();
698 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
700 int hasScores = 0; // number of scores present on set
701 double[] scores = new double[seqs.length];
702 double min = 0, max = 0;
703 for (int i = 0; i < seqs.length; i++)
705 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
706 if (scoreAnn != null)
710 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
714 max = min = scores[i];
735 return; // do nothing - no scores present to sort by.
737 if (hasScores < seqs.length)
739 for (int i = 0; i < seqs.length; i++)
743 scores[i] = (max + i + 1.0);
748 jalview.util.QuickSort.sort(scores, seqs);
749 if (lastSortByAnnotation != scoreLabel)
751 lastSortByAnnotation = scoreLabel;
752 setOrder(alignment, seqs);
756 setReverseOrder(alignment, seqs);
761 * types of feature ordering: Sort by score : average score - or total score -
762 * over all features in region Sort by feature label text: (or if null -
763 * feature type text) - numerical or alphabetical Sort by feature density:
764 * based on counts - ignoring individual text or scores for each feature
766 public static String FEATURE_SCORE = "average_score";
768 public static String FEATURE_LABEL = "text";
770 public static String FEATURE_DENSITY = "density";
773 * Sort sequences by feature score or density, optionally restricted by
774 * feature types, feature groups, or alignment start/end positions.
776 * If the sort is repeated for the same combination of types and groups, sort
779 * @param featureTypes
780 * a list of feature types to include (or null for all)
782 * a list of feature groups to include (or null for all)
784 * start column position to include (base zero)
786 * end column position to include (base zero)
788 * the alignment to be sorted
790 * either "average_score" or "density" ("text" not yet implemented)
792 public static void sortByFeature(List<String> featureTypes,
793 List<String> groups, final int startCol, final int endCol,
794 AlignmentI alignment, String method)
796 if (method != FEATURE_SCORE && method != FEATURE_LABEL
797 && method != FEATURE_DENSITY)
800 .format("Implementation Error - sortByFeature method must be either '%s' or '%s'",
801 FEATURE_SCORE, FEATURE_DENSITY);
802 System.err.println(msg);
806 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol);
808 SequenceI[] seqs = alignment.getSequencesArray();
810 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
812 int hasScores = 0; // number of scores present on set
813 double[] scores = new double[seqs.length];
814 int[] seqScores = new int[seqs.length];
815 Object[][] feats = new Object[seqs.length][];
819 for (int i = 0; i < seqs.length; i++)
822 * get sequence residues overlapping column region
823 * and features for residue positions and specified types
825 String[] types = featureTypes == null ? null : featureTypes
826 .toArray(new String[featureTypes.size()]);
827 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
833 Iterator<SequenceFeature> it = sfs.listIterator();
836 SequenceFeature sf = it.next();
839 * accept all features with null or empty group, otherwise
840 * check group is one of the currently visible groups
842 String featureGroup = sf.getFeatureGroup();
843 if (groups != null && featureGroup != null
844 && !"".equals(featureGroup)
845 && !groups.contains(featureGroup))
851 float score = sf.getScore();
852 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
854 if (seqScores[i] == 0)
861 // take the first instance of this score // ??
866 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
869 if (method == FEATURE_LABEL)
871 // order the labels by alphabet (not yet implemented)
872 String[] labs = new String[sfs.size()];
873 for (int l = 0; l < sfs.size(); l++)
875 SequenceFeature sf = sfs.get(l);
876 String description = sf.getDescription();
877 labs[l] = (description != null ? description : sf.getType());
879 QuickSort.sort(labs, feats[i]);
884 // compute average score
885 scores[i] /= seqScores[i];
886 // update the score bounds.
894 max = Math.max(max, scores[i]);
895 min = Math.min(min, scores[i]);
900 if (FEATURE_SCORE.equals(method))
904 return; // do nothing - no scores present to sort by.
907 if (hasScores < seqs.length)
909 for (int i = 0; i < seqs.length; i++)
913 scores[i] = (max + 1 + i);
917 // int nf = (feats[i] == null) ? 0
918 // : ((SequenceFeature[]) feats[i]).length;
919 // // System.err.println("Sorting on Score: seq " +
921 // + " Feats: " + nf + " Score : " + scores[i]);
925 QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
927 else if (FEATURE_DENSITY.equals(method))
929 for (int i = 0; i < seqs.length; i++)
931 int featureCount = feats[i] == null ? 0
932 : ((SequenceFeature[]) feats[i]).length;
933 scores[i] = featureCount;
934 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
935 // " Feats: "+featureCount+" Score : "+scores[i]);
937 QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
940 setOrder(alignment, seqs);
944 * Builds a string hash of criteria for sorting, and if unchanged from last
945 * time, reverse the sort order
948 * @param featureTypes
953 protected static void flipFeatureSortIfUnchanged(String method,
954 List<String> featureTypes, List<String> groups,
955 final int startCol, final int endCol)
957 StringBuilder sb = new StringBuilder(64);
958 sb.append(startCol).append(method).append(endCol);
959 if (featureTypes != null)
961 Collections.sort(featureTypes);
962 sb.append(featureTypes.toString());
966 Collections.sort(groups);
967 sb.append(groups.toString());
969 String scoreCriteria = sb.toString();
972 * if resorting on the same criteria, toggle sort order
974 if (sortByFeatureCriteria == null
975 || !scoreCriteria.equals(sortByFeatureCriteria))
977 sortByFeatureAscending = true;
981 sortByFeatureAscending = !sortByFeatureAscending;
983 sortByFeatureCriteria = scoreCriteria;