2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.bin.ApplicationSingletonProvider;
26 import jalview.bin.ApplicationSingletonProvider.ApplicationSingletonI;
27 import jalview.datamodel.AlignmentAnnotation;
28 import jalview.datamodel.AlignmentI;
29 import jalview.datamodel.AlignmentOrder;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceGroup;
32 import jalview.datamodel.SequenceI;
33 import jalview.datamodel.SequenceNode;
34 import jalview.util.QuickSort;
36 import java.util.ArrayList;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Iterator;
40 import java.util.List;
43 * Routines for manipulating the order of a multiple sequence alignment TODO:
44 * this class retains some global states concerning sort-order which should be
45 * made attributes for the caller's alignment visualization. TODO: refactor to
46 * allow a subset of selected sequences to be sorted within the context of a
47 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
48 * data mapping to each tobesorted element to use ], Alignment context of
49 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
50 * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector
51 * resulting from applying the operation to tobesorted should be mapped back to
52 * the original positions in alignment. Otherwise, normal behaviour is to re
53 * order alignment so that tobesorted is sorted and grouped together starting
54 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
55 * becomes a,tb1,tb2,tb3,b,c)
57 public class AlignmentSorter implements ApplicationSingletonI
60 private AlignmentSorter()
65 public static AlignmentSorter getInstance()
67 return ApplicationSingletonProvider.getInstance(AlignmentSorter.class);
71 * types of feature ordering: Sort by score : average score - or total score -
72 * over all features in region Sort by feature label text: (or if null -
73 * feature type text) - numerical or alphabetical Sort by feature density:
74 * based on counts - ignoring individual text or scores for each feature
76 public static final String FEATURE_SCORE = "average_score";
78 public static final String FEATURE_LABEL = "text";
80 public static final String FEATURE_DENSITY = "density";
83 * todo: refactor searches to follow a basic pattern: (search property, last
84 * search state, current sort direction)
86 boolean sortIdAscending = true;
88 int lastGroupHash = 0;
90 boolean sortGroupAscending = true;
92 AlignmentOrder lastOrder = null;
94 boolean sortOrderAscending = true;
96 TreeModel lastTree = null;
98 boolean sortTreeAscending = true;
101 * last Annotation Label used for sort by Annotation score
103 private String lastSortByAnnotation;
106 * string hash of last arguments to sortByFeature (sort order toggles if this
107 * is unchanged between sorts)
109 private String sortByFeatureCriteria;
111 private boolean sortByFeatureAscending = true;
113 private boolean sortLengthAscending;
115 private static boolean sortEValueAscending;
117 private static boolean sortBitScoreAscending;
120 * Sorts sequences in the alignment by Percentage Identity with the given
121 * reference sequence, sorting the highest identity to the top
129 public static void sortByPID(AlignmentI align, SequenceI s)
131 int nSeq = align.getHeight();
133 float[] scores = new float[nSeq];
134 SequenceI[] seqs = new SequenceI[nSeq];
135 String refSeq = s.getSequenceAsString();
137 SimilarityParams pidParams = new SimilarityParams(true, true, true,
139 for (int i = 0; i < nSeq; i++)
141 scores[i] = (float) PIDModel.computePID(
142 align.getSequenceAt(i).getSequenceAsString(), refSeq,
144 seqs[i] = align.getSequenceAt(i);
147 QuickSort.sort(scores, seqs);
148 setReverseOrder(align, seqs);
152 * Sorts by ID. Numbers are sorted before letters.
155 * The alignment object to sort
157 public static void sortByID(AlignmentI align)
159 int nSeq = align.getHeight();
161 String[] ids = new String[nSeq];
162 SequenceI[] seqs = new SequenceI[nSeq];
164 for (int i = 0; i < nSeq; i++)
166 ids[i] = align.getSequenceAt(i).getName();
167 seqs[i] = align.getSequenceAt(i);
170 QuickSort.sort(ids, seqs);
171 AlignmentSorter as = getInstance();
172 as.sortIdAscending = !as.sortIdAscending;
173 set(align, seqs, as.sortIdAscending);
177 * Sorts by sequence length
180 * The alignment object to sort
182 public static void sortByLength(AlignmentI align)
184 int nSeq = align.getHeight();
186 float[] length = new float[nSeq];
187 SequenceI[] seqs = new SequenceI[nSeq];
189 for (int i = 0; i < nSeq; i++)
191 seqs[i] = align.getSequenceAt(i);
192 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
195 QuickSort.sort(length, seqs);
196 AlignmentSorter as = getInstance();
197 as.sortLengthAscending = !as.sortLengthAscending;
198 set(align, seqs, as.sortLengthAscending);
202 * Sorts by sequence evalue. Currently moves all sequences without an evalue to
203 * the top of the alignment.
206 * The alignment object to sort
208 public static void sortByEValue(AlignmentI align)
210 int nSeq = align.getHeight();
212 double[] evalue = new double[nSeq];
213 SequenceI[] seqs = new SequenceI[nSeq];
215 for (int i = 0; i < nSeq; i++)
217 seqs[i] = align.getSequenceAt(i);
218 AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores");
221 evalue[i] = ann[0].getEValue();
229 QuickSort.sort(evalue, seqs);
231 if (sortEValueAscending)
233 setReverseOrder(align, seqs);
237 setOrder(align, seqs);
240 sortEValueAscending = !sortEValueAscending;
244 * Sorts by sequence bit score. Currently moves all sequences without a bit
245 * score to the top of the alignment
248 * The alignment object to sort
250 public static void sortByBitScore(AlignmentI align)
252 int nSeq = align.getHeight();
254 double[] score = new double[nSeq];
255 SequenceI[] seqs = new SequenceI[nSeq];
257 for (int i = 0; i < nSeq; i++)
259 seqs[i] = align.getSequenceAt(i);
260 AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores");
263 score[i] = ann[0].getEValue();
271 QuickSort.sort(score, seqs);
273 if (sortBitScoreAscending)
275 setReverseOrder(align, seqs);
279 setOrder(align, seqs);
282 sortBitScoreAscending = !sortBitScoreAscending;
286 * Sorts the alignment by size of group. <br>
287 * Maintains the order of sequences in each group by order in given alignment
291 * sorts the given alignment object by group
293 public static void sortByGroup(AlignmentI align)
295 // MAINTAINS ORIGNAL SEQUENCE ORDER,
296 // ORDERS BY GROUP SIZE
297 List<SequenceGroup> groups = new ArrayList<>();
299 AlignmentSorter as = getInstance();
301 if (groups.hashCode() != as.lastGroupHash)
303 as.sortGroupAscending = true;
304 as.lastGroupHash = groups.hashCode();
308 as.sortGroupAscending = !as.sortGroupAscending;
311 // SORTS GROUPS BY SIZE
312 // ////////////////////
313 for (SequenceGroup sg : align.getGroups())
315 for (int j = 0; j < groups.size(); j++)
317 SequenceGroup sg2 = groups.get(j);
319 if (sg.getSize() > sg2.getSize())
327 if (!groups.contains(sg))
333 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
334 // /////////////////////////////////////////////
335 List<SequenceI> tmp = new ArrayList<>();
337 for (int i = 0; i < groups.size(); i++)
339 SequenceGroup sg = groups.get(i);
340 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
342 for (int j = 0; j < orderedseqs.length; j++)
344 tmp.add(orderedseqs[j]);
347 set(align, tmp, as.sortGroupAscending);
351 * Sorts by a given AlignmentOrder object
356 * specified order for alignment
358 public static void sortBy(AlignmentI align, AlignmentOrder order)
360 // Get an ordered vector of sequences which may also be present in align
361 List<SequenceI> tmp = order.getOrder();
363 AlignmentSorter as = getInstance();
365 if (as.lastOrder == order)
367 as.sortOrderAscending = !as.sortOrderAscending;
371 as.sortOrderAscending = true;
373 set(align, tmp, as.sortOrderAscending);
384 * @return DOCUMENT ME!
386 private static List<SequenceI> getOrderByTree(AlignmentI align,
389 int nSeq = align.getHeight();
391 List<SequenceI> tmp = new ArrayList<>();
393 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
395 if (tmp.size() != nSeq)
397 // TODO: JBPNote - decide if this is always an error
398 // (eg. not when a tree is associated to another alignment which has more
400 if (tmp.size() != nSeq)
402 addStrays(align, tmp);
405 if (tmp.size() != nSeq)
407 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
409 + " in getOrderByTree - tree contains sequences not in alignment");
417 * Sorts the alignment by a given tree
424 public static void sortByTree(AlignmentI align, TreeModel tree)
426 List<SequenceI> tmp = getOrderByTree(align, tree);
428 AlignmentSorter as = getInstance();
430 // tmp should properly permute align with tree.
431 if (as.lastTree != tree)
433 as.sortTreeAscending = true;
438 as.sortTreeAscending = !as.sortTreeAscending;
440 set(align, tmp, as.sortTreeAscending);
451 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
453 int nSeq = align.getHeight();
455 for (int i = 0; i < nSeq; i++)
457 if (!tmp.contains(align.getSequenceAt(i)))
459 tmp.add(align.getSequenceAt(i));
463 if (nSeq != tmp.size())
466 .println("ERROR: Size still not right even after addStrays");
480 * @return DOCUMENT ME!
482 private static List<SequenceI> _sortByTree(SequenceNode node,
483 List<SequenceI> tmp, List<SequenceI> seqset)
490 SequenceNode left = (SequenceNode) node.left();
491 SequenceNode right = (SequenceNode) node.right();
493 if ((left == null) && (right == null))
495 if (!node.isPlaceholder() && (node.element() != null))
497 if (node.element() instanceof SequenceI)
499 if (!tmp.contains(node.element())) // && (seqset==null ||
500 // seqset.size()==0 ||
501 // seqset.contains(tmp)))
503 tmp.add((SequenceI) node.element());
512 _sortByTree(left, tmp, seqset);
513 _sortByTree(right, tmp, seqset);
520 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
525 * recover the order of sequences given by the safe numbering scheme introducd
526 * SeqsetUtils.uniquify.
528 public static void recoverOrder(SequenceI[] alignment)
530 float[] ids = new float[alignment.length];
532 for (int i = 0; i < alignment.length; i++)
534 ids[i] = (Float.valueOf(alignment[i].getName().substring(8)))
538 jalview.util.QuickSort.sort(ids, alignment);
542 * Sort sequence in order of increasing score attribute for annotation with a
543 * particular scoreLabel. Or reverse if same label was used previously
546 * exact label for sequence associated AlignmentAnnotation scores to
549 * sequences to be sorted
551 public static void sortByAnnotationScore(String scoreLabel,
552 AlignmentI alignment)
554 SequenceI[] seqs = alignment.getSequencesArray();
555 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
557 int hasScores = 0; // number of scores present on set
558 double[] scores = new double[seqs.length];
559 double min = 0, max = 0;
560 for (int i = 0; i < seqs.length; i++)
562 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
563 if (scoreAnn != null)
567 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
571 max = min = scores[i];
592 return; // do nothing - no scores present to sort by.
594 if (hasScores < seqs.length)
596 for (int i = 0; i < seqs.length; i++)
600 scores[i] = (max + i + 1.0);
605 jalview.util.QuickSort.sort(scores, seqs);
607 AlignmentSorter as = getInstance();
609 if (as.lastSortByAnnotation != scoreLabel)
611 as.lastSortByAnnotation = scoreLabel;
612 setOrder(alignment, seqs);
616 setReverseOrder(alignment, seqs);
621 * Sort sequences by feature score or density, optionally restricted by
622 * feature types, feature groups, or alignment start/end positions.
624 * If the sort is repeated for the same combination of types and groups, sort
627 * @param featureTypes
628 * a list of feature types to include (or null for all)
630 * a list of feature groups to include (or null for all)
632 * start column position to include (base zero)
634 * end column position to include (base zero)
636 * the alignment to be sorted
638 * either "average_score" or "density" ("text" not yet implemented)
640 public static void sortByFeature(List<String> featureTypes,
641 List<String> groups, final int startCol, final int endCol,
642 AlignmentI alignment, String method)
644 if (method != FEATURE_SCORE && method != FEATURE_LABEL
645 && method != FEATURE_DENSITY)
647 String msg = String.format(
648 "Implementation Error - sortByFeature method must be either '%s' or '%s'",
649 FEATURE_SCORE, FEATURE_DENSITY);
650 System.err.println(msg);
654 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol,
657 SequenceI[] seqs = alignment.getSequencesArray();
659 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
661 int hasScores = 0; // number of scores present on set
662 double[] scores = new double[seqs.length];
663 int[] seqScores = new int[seqs.length];
664 Object[][] feats = new Object[seqs.length][];
668 for (int i = 0; i < seqs.length; i++)
671 * get sequence residues overlapping column region
672 * and features for residue positions and specified types
674 String[] types = featureTypes == null ? null
675 : featureTypes.toArray(new String[featureTypes.size()]);
676 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
682 Iterator<SequenceFeature> it = sfs.listIterator();
685 SequenceFeature sf = it.next();
688 * accept all features with null or empty group, otherwise
689 * check group is one of the currently visible groups
691 String featureGroup = sf.getFeatureGroup();
692 if (groups != null && featureGroup != null
693 && !"".equals(featureGroup)
694 && !groups.contains(featureGroup))
700 float score = sf.getScore();
701 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
703 if (seqScores[i] == 0)
710 // take the first instance of this score // ??
715 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
718 if (method == FEATURE_LABEL)
720 // order the labels by alphabet (not yet implemented)
721 String[] labs = new String[sfs.size()];
722 for (int l = 0; l < sfs.size(); l++)
724 SequenceFeature sf = sfs.get(l);
725 String description = sf.getDescription();
726 labs[l] = (description != null ? description : sf.getType());
728 QuickSort.sort(labs, feats[i]);
733 // compute average score
734 scores[i] /= seqScores[i];
735 // update the score bounds.
743 max = Math.max(max, scores[i]);
744 min = Math.min(min, scores[i]);
749 boolean doSort = false;
751 if (FEATURE_SCORE.equals(method))
755 return; // do nothing - no scores present to sort by.
758 if (hasScores < seqs.length)
760 for (int i = 0; i < seqs.length; i++)
764 scores[i] = (max + 1 + i);
768 // int nf = (feats[i] == null) ? 0
769 // : ((SequenceFeature[]) feats[i]).length;
770 // // System.err.println("Sorting on Score: seq " +
772 // + " Feats: " + nf + " Score : " + scores[i]);
778 else if (FEATURE_DENSITY.equals(method))
780 for (int i = 0; i < seqs.length; i++)
782 int featureCount = feats[i] == null ? 0
783 : ((SequenceFeature[]) feats[i]).length;
784 scores[i] = featureCount;
785 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
786 // " Feats: "+featureCount+" Score : "+scores[i]);
792 QuickSort.sortByDouble(scores, seqs,
793 getInstance().sortByFeatureAscending);
795 setOrder(alignment, seqs);
799 * Builds a string hash of criteria for sorting, and if unchanged from last
800 * time, reverse the sort order
803 * @param featureTypes
808 protected static void flipFeatureSortIfUnchanged(String method,
809 List<String> featureTypes, List<String> groups,
810 final int startCol, final int endCol)
812 StringBuilder sb = new StringBuilder(64);
813 sb.append(startCol).append(method).append(endCol);
814 if (featureTypes != null)
816 Collections.sort(featureTypes);
817 sb.append(featureTypes.toString());
821 Collections.sort(groups);
822 sb.append(groups.toString());
824 String scoreCriteria = sb.toString();
827 * if resorting on the same criteria, toggle sort order
829 AlignmentSorter as = getInstance();
830 if (as.sortByFeatureCriteria == null
831 || !scoreCriteria.equals(as.sortByFeatureCriteria))
833 as.sortByFeatureAscending = true;
837 as.sortByFeatureAscending = !as.sortByFeatureAscending;
839 as.sortByFeatureCriteria = scoreCriteria;
843 * Set the alignment's sequences list to contain the sequences from a
844 * temporary list, first adding all the elements from the tmp list, then adding all sequences in the alignment that
845 * are not in the list. Option to do the final sort either in order or in reverse order.
847 * @param align The alignment being sorted
849 * the temporary sequence list
851 * false for reversed order; only sequences already in
852 * the alignment will be used (which is actually already guaranteed
853 * by vectorSubsetToArray)
855 private static void set(AlignmentI align, List<SequenceI> tmp,
858 set(align, vectorSubsetToArray(align.getSequences(), tmp), ascending);
862 * Set the alignment's sequences list to contain these sequences, either in
863 * this order or its reverse.
867 * the new sequence array
869 * false for reversed order; if ascending, only sequences already in
870 * the alignment will be used; if descending, then a direct 1:1
871 * replacement is made
873 private static void set(AlignmentI align, SequenceI[] seqs,
878 setOrder(align, seqs);
882 setReverseOrder(align, seqs);
888 * Replace the alignment's sequences with values in an array, clearing the
889 * alignment's sequence list and filtering for sequences that are actually in
890 * the alignment already.
895 * the array of replacement values, of any length
897 public static void setOrder(AlignmentI align, SequenceI[] seqs)
899 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
900 List<SequenceI> seqList = align.getSequences();
901 synchronized (seqList)
903 List<SequenceI> tmp = new ArrayList<>();
905 for (int i = 0; i < seqs.length; i++)
907 if (seqList.contains(seqs[i]))
914 // User may have hidden seqs, then clicked undo or redo
915 for (int i = 0; i < tmp.size(); i++)
917 seqList.add(tmp.get(i));
923 * Replace the alignment's sequences or a subset of those sequences with
924 * values in an array in reverse order. All sequences are replaced; no check
925 * is made that these sequences are in the alignment already.
930 * the array of replacement values, length must be less than or equal
931 * to Alignment.sequences.size()
933 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
935 int nSeq = seqs.length;
937 int len = (nSeq + (nSeq % 2)) / 2;
940 // if ((nSeq % 2) == 0)
946 // len = (nSeq + 1) / 2;
949 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
950 List<SequenceI> seqList = align.getSequences();
951 synchronized (seqList)
953 for (int i = 0; i < len; i++)
955 // SequenceI tmp = seqs[i];
956 seqList.set(i, seqs[nSeq - i - 1]);
957 seqList.set(nSeq - i - 1, seqs[i]);
963 * Create and array of reordered sequences in order first from tmp that are
964 * present in seqList already, then, after that, any remaining sequences in
965 * seqList not in tmp. Any sequences in tmp that are not in seqList already
969 * thread safe collection of sequences originally in the alignment
971 * thread safe collection of sequences or subsequences possibly in
974 * @return intersect(tmp,seqList)+intersect(complement(tmp),seqList)
976 private static SequenceI[] vectorSubsetToArray(List<SequenceI> seqList,
979 ArrayList<SequenceI> seqs = new ArrayList<>();
980 int n = seqList.size();
981 BitSet bs = new BitSet(n);
983 for (int i = 0, nt = tmp.size(); i < nt; i++)
985 SequenceI sq = tmp.get(i);
986 int idx = seqList.indexOf(sq);
987 if (idx >= 0 && bs.get(idx))
994 for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1))
996 seqs.add(seqList.get(i));
999 return seqs.toArray(new SequenceI[seqs.size()]);