2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.bin.ApplicationSingletonProvider;
26 import jalview.bin.ApplicationSingletonProvider.ApplicationSingletonI;
27 import jalview.datamodel.AlignmentAnnotation;
28 import jalview.datamodel.AlignmentI;
29 import jalview.datamodel.AlignmentOrder;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceGroup;
32 import jalview.datamodel.SequenceI;
33 import jalview.datamodel.SequenceNode;
34 import jalview.util.QuickSort;
36 import java.util.ArrayList;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Iterator;
40 import java.util.List;
43 * Routines for manipulating the order of a multiple sequence alignment TODO:
44 * this class retains some global states concerning sort-order which should be
45 * made attributes for the caller's alignment visualization. TODO: refactor to
46 * allow a subset of selected sequences to be sorted within the context of a
47 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
48 * data mapping to each tobesorted element to use ], Alignment context of
49 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
50 * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector
51 * resulting from applying the operation to tobesorted should be mapped back to
52 * the original positions in alignment. Otherwise, normal behaviour is to re
53 * order alignment so that tobesorted is sorted and grouped together starting
54 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
55 * becomes a,tb1,tb2,tb3,b,c)
57 public class AlignmentSorter implements ApplicationSingletonI
60 private AlignmentSorter()
65 public static AlignmentSorter getInstance()
67 return (AlignmentSorter) ApplicationSingletonProvider
68 .getInstance(AlignmentSorter.class);
72 * types of feature ordering: Sort by score : average score - or total score -
73 * over all features in region Sort by feature label text: (or if null -
74 * feature type text) - numerical or alphabetical Sort by feature density:
75 * based on counts - ignoring individual text or scores for each feature
77 public static final String FEATURE_SCORE = "average_score";
79 public static final String FEATURE_LABEL = "text";
81 public static final String FEATURE_DENSITY = "density";
84 * todo: refactor searches to follow a basic pattern: (search property, last
85 * search state, current sort direction)
87 boolean sortIdAscending = true;
89 int lastGroupHash = 0;
91 boolean sortGroupAscending = true;
93 AlignmentOrder lastOrder = null;
95 boolean sortOrderAscending = true;
97 TreeModel lastTree = null;
99 boolean sortTreeAscending = true;
102 * last Annotation Label used for sort by Annotation score
104 private String lastSortByAnnotation;
107 * string hash of last arguments to sortByFeature (sort order toggles if this
108 * is unchanged between sorts)
110 private String sortByFeatureCriteria;
112 private boolean sortByFeatureAscending = true;
114 private boolean sortLengthAscending;
116 private static boolean sortEValueAscending;
118 private static boolean sortBitScoreAscending;
121 * Sorts sequences in the alignment by Percentage Identity with the given
122 * reference sequence, sorting the highest identity to the top
130 public static void sortByPID(AlignmentI align, SequenceI s)
132 int nSeq = align.getHeight();
134 float[] scores = new float[nSeq];
135 SequenceI[] seqs = new SequenceI[nSeq];
136 String refSeq = s.getSequenceAsString();
138 SimilarityParams pidParams = new SimilarityParams(true, true, true,
140 for (int i = 0; i < nSeq; i++)
142 scores[i] = (float) PIDModel.computePID(
143 align.getSequenceAt(i).getSequenceAsString(), refSeq,
145 seqs[i] = align.getSequenceAt(i);
148 QuickSort.sort(scores, seqs);
149 setReverseOrder(align, seqs);
153 * Sorts by ID. Numbers are sorted before letters.
156 * The alignment object to sort
158 public static void sortByID(AlignmentI align)
160 int nSeq = align.getHeight();
162 String[] ids = new String[nSeq];
163 SequenceI[] seqs = new SequenceI[nSeq];
165 for (int i = 0; i < nSeq; i++)
167 ids[i] = align.getSequenceAt(i).getName();
168 seqs[i] = align.getSequenceAt(i);
171 QuickSort.sort(ids, seqs);
172 AlignmentSorter as = getInstance();
173 as.sortIdAscending = !as.sortIdAscending;
174 set(align, seqs, as.sortIdAscending);
178 * Sorts by sequence length
181 * The alignment object to sort
183 public static void sortByLength(AlignmentI align)
185 int nSeq = align.getHeight();
187 float[] length = new float[nSeq];
188 SequenceI[] seqs = new SequenceI[nSeq];
190 for (int i = 0; i < nSeq; i++)
192 seqs[i] = align.getSequenceAt(i);
193 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
196 QuickSort.sort(length, seqs);
197 AlignmentSorter as = getInstance();
198 as.sortLengthAscending = !as.sortLengthAscending;
199 set(align, seqs, as.sortLengthAscending);
203 * Sorts by sequence evalue. Currently moves all sequences without an evalue to
204 * the top of the alignment.
207 * The alignment object to sort
209 public static void sortByEValue(AlignmentI align)
211 int nSeq = align.getHeight();
213 double[] evalue = new double[nSeq];
214 SequenceI[] seqs = new SequenceI[nSeq];
216 for (int i = 0; i < nSeq; i++)
218 seqs[i] = align.getSequenceAt(i);
219 AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores");
222 evalue[i] = ann[0].getEValue();
230 QuickSort.sort(evalue, seqs);
232 if (sortEValueAscending)
234 setReverseOrder(align, seqs);
238 setOrder(align, seqs);
241 sortEValueAscending = !sortEValueAscending;
245 * Sorts by sequence bit score. Currently moves all sequences without a bit
246 * score to the top of the alignment
249 * The alignment object to sort
251 public static void sortByBitScore(AlignmentI align)
253 int nSeq = align.getHeight();
255 double[] score = new double[nSeq];
256 SequenceI[] seqs = new SequenceI[nSeq];
258 for (int i = 0; i < nSeq; i++)
260 seqs[i] = align.getSequenceAt(i);
261 AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores");
264 score[i] = ann[0].getEValue();
272 QuickSort.sort(score, seqs);
274 if (sortBitScoreAscending)
276 setReverseOrder(align, seqs);
280 setOrder(align, seqs);
283 sortBitScoreAscending = !sortBitScoreAscending;
287 * Sorts the alignment by size of group. <br>
288 * Maintains the order of sequences in each group by order in given alignment
292 * sorts the given alignment object by group
294 public static void sortByGroup(AlignmentI align)
296 // MAINTAINS ORIGNAL SEQUENCE ORDER,
297 // ORDERS BY GROUP SIZE
298 List<SequenceGroup> groups = new ArrayList<>();
300 AlignmentSorter as = getInstance();
302 if (groups.hashCode() != as.lastGroupHash)
304 as.sortGroupAscending = true;
305 as.lastGroupHash = groups.hashCode();
309 as.sortGroupAscending = !as.sortGroupAscending;
312 // SORTS GROUPS BY SIZE
313 // ////////////////////
314 for (SequenceGroup sg : align.getGroups())
316 for (int j = 0; j < groups.size(); j++)
318 SequenceGroup sg2 = groups.get(j);
320 if (sg.getSize() > sg2.getSize())
328 if (!groups.contains(sg))
334 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
335 // /////////////////////////////////////////////
336 List<SequenceI> tmp = new ArrayList<>();
338 for (int i = 0; i < groups.size(); i++)
340 SequenceGroup sg = groups.get(i);
341 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
343 for (int j = 0; j < orderedseqs.length; j++)
345 tmp.add(orderedseqs[j]);
348 set(align, tmp, as.sortGroupAscending);
352 * Sorts by a given AlignmentOrder object
357 * specified order for alignment
359 public static void sortBy(AlignmentI align, AlignmentOrder order)
361 // Get an ordered vector of sequences which may also be present in align
362 List<SequenceI> tmp = order.getOrder();
364 AlignmentSorter as = getInstance();
366 if (as.lastOrder == order)
368 as.sortOrderAscending = !as.sortOrderAscending;
372 as.sortOrderAscending = true;
374 set(align, tmp, as.sortOrderAscending);
385 * @return DOCUMENT ME!
387 private static List<SequenceI> getOrderByTree(AlignmentI align,
390 int nSeq = align.getHeight();
392 List<SequenceI> tmp = new ArrayList<>();
394 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
396 if (tmp.size() != nSeq)
398 // TODO: JBPNote - decide if this is always an error
399 // (eg. not when a tree is associated to another alignment which has more
401 if (tmp.size() != nSeq)
403 addStrays(align, tmp);
406 if (tmp.size() != nSeq)
408 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
410 + " in getOrderByTree - tree contains sequences not in alignment");
418 * Sorts the alignment by a given tree
425 public static void sortByTree(AlignmentI align, TreeModel tree)
427 List<SequenceI> tmp = getOrderByTree(align, tree);
429 AlignmentSorter as = getInstance();
431 // tmp should properly permute align with tree.
432 if (as.lastTree != tree)
434 as.sortTreeAscending = true;
439 as.sortTreeAscending = !as.sortTreeAscending;
441 set(align, tmp, as.sortTreeAscending);
452 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
454 int nSeq = align.getHeight();
456 for (int i = 0; i < nSeq; i++)
458 if (!tmp.contains(align.getSequenceAt(i)))
460 tmp.add(align.getSequenceAt(i));
464 if (nSeq != tmp.size())
467 .println("ERROR: Size still not right even after addStrays");
481 * @return DOCUMENT ME!
483 private static List<SequenceI> _sortByTree(SequenceNode node,
484 List<SequenceI> tmp, List<SequenceI> seqset)
491 SequenceNode left = (SequenceNode) node.left();
492 SequenceNode right = (SequenceNode) node.right();
494 if ((left == null) && (right == null))
496 if (!node.isPlaceholder() && (node.element() != null))
498 if (node.element() instanceof SequenceI)
500 if (!tmp.contains(node.element())) // && (seqset==null ||
501 // seqset.size()==0 ||
502 // seqset.contains(tmp)))
504 tmp.add((SequenceI) node.element());
513 _sortByTree(left, tmp, seqset);
514 _sortByTree(right, tmp, seqset);
521 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
526 * recover the order of sequences given by the safe numbering scheme introducd
527 * SeqsetUtils.uniquify.
529 public static void recoverOrder(SequenceI[] alignment)
531 float[] ids = new float[alignment.length];
533 for (int i = 0; i < alignment.length; i++)
535 ids[i] = (Float.valueOf(alignment[i].getName().substring(8)))
539 jalview.util.QuickSort.sort(ids, alignment);
543 * Sort sequence in order of increasing score attribute for annotation with a
544 * particular scoreLabel. Or reverse if same label was used previously
547 * exact label for sequence associated AlignmentAnnotation scores to
550 * sequences to be sorted
552 public static void sortByAnnotationScore(String scoreLabel,
553 AlignmentI alignment)
555 SequenceI[] seqs = alignment.getSequencesArray();
556 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
558 int hasScores = 0; // number of scores present on set
559 double[] scores = new double[seqs.length];
560 double min = 0, max = 0;
561 for (int i = 0; i < seqs.length; i++)
563 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
564 if (scoreAnn != null)
568 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
572 max = min = scores[i];
593 return; // do nothing - no scores present to sort by.
595 if (hasScores < seqs.length)
597 for (int i = 0; i < seqs.length; i++)
601 scores[i] = (max + i + 1.0);
606 jalview.util.QuickSort.sort(scores, seqs);
608 AlignmentSorter as = getInstance();
610 if (as.lastSortByAnnotation != scoreLabel)
612 as.lastSortByAnnotation = scoreLabel;
613 setOrder(alignment, seqs);
617 setReverseOrder(alignment, seqs);
622 * Sort sequences by feature score or density, optionally restricted by
623 * feature types, feature groups, or alignment start/end positions.
625 * If the sort is repeated for the same combination of types and groups, sort
628 * @param featureTypes
629 * a list of feature types to include (or null for all)
631 * a list of feature groups to include (or null for all)
633 * start column position to include (base zero)
635 * end column position to include (base zero)
637 * the alignment to be sorted
639 * either "average_score" or "density" ("text" not yet implemented)
641 public static void sortByFeature(List<String> featureTypes,
642 List<String> groups, final int startCol, final int endCol,
643 AlignmentI alignment, String method)
645 if (method != FEATURE_SCORE && method != FEATURE_LABEL
646 && method != FEATURE_DENSITY)
648 String msg = String.format(
649 "Implementation Error - sortByFeature method must be either '%s' or '%s'",
650 FEATURE_SCORE, FEATURE_DENSITY);
651 System.err.println(msg);
655 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol,
658 SequenceI[] seqs = alignment.getSequencesArray();
660 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
662 int hasScores = 0; // number of scores present on set
663 double[] scores = new double[seqs.length];
664 int[] seqScores = new int[seqs.length];
665 Object[][] feats = new Object[seqs.length][];
669 for (int i = 0; i < seqs.length; i++)
672 * get sequence residues overlapping column region
673 * and features for residue positions and specified types
675 String[] types = featureTypes == null ? null
676 : featureTypes.toArray(new String[featureTypes.size()]);
677 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
683 Iterator<SequenceFeature> it = sfs.listIterator();
686 SequenceFeature sf = it.next();
689 * accept all features with null or empty group, otherwise
690 * check group is one of the currently visible groups
692 String featureGroup = sf.getFeatureGroup();
693 if (groups != null && featureGroup != null
694 && !"".equals(featureGroup)
695 && !groups.contains(featureGroup))
701 float score = sf.getScore();
702 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
704 if (seqScores[i] == 0)
711 // take the first instance of this score // ??
716 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
719 if (method == FEATURE_LABEL)
721 // order the labels by alphabet (not yet implemented)
722 String[] labs = new String[sfs.size()];
723 for (int l = 0; l < sfs.size(); l++)
725 SequenceFeature sf = sfs.get(l);
726 String description = sf.getDescription();
727 labs[l] = (description != null ? description : sf.getType());
729 QuickSort.sort(labs, feats[i]);
734 // compute average score
735 scores[i] /= seqScores[i];
736 // update the score bounds.
744 max = Math.max(max, scores[i]);
745 min = Math.min(min, scores[i]);
750 boolean doSort = false;
752 if (FEATURE_SCORE.equals(method))
756 return; // do nothing - no scores present to sort by.
759 if (hasScores < seqs.length)
761 for (int i = 0; i < seqs.length; i++)
765 scores[i] = (max + 1 + i);
769 // int nf = (feats[i] == null) ? 0
770 // : ((SequenceFeature[]) feats[i]).length;
771 // // System.err.println("Sorting on Score: seq " +
773 // + " Feats: " + nf + " Score : " + scores[i]);
779 else if (FEATURE_DENSITY.equals(method))
781 for (int i = 0; i < seqs.length; i++)
783 int featureCount = feats[i] == null ? 0
784 : ((SequenceFeature[]) feats[i]).length;
785 scores[i] = featureCount;
786 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
787 // " Feats: "+featureCount+" Score : "+scores[i]);
793 QuickSort.sortByDouble(scores, seqs,
794 getInstance().sortByFeatureAscending);
796 setOrder(alignment, seqs);
800 * Builds a string hash of criteria for sorting, and if unchanged from last
801 * time, reverse the sort order
804 * @param featureTypes
809 protected static void flipFeatureSortIfUnchanged(String method,
810 List<String> featureTypes, List<String> groups,
811 final int startCol, final int endCol)
813 StringBuilder sb = new StringBuilder(64);
814 sb.append(startCol).append(method).append(endCol);
815 if (featureTypes != null)
817 Collections.sort(featureTypes);
818 sb.append(featureTypes.toString());
822 Collections.sort(groups);
823 sb.append(groups.toString());
825 String scoreCriteria = sb.toString();
828 * if resorting on the same criteria, toggle sort order
830 AlignmentSorter as = getInstance();
831 if (as.sortByFeatureCriteria == null
832 || !scoreCriteria.equals(as.sortByFeatureCriteria))
834 as.sortByFeatureAscending = true;
838 as.sortByFeatureAscending = !as.sortByFeatureAscending;
840 as.sortByFeatureCriteria = scoreCriteria;
844 * Set the alignment's sequences list to contain the sequences from a
845 * temporary list, first adding all the elements from the tmp list, then adding all sequences in the alignment that
846 * are not in the list. Option to do the final sort either in order or in reverse order.
848 * @param align The alignment being sorted
850 * the temporary sequence list
852 * false for reversed order; only sequences already in
853 * the alignment will be used (which is actually already guaranteed
854 * by vectorSubsetToArray)
856 private static void set(AlignmentI align, List<SequenceI> tmp,
859 set(align, vectorSubsetToArray(align.getSequences(), tmp), ascending);
863 * Set the alignment's sequences list to contain these sequences, either in
864 * this order or its reverse.
868 * the new sequence array
870 * false for reversed order; if ascending, only sequences already in
871 * the alignment will be used; if descending, then a direct 1:1
872 * replacement is made
874 private static void set(AlignmentI align, SequenceI[] seqs,
879 setOrder(align, seqs);
883 setReverseOrder(align, seqs);
889 * Replace the alignment's sequences with values in an array, clearing the
890 * alignment's sequence list and filtering for sequences that are actually in
891 * the alignment already.
896 * the array of replacement values, of any length
898 public static void setOrder(AlignmentI align, SequenceI[] seqs)
900 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
901 List<SequenceI> seqList = align.getSequences();
902 synchronized (seqList)
904 List<SequenceI> tmp = new ArrayList<>();
906 for (int i = 0; i < seqs.length; i++)
908 if (seqList.contains(seqs[i]))
915 // User may have hidden seqs, then clicked undo or redo
916 for (int i = 0; i < tmp.size(); i++)
918 seqList.add(tmp.get(i));
924 * Replace the alignment's sequences or a subset of those sequences with
925 * values in an array in reverse order. All sequences are replaced; no check
926 * is made that these sequences are in the alignment already.
931 * the array of replacement values, length must be less than or equal
932 * to Alignment.sequences.size()
934 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
936 int nSeq = seqs.length;
938 int len = (nSeq + (nSeq % 2)) / 2;
941 // if ((nSeq % 2) == 0)
947 // len = (nSeq + 1) / 2;
950 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
951 List<SequenceI> seqList = align.getSequences();
952 synchronized (seqList)
954 for (int i = 0; i < len; i++)
956 // SequenceI tmp = seqs[i];
957 seqList.set(i, seqs[nSeq - i - 1]);
958 seqList.set(nSeq - i - 1, seqs[i]);
964 * Create and array of reordered sequences in order first from tmp that are
965 * present in seqList already, then, after that, any remaining sequences in
966 * seqList not in tmp. Any sequences in tmp that are not in seqList already
970 * thread safe collection of sequences originally in the alignment
972 * thread safe collection of sequences or subsequences possibly in
975 * @return intersect(tmp,seqList)+intersect(complement(tmp),seqList)
977 private static SequenceI[] vectorSubsetToArray(List<SequenceI> seqList,
980 ArrayList<SequenceI> seqs = new ArrayList<>();
981 int n = seqList.size();
982 BitSet bs = new BitSet(n);
984 for (int i = 0, nt = tmp.size(); i < nt; i++)
986 SequenceI sq = tmp.get(i);
987 int idx = seqList.indexOf(sq);
988 if (idx >= 0 && bs.get(idx))
995 for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1))
997 seqs.add(seqList.get(i));
1000 return seqs.toArray(new SequenceI[seqs.size()]);