2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.bin.ApplicationSingletonProvider;
26 import jalview.bin.ApplicationSingletonProvider.ApplicationSingletonI;
27 import jalview.datamodel.AlignmentAnnotation;
28 import jalview.datamodel.AlignmentI;
29 import jalview.datamodel.AlignmentOrder;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceGroup;
32 import jalview.datamodel.SequenceI;
33 import jalview.datamodel.SequenceNode;
34 import jalview.util.QuickSort;
36 import java.util.ArrayList;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Iterator;
40 import java.util.List;
43 * Routines for manipulating the order of a multiple sequence alignment TODO:
44 * this class retains some global states concerning sort-order which should be
45 * made attributes for the caller's alignment visualization. TODO: refactor to
46 * allow a subset of selected sequences to be sorted within the context of a
47 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
48 * data mapping to each tobesorted element to use ], Alignment context of
49 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
50 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
51 * resulting from applying the operation to tobesorted should be mapped back to
52 * the original positions in alignment. Otherwise, normal behaviour is to re
53 * order alignment so that tobesorted is sorted and grouped together starting
54 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
55 * becomes a,tb1,tb2,tb3,b,c)
57 public class AlignmentSorter implements ApplicationSingletonI
// Private constructor: code outside this class cannot instantiate it directly;
// getInstance() is the accessor visible in this file.
// NOTE(review): the constructor body is not present in this extract.
60 private AlignmentSorter()
65 public static AlignmentSorter getInstance()
67 return (AlignmentSorter) ApplicationSingletonProvider
68 .getInstance(AlignmentSorter.class);
72 * types of feature ordering: Sort by score : average score - or total score -
73 * over all features in region Sort by feature label text: (or if null -
74 * feature type text) - numerical or alphabetical Sort by feature density:
75 * based on counts - ignoring individual text or scores for each feature
// sortByFeature method value: order by the per-sequence average feature score
77 public static final String FEATURE_SCORE = "average_score";
// sortByFeature method value: feature label text (documented on sortByFeature as
// not yet implemented)
79 public static final String FEATURE_LABEL = "text";
// sortByFeature method value: order by the number of features found per sequence
81 public static final String FEATURE_DENSITY = "density";
84 * todo: refactor searches to follow a basic pattern: (search property, last
85 * search state, current sort direction)
// direction flag for sortByID; sortByID flips it before passing it to set()
87 boolean sortIdAscending = true;
// hash remembered by sortByGroup to decide whether to reset or flip its direction
89 int lastGroupHash = 0;
// direction flag for sortByGroup
91 boolean sortGroupAscending = true;
// order object that sortBy compares against by reference
// NOTE(review): no line in this extract assigns this field - confirm where it is set
93 AlignmentOrder lastOrder = null;
// direction flag for sortBy
95 boolean sortOrderAscending = true;
// tree that sortByTree compares against by reference
97 TreeModel lastTree = null;
// direction flag for sortByTree
99 boolean sortTreeAscending = true;
102 * last Annotation Label used for sort by Annotation score
104 private String lastSortByAnnotation;
107 * string hash of last arguments to sortByFeature (sort order toggles if this
108 * is unchanged between sorts)
110 private String sortByFeatureCriteria;
// direction passed by sortByFeature to QuickSort.sortByDouble
112 private boolean sortByFeatureAscending = true;
// no initialiser, so this starts as false; sortByLength flips it before use
114 private boolean sortLengthAscending;
117 * Sorts sequences in the alignment by Percentage Identity with the given
118 * reference sequence, sorting the highest identity to the top
// Scores every sequence of align against the reference sequence s with
// PIDModel.computePID, sorts on those scores and writes the result back to the
// alignment through setReverseOrder.
126 public static void sortByPID(AlignmentI align, SequenceI s)
128 int nSeq = align.getHeight();
// parallel arrays: scores[i] is the value computed for seqs[i]
130 float[] scores = new float[nSeq];
131 SequenceI[] seqs = new SequenceI[nSeq];
132 String refSeq = s.getSequenceAsString();
// NOTE(review): the remaining constructor argument(s) are on a line that is
// missing from this extract
134 SimilarityParams pidParams = new SimilarityParams(true, true, true,
136 for (int i = 0; i < nSeq; i++)
// compare row i (as a string) with the reference string; the double result is
// narrowed to float
// NOTE(review): the end of this call is on a line missing from this extract
138 scores[i] = (float) PIDModel.computePID(
139 align.getSequenceAt(i).getSequenceAsString(), refSeq,
141 seqs[i] = align.getSequenceAt(i);
// presumably orders seqs by the parallel scores array - confirm against QuickSort
144 QuickSort.sort(scores, seqs);
// written back reversed; the method documentation says highest identity goes to the top
145 setReverseOrder(align, seqs);
149 * Sorts by ID. Numbers are sorted before letters.
152 * The alignment object to sort
154 public static void sortByID(AlignmentI align)
156 int nSeq = align.getHeight();
158 String[] ids = new String[nSeq];
159 SequenceI[] seqs = new SequenceI[nSeq];
161 for (int i = 0; i < nSeq; i++)
163 ids[i] = align.getSequenceAt(i).getName();
164 seqs[i] = align.getSequenceAt(i);
167 QuickSort.sort(ids, seqs);
168 AlignmentSorter as = getInstance();
169 as.sortIdAscending = !as.sortIdAscending;
170 set(align, seqs, as.sortIdAscending);
174 * Sorts by sequence length
177 * The alignment object to sort
179 public static void sortByLength(AlignmentI align)
181 int nSeq = align.getHeight();
183 float[] length = new float[nSeq];
184 SequenceI[] seqs = new SequenceI[nSeq];
186 for (int i = 0; i < nSeq; i++)
188 seqs[i] = align.getSequenceAt(i);
189 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
192 QuickSort.sort(length, seqs);
193 AlignmentSorter as = getInstance();
194 as.sortLengthAscending = !as.sortLengthAscending;
195 set(align, seqs, as.sortLengthAscending);
199 * Sorts the alignment by size of group. <br>
200 * Maintains the order of sequences in each group by order in given alignment
204 * sorts the given alignment object by group
// Builds a list of the alignment's groups ordered by size, then lists each
// group's sequences (in alignment order, via getSequencesInOrder) and applies
// that sequence list to the alignment with set().
206 public static void sortByGroup(AlignmentI align)
208 // MAINTAINS ORIGINAL SEQUENCE ORDER,
209 // ORDERS BY GROUP SIZE
210 List<SequenceGroup> groups = new ArrayList<>();
212 AlignmentSorter as = getInstance();
// NOTE(review): groups was created empty on the line above and nothing has been
// added to it yet, so in the visible lines this hashes an empty list on every
// call - confirm whether the alignment's own groups were meant to be hashed
214 if (groups.hashCode() != as.lastGroupHash)
216 as.sortGroupAscending = true;
217 as.lastGroupHash = groups.hashCode();
// flips the direction flag (the 'else' line itself is not in this extract)
221 as.sortGroupAscending = !as.sortGroupAscending;
224 // SORTS GROUPS BY SIZE
225 // ////////////////////
226 for (SequenceGroup sg : align.getGroups())
// scan the groups collected so far, comparing sizes with the incoming group
228 for (int j = 0; j < groups.size(); j++)
230 SequenceGroup sg2 = groups.get(j);
// NOTE(review): the bodies of the next two conditions are missing from this
// extract; presumably they insert sg ahead of the first smaller group, or
// append it if it was not inserted - confirm against the full file
232 if (sg.getSize() > sg2.getSize())
240 if (!groups.contains(sg))
246 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
247 // /////////////////////////////////////////////
248 List<SequenceI> tmp = new ArrayList<>();
250 for (int i = 0; i < groups.size(); i++)
252 SequenceGroup sg = groups.get(i);
253 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
255 for (int j = 0; j < orderedseqs.length; j++)
257 tmp.add(orderedseqs[j]);
// set() (via vectorSubsetToArray, per its documentation) also appends alignment
// sequences that are not in tmp
260 set(align, tmp, as.sortGroupAscending);
264 * Sorts by a given AlignmentOrder object
269 * specified order for alignment
271 public static void sortBy(AlignmentI align, AlignmentOrder order)
273 // Get an ordered vector of sequences which may also be present in align
274 List<SequenceI> tmp = order.getOrder();
276 AlignmentSorter as = getInstance();
278 if (as.lastOrder == order)
280 as.sortOrderAscending = !as.sortOrderAscending;
284 as.sortOrderAscending = true;
286 set(align, tmp, as.sortOrderAscending);
297 * @return DOCUMENT ME!
// Collects the alignment's sequences in the order produced by walking the tree
// from its top node, calling addStrays when the collected list and the
// alignment differ in size.
// NOTE(review): the second parameter line and the return statement are missing
// from this extract.
299 private static List<SequenceI> getOrderByTree(AlignmentI align,
302 int nSeq = align.getHeight();
304 List<SequenceI> tmp = new ArrayList<>();
// walk the tree from its top node; _sortByTree appends to the list it is given
306 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
// size mismatch: the tree walk did not yield exactly one entry per alignment row
308 if (tmp.size() != nSeq)
310 // TODO: JBPNote - decide if this is always an error
311 // (eg. not when a tree is associated to another alignment which has more
313 if (tmp.size() != nSeq)
// append the alignment sequences that the tree walk did not produce
315 addStrays(align, tmp);
// still mismatched after addStrays: report on stderr
// NOTE(review): part of the message expression is on a line missing from this extract
318 if (tmp.size() != nSeq)
320 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
322 + " in getOrderByTree - tree contains sequences not in alignment");
330 * Sorts the alignment by a given tree
// Reorders align using the sequence order derived from the given tree by
// getOrderByTree, in the direction held in sortTreeAscending.
337 public static void sortByTree(AlignmentI align, TreeModel tree)
339 List<SequenceI> tmp = getOrderByTree(align, tree);
341 AlignmentSorter as = getInstance();
343 // tmp should properly permute align with tree.
// a tree that is not the remembered one (reference comparison) resets the
// direction flag to true
// NOTE(review): one line of this branch is missing from this extract, and no
// visible line assigns lastTree - confirm that the tree is remembered here
344 if (as.lastTree != tree)
346 as.sortTreeAscending = true;
// flips the direction flag (the 'else' line itself is not in this extract)
351 as.sortTreeAscending = !as.sortTreeAscending;
353 set(align, tmp, as.sortTreeAscending);
// Appends to tmp every sequence of align that tmp does not already contain,
// then reports an error if the sizes still differ.
364 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
366 int nSeq = align.getHeight();
368 for (int i = 0; i < nSeq; i++)
// List.contains is called once per alignment row
370 if (!tmp.contains(align.getSequenceAt(i)))
372 tmp.add(align.getSequenceAt(i));
// tmp can still differ in size from the alignment at this point
376 if (nSeq != tmp.size())
// NOTE(review): the receiver of this println call is on a line missing from
// this extract
379 .println("ERROR: Size still not right even after addStrays");
393 * @return DOCUMENT ME!
// Recursive tree walk: at a node with no children, appends the node's SequenceI
// element to tmp if it is not already there; otherwise recurses into the left
// child and then the right child.
// NOTE(review): lines between the signature and the first visible statement,
// and the return statement(s), are missing from this extract.
395 private static List<SequenceI> _sortByTree(SequenceNode node,
396 List<SequenceI> tmp, List<SequenceI> seqset)
403 SequenceNode left = (SequenceNode) node.left();
404 SequenceNode right = (SequenceNode) node.right();
// no children: this node is a leaf
406 if ((left == null) && (right == null))
// placeholder nodes and nodes without an element are skipped
408 if (!node.isPlaceholder() && (node.element() != null))
410 if (node.element() instanceof SequenceI)
// each sequence is collected at most once; the seqset test is commented out
412 if (!tmp.contains(node.element())) // && (seqset==null ||
413 // seqset.size()==0 ||
414 // seqset.contains(tmp)))
416 tmp.add((SequenceI) node.element());
// in the visible lines seqset is only passed through to the recursive calls
425 _sortByTree(left, tmp, seqset);
426 _sortByTree(right, tmp, seqset);
433 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
438 * recover the order of sequences given by the safe numbering scheme introduced by
439 * SeqsetUtils.uniquify.
// Sorts the given sequence array using a numeric key parsed from each
// sequence's name.
441 public static void recoverOrder(SequenceI[] alignment)
443 float[] ids = new float[alignment.length];
445 for (int i = 0; i < alignment.length; i++)
// the key is the part of the name from character index 8 onwards, parsed as a
// float; substring(8) throws for names shorter than 8 characters and
// Float.valueOf throws NumberFormatException for non-numeric text
// NOTE(review): the end of this statement is on a line missing from this extract
447 ids[i] = (Float.valueOf(alignment[i].getName().substring(8)))
// presumably reorders the alignment array in place, keyed by ids - confirm
// against QuickSort
451 jalview.util.QuickSort.sort(ids, alignment);
455 * Sort sequence in order of increasing score attribute for annotation with a
456 * particular scoreLabel. Or reverse if same label was used previously
459 * exact label for sequence associated AlignmentAnnotation scores to
462 * sequences to be sorted
// Sorts the alignment's sequences on the score of their annotation with the
// given label, and applies the result with setOrder or setReverseOrder
// depending on whether the label matches the one remembered from the last call.
// NOTE(review): many lines of this method are missing from this extract
// (counter updates, min/max tracking, several conditions); the comments below
// describe only what the visible lines show.
464 public static void sortByAnnotationScore(String scoreLabel,
465 AlignmentI alignment)
467 SequenceI[] seqs = alignment.getSequencesArray();
468 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
470 int hasScores = 0; // number of scores present on set
471 double[] scores = new double[seqs.length];
472 double min = 0, max = 0;
473 for (int i = 0; i < seqs.length; i++)
475 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
// a null array is treated as "no annotation with this label"
// NOTE(review): scoreAnn[0] would throw on an empty (non-null) array - confirm
// getAnnotation never returns one
476 if (scoreAnn != null)
480 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
484 max = min = scores[i];
505 return; // do nothing - no scores present to sort by.
// some sequences have no score
507 if (hasScores < seqs.length)
509 for (int i = 0; i < seqs.length; i++)
// a value above max that grows with i
// NOTE(review): the condition guarding this assignment is missing from this extract
513 scores[i] = (max + i + 1.0);
518 jalview.util.QuickSort.sort(scores, seqs);
520 AlignmentSorter as = getInstance();
// NOTE(review): != compares String references, not contents; an equal label
// held in a different String object is treated as a new label - confirm
// whether equals() was intended
522 if (as.lastSortByAnnotation != scoreLabel)
524 as.lastSortByAnnotation = scoreLabel;
525 setOrder(alignment, seqs);
// same String reference as last time: apply the reverse order
529 setReverseOrder(alignment, seqs);
534 * Sort sequences by feature score or density, optionally restricted by
535 * feature types, feature groups, or alignment start/end positions.
537 * If the sort is repeated for the same combination of types and groups, sort
540 * @param featureTypes
541 * a list of feature types to include (or null for all)
543 * a list of feature groups to include (or null for all)
545 * start column position to include (base zero)
547 * end column position to include (base zero)
549 * the alignment to be sorted
551 * either "average_score" or "density" ("text" not yet implemented)
// Sorts the alignment's sequences on a per-sequence value derived from their
// features in the given column range: the average feature score for
// FEATURE_SCORE, or the feature count for FEATURE_DENSITY. The direction comes
// from sortByFeatureAscending, which flipFeatureSortIfUnchanged maintains.
// NOTE(review): many lines of this method are missing from this extract (early
// return, loop headers, counter updates, declarations of min/max); the
// comments below describe only what the visible lines show.
553 public static void sortByFeature(List<String> featureTypes,
554 List<String> groups, final int startCol, final int endCol,
555 AlignmentI alignment, String method)
// NOTE(review): these are String reference comparisons; a method string that is
// equal to one of the constants but is a different object fails this check -
// confirm whether equals() was intended (equals() is used further down)
557 if (method != FEATURE_SCORE && method != FEATURE_LABEL
558 && method != FEATURE_DENSITY)
// NOTE(review): the message names only two of the three values accepted above
560 String msg = String.format(
561 "Implementation Error - sortByFeature method must be either '%s' or '%s'",
562 FEATURE_SCORE, FEATURE_DENSITY);
563 System.err.println(msg);
// updates the remembered criteria and the sort direction flag
567 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol,
570 SequenceI[] seqs = alignment.getSequencesArray();
572 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
574 int hasScores = 0; // number of scores present on set
// per-sequence sort key
575 double[] scores = new double[seqs.length];
// per-sequence divisor used below when averaging scores[i]
576 int[] seqScores = new int[seqs.length];
// per-sequence array of the features found
577 Object[][] feats = new Object[seqs.length][];
581 for (int i = 0; i < seqs.length; i++)
584 * get sequence residues overlapping column region
585 * and features for residue positions and specified types
// a null featureTypes list is passed on as a null array
587 String[] types = featureTypes == null ? null
588 : featureTypes.toArray(new String[featureTypes.size()]);
// NOTE(review): startCol is documented as base zero; the + 1 presumably converts
// to a 1-based column - confirm against findFeatures
589 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
595 Iterator<SequenceFeature> it = sfs.listIterator();
598 SequenceFeature sf = it.next();
601 * accept all features with null or empty group, otherwise
602 * check group is one of the currently visible groups
604 String featureGroup = sf.getFeatureGroup();
605 if (groups != null && featureGroup != null
606 && !"".equals(featureGroup)
607 && !groups.contains(featureGroup))
613 float score = sf.getScore();
// NaN scores are not used for FEATURE_SCORE
614 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
616 if (seqScores[i] == 0)
623 // take the first instance of this score // ??
628 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
// NOTE(review): String reference comparison, as at the top of the method
631 if (method == FEATURE_LABEL)
633 // order the labels by alphabet (not yet implemented)
634 String[] labs = new String[sfs.size()];
635 for (int l = 0; l < sfs.size(); l++)
637 SequenceFeature sf = sfs.get(l);
638 String description = sf.getDescription();
// the label is the description, or the feature type when there is no description
639 labs[l] = (description != null ? description : sf.getType());
641 QuickSort.sort(labs, feats[i]);
646 // compute average score
// NOTE(review): dividing a double by an int zero does not throw but yields NaN or
// infinity - confirm the (missing) guard ensures seqScores[i] > 0 here
647 scores[i] /= seqScores[i];
648 // update the score bounds.
656 max = Math.max(max, scores[i]);
657 min = Math.min(min, scores[i]);
// NOTE(review): the lines that update doSort are missing from this extract
662 boolean doSort = false;
664 if (FEATURE_SCORE.equals(method))
668 return; // do nothing - no scores present to sort by.
// some sequences have no score
671 if (hasScores < seqs.length)
673 for (int i = 0; i < seqs.length; i++)
// a value above max that grows with i
// NOTE(review): the condition guarding this assignment is missing from this extract
677 scores[i] = (max + 1 + i);
681 // int nf = (feats[i] == null) ? 0
682 // : ((SequenceFeature[]) feats[i]).length;
683 // // System.err.println("Sorting on Score: seq " +
685 // + " Feats: " + nf + " Score : " + scores[i]);
691 else if (FEATURE_DENSITY.equals(method))
693 for (int i = 0; i < seqs.length; i++)
// density key: the number of features recorded for the sequence (0 if none)
695 int featureCount = feats[i] == null ? 0
696 : ((SequenceFeature[]) feats[i]).length;
697 scores[i] = featureCount;
698 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
699 // " Feats: "+featureCount+" Score : "+scores[i]);
// the direction is applied by the sort itself, so setOrder is used either way
705 QuickSort.sortByDouble(scores, seqs,
706 getInstance().sortByFeatureAscending);
708 setOrder(alignment, seqs);
712 * Builds a string hash of criteria for sorting, and if unchanged from last
713 * time, reverse the sort order
716 * @param featureTypes
721 protected static void flipFeatureSortIfUnchanged(String method,
722 List<String> featureTypes, List<String> groups,
723 final int startCol, final int endCol)
725 StringBuilder sb = new StringBuilder(64);
726 sb.append(startCol).append(method).append(endCol);
727 if (featureTypes != null)
729 Collections.sort(featureTypes);
730 sb.append(featureTypes.toString());
734 Collections.sort(groups);
735 sb.append(groups.toString());
737 String scoreCriteria = sb.toString();
740 * if resorting on the same criteria, toggle sort order
742 AlignmentSorter as = getInstance();
743 if (as.sortByFeatureCriteria == null
744 || !scoreCriteria.equals(as.sortByFeatureCriteria))
746 as.sortByFeatureAscending = true;
750 as.sortByFeatureAscending = !as.sortByFeatureAscending;
752 as.sortByFeatureCriteria = scoreCriteria;
756 * Set the alignment's sequences list to contain the sequences from a
757 * temporary list, first adding all the elements from the tmp list, then adding all sequences in the alignment that
758 * are not in the list. Option to do the final sort either in order or in reverse order.
760 * @param align The alignment being sorted
762 * the temporary sequence list
764 * false for reversed order; only sequences already in
765 * the alignment will be used (which is actually already guaranteed
766 * by vectorSubsetToArray)
768 private static void set(AlignmentI align, List<SequenceI> tmp,
771 set(align, vectorSubsetToArray(align.getSequences(), tmp), ascending);
775 * Set the alignment's sequences list to contain these sequences, either in
776 * this order or its reverse.
780 * the new sequence array
782 * false for reversed order; if ascending, only sequences already in
783 * the alignment will be used; if descending, then a direct 1:1
784 * replacement is made
786 private static void set(AlignmentI align, SequenceI[] seqs,
791 setOrder(align, seqs);
795 setReverseOrder(align, seqs);
801 * Replace the alignment's sequences with values in an array, clearing the
802 * alignment's sequence list and filtering for sequences that are actually in
803 * the alignment already.
808 * the array of replacement values, of any length
810 public static void setOrder(AlignmentI align, SequenceI[] seqs)
812 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
813 List<SequenceI> seqList = align.getSequences();
814 synchronized (seqList)
816 List<SequenceI> tmp = new ArrayList<>();
818 for (int i = 0; i < seqs.length; i++)
820 if (seqList.contains(seqs[i]))
827 // User may have hidden seqs, then clicked undo or redo
828 for (int i = 0; i < tmp.size(); i++)
830 seqList.add(tmp.get(i));
836 * Replace the alignment's sequences or a subset of those sequences with
837 * values in an array in reverse order. All sequences are replaced; no check
838 * is made that these sequences are in the alignment already.
843 * the array of replacement values, length must be less than or equal
844 * to Alignment.sequences.size()
846 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
848 int nSeq = seqs.length;
850 int len = (nSeq + (nSeq % 2)) / 2;
853 // if ((nSeq % 2) == 0)
859 // len = (nSeq + 1) / 2;
862 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
863 List<SequenceI> seqList = align.getSequences();
864 synchronized (seqList)
866 for (int i = 0; i < len; i++)
868 // SequenceI tmp = seqs[i];
869 seqList.set(i, seqs[nSeq - i - 1]);
870 seqList.set(nSeq - i - 1, seqs[i]);
876 * Create an array of reordered sequences, in order first from tmp that are
877 * present in seqList already, then, after that, any remaining sequences in
878 * seqList not in tmp. Any sequences in tmp that are not in seqList already
882 * thread safe collection of sequences originally in the alignment
884 * thread safe collection of sequences or subsequences possibly in
887 * @return intersect(tmp,seqList)+intersect(complement(tmp),seqList)
889 private static SequenceI[] vectorSubsetToArray(List<SequenceI> seqList,
892 ArrayList<SequenceI> seqs = new ArrayList<>();
893 int n = seqList.size();
894 BitSet bs = new BitSet(n);
896 for (int i = 0, nt = tmp.size(); i < nt; i++)
898 SequenceI sq = tmp.get(i);
899 int idx = seqList.indexOf(sq);
900 if (idx >= 0 && bs.get(idx))
907 for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1))
909 seqs.add(seqList.get(i));
912 return seqs.toArray(new SequenceI[seqs.size()]);