2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.bin.ApplicationSingletonProvider;
26 import jalview.bin.ApplicationSingletonProvider.ApplicationSingletonI;
27 import jalview.datamodel.AlignmentAnnotation;
28 import jalview.datamodel.AlignmentI;
29 import jalview.datamodel.AlignmentOrder;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceGroup;
32 import jalview.datamodel.SequenceI;
33 import jalview.datamodel.SequenceNode;
34 import jalview.util.QuickSort;
36 import java.util.ArrayList;
37 import java.util.Collections;
38 import java.util.Iterator;
39 import java.util.List;
42 * Routines for manipulating the order of a multiple sequence alignment TODO:
43 * this class retains some global states concerning sort-order which should be
44 * made attributes for the caller's alignment visualization. TODO: refactor to
45 * allow a subset of selected sequences to be sorted within the context of a
46 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
47 * data mapping to each tobesorted element to use ], Alignment context of
48 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
49 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
50 * resulting from applying the operation to tobesorted should be mapped back to
51 * the original positions in alignment. Otherwise, normal behaviour is to re
52 * order alignment so that tobesorted is sorted and grouped together starting
53 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
54 * becomes a,tb1,tb2,tb3,b,c)
56 public class AlignmentSorter implements ApplicationSingletonI
/**
 * Private constructor: callers obtain the shared instance through
 * {@link #getInstance()} rather than creating one directly.
 */
private AlignmentSorter()
{
}
64 public static AlignmentSorter getInstance()
66 return (AlignmentSorter) ApplicationSingletonProvider
67 .getInstance(AlignmentSorter.class);
/*
 * types of feature ordering: Sort by score : average score - or total score -
 * over all features in region Sort by feature label text: (or if null -
 * feature type text) - numerical or alphabetical Sort by feature density:
 * based on counts - ignoring individual text or scores for each feature
 */

/**
 * sortByFeature method: order sequences by the average score of their
 * selected features.
 */
public static final String FEATURE_SCORE = "average_score";

/**
 * sortByFeature method: order by feature label text (sortByFeature marks this
 * as not yet implemented).
 */
public static final String FEATURE_LABEL = "text";

/**
 * sortByFeature method: order sequences by the number of selected features
 * they carry.
 */
public static final String FEATURE_DENSITY = "density";
/*
 * todo: refactor searches to follow a basic pattern: (search property, last
 * search state, current sort direction)
 */

/** Direction flag for sortByID; inverted on every call before it is used. */
boolean sortIdAscending = true;

/** Hash remembered by sortByGroup to recognise a repeated group sort. */
int lastGroupHash = 0;

/** Direction flag applied by sortByGroup. */
boolean sortGroupAscending = true;

/** Order that sortBy compares its argument with to detect a repeated sort. */
AlignmentOrder lastOrder = null;

/** Direction flag applied by sortBy. */
boolean sortOrderAscending = true;

/** Tree that sortByTree compares its argument with to detect a repeated sort. */
TreeModel lastTree = null;

/** Direction flag applied by sortByTree. */
boolean sortTreeAscending = true;

/*
 * last Annotation Label used for sort by Annotation score
 */
private String lastSortByAnnotation;

/*
 * string hash of last arguments to sortByFeature (sort order toggles if this
 * is unchanged between sorts)
 */
private String sortByFeatureCriteria;

/** Direction flag for sortByFeature; set by flipFeatureSortIfUnchanged. */
private boolean sortByFeatureAscending = true;

/**
 * Direction flag for sortByLength; inverted on every call before it is used,
 * so the first call passes true.
 */
private boolean sortLengthAscending;
117 * Sorts sequences in the alignment by Percentage Identity with the given
118 * reference sequence, sorting the highest identity to the top
126 public static void sortByPID(AlignmentI align, SequenceI s)
128 int nSeq = align.getHeight();
130 float[] scores = new float[nSeq];
131 SequenceI[] seqs = new SequenceI[nSeq];
132 String refSeq = s.getSequenceAsString();
134 SimilarityParams pidParams = new SimilarityParams(true, true, true,
136 for (int i = 0; i < nSeq; i++)
138 scores[i] = (float) PIDModel.computePID(
139 align.getSequenceAt(i).getSequenceAsString(), refSeq,
141 seqs[i] = align.getSequenceAt(i);
144 QuickSort.sort(scores, seqs);
145 setReverseOrder(align, seqs);
149 * Sorts by ID. Numbers are sorted before letters.
152 * The alignment object to sort
154 public static void sortByID(AlignmentI align)
156 int nSeq = align.getHeight();
158 String[] ids = new String[nSeq];
159 SequenceI[] seqs = new SequenceI[nSeq];
161 for (int i = 0; i < nSeq; i++)
163 ids[i] = align.getSequenceAt(i).getName();
164 seqs[i] = align.getSequenceAt(i);
167 QuickSort.sort(ids, seqs);
168 AlignmentSorter as = getInstance();
169 as.sortIdAscending = !as.sortIdAscending;
170 sort(align, seqs, as.sortIdAscending);
174 * Sorts by sequence length
177 * The alignment object to sort
179 public static void sortByLength(AlignmentI align)
181 int nSeq = align.getHeight();
183 float[] length = new float[nSeq];
184 SequenceI[] seqs = new SequenceI[nSeq];
186 for (int i = 0; i < nSeq; i++)
188 seqs[i] = align.getSequenceAt(i);
189 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
192 QuickSort.sort(length, seqs);
193 AlignmentSorter as = getInstance();
194 as.sortLengthAscending = !as.sortLengthAscending;
195 sort(align, seqs, as.sortLengthAscending);
199 * Sorts the alignment by size of group. <br>
200 * Maintains the order of sequences in each group by order in given alignment
204 * sorts the given alignment object by group
206 public static void sortByGroup(AlignmentI align)
208 // MAINTAINS ORIGNAL SEQUENCE ORDER,
209 // ORDERS BY GROUP SIZE
210 List<SequenceGroup> groups = new ArrayList<>();
212 AlignmentSorter as = getInstance();
214 if (groups.hashCode() != as.lastGroupHash)
216 as.sortGroupAscending = true;
217 as.lastGroupHash = groups.hashCode();
221 as.sortGroupAscending = !as.sortGroupAscending;
224 // SORTS GROUPS BY SIZE
225 // ////////////////////
226 for (SequenceGroup sg : align.getGroups())
228 for (int j = 0; j < groups.size(); j++)
230 SequenceGroup sg2 = groups.get(j);
232 if (sg.getSize() > sg2.getSize())
240 if (!groups.contains(sg))
246 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
247 // /////////////////////////////////////////////
248 List<SequenceI> tmp = new ArrayList<>();
250 for (int i = 0; i < groups.size(); i++)
252 SequenceGroup sg = groups.get(i);
253 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
255 for (int j = 0; j < orderedseqs.length; j++)
257 tmp.add(orderedseqs[j]);
260 sort(align, tmp, as.sortGroupAscending);
264 * Select sequences in order from tmp that is present in mask, and any
265 * remaining sequences in mask not in tmp
268 * thread safe collection of sequences
270 * thread safe collection of sequences
272 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
274 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
275 List<SequenceI> mask)
278 // tmp2 = tmp.retainAll(mask);
279 // return tmp2.addAll(mask.removeAll(tmp2))
281 ArrayList<SequenceI> seqs = new ArrayList<>();
283 boolean[] tmask = new boolean[mask.size()];
285 for (i = 0; i < mask.size(); i++)
290 for (i = 0; i < tmp.size(); i++)
292 SequenceI sq = tmp.get(i);
293 idx = mask.indexOf(sq);
294 if (idx > -1 && tmask[idx])
301 for (i = 0; i < tmask.length; i++)
305 seqs.add(mask.get(i));
309 return seqs.toArray(new SequenceI[seqs.size()]);
313 * Sorts by a given AlignmentOrder object
318 * specified order for alignment
320 public static void sortBy(AlignmentI align, AlignmentOrder order)
322 // Get an ordered vector of sequences which may also be present in align
323 List<SequenceI> tmp = order.getOrder();
325 AlignmentSorter as = getInstance();
327 if (as.lastOrder == order)
329 as.sortOrderAscending = !as.sortOrderAscending;
333 as.sortOrderAscending = true;
335 sort(align, tmp, as.sortOrderAscending);
346 * @return DOCUMENT ME!
348 private static List<SequenceI> getOrderByTree(AlignmentI align,
351 int nSeq = align.getHeight();
353 List<SequenceI> tmp = new ArrayList<>();
355 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
357 if (tmp.size() != nSeq)
359 // TODO: JBPNote - decide if this is always an error
360 // (eg. not when a tree is associated to another alignment which has more
362 if (tmp.size() != nSeq)
364 addStrays(align, tmp);
367 if (tmp.size() != nSeq)
369 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
371 + " in getOrderByTree - tree contains sequences not in alignment");
379 * Sorts the alignment by a given tree
386 public static void sortByTree(AlignmentI align, TreeModel tree)
388 List<SequenceI> tmp = getOrderByTree(align, tree);
390 AlignmentSorter as = getInstance();
392 // tmp should properly permute align with tree.
393 if (as.lastTree != tree)
395 as.sortTreeAscending = true;
400 as.sortTreeAscending = !as.sortTreeAscending;
402 sort(align, tmp, as.sortTreeAscending);
413 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
415 int nSeq = align.getHeight();
417 for (int i = 0; i < nSeq; i++)
419 if (!tmp.contains(align.getSequenceAt(i)))
421 tmp.add(align.getSequenceAt(i));
425 if (nSeq != tmp.size())
428 .println("ERROR: Size still not right even after addStrays");
442 * @return DOCUMENT ME!
444 private static List<SequenceI> _sortByTree(SequenceNode node,
445 List<SequenceI> tmp, List<SequenceI> seqset)
452 SequenceNode left = (SequenceNode) node.left();
453 SequenceNode right = (SequenceNode) node.right();
455 if ((left == null) && (right == null))
457 if (!node.isPlaceholder() && (node.element() != null))
459 if (node.element() instanceof SequenceI)
461 if (!tmp.contains(node.element())) // && (seqset==null ||
462 // seqset.size()==0 ||
463 // seqset.contains(tmp)))
465 tmp.add((SequenceI) node.element());
474 _sortByTree(left, tmp, seqset);
475 _sortByTree(right, tmp, seqset);
482 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
487 * recover the order of sequences given by the safe numbering scheme introducd
488 * SeqsetUtils.uniquify.
490 public static void recoverOrder(SequenceI[] alignment)
492 float[] ids = new float[alignment.length];
494 for (int i = 0; i < alignment.length; i++)
496 ids[i] = (Float.valueOf(alignment[i].getName().substring(8)))
500 jalview.util.QuickSort.sort(ids, alignment);
504 * Sort sequence in order of increasing score attribute for annotation with a
505 * particular scoreLabel. Or reverse if same label was used previously
508 * exact label for sequence associated AlignmentAnnotation scores to
511 * sequences to be sorted
513 public static void sortByAnnotationScore(String scoreLabel,
514 AlignmentI alignment)
516 SequenceI[] seqs = alignment.getSequencesArray();
517 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
519 int hasScores = 0; // number of scores present on set
520 double[] scores = new double[seqs.length];
521 double min = 0, max = 0;
522 for (int i = 0; i < seqs.length; i++)
524 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
525 if (scoreAnn != null)
529 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
533 max = min = scores[i];
554 return; // do nothing - no scores present to sort by.
556 if (hasScores < seqs.length)
558 for (int i = 0; i < seqs.length; i++)
562 scores[i] = (max + i + 1.0);
567 jalview.util.QuickSort.sort(scores, seqs);
569 AlignmentSorter as = getInstance();
571 if (as.lastSortByAnnotation != scoreLabel)
573 as.lastSortByAnnotation = scoreLabel;
574 setOrder(alignment, seqs);
578 setReverseOrder(alignment, seqs);
583 * Sort sequences by feature score or density, optionally restricted by
584 * feature types, feature groups, or alignment start/end positions.
586 * If the sort is repeated for the same combination of types and groups, sort
589 * @param featureTypes
590 * a list of feature types to include (or null for all)
592 * a list of feature groups to include (or null for all)
594 * start column position to include (base zero)
596 * end column position to include (base zero)
598 * the alignment to be sorted
600 * either "average_score" or "density" ("text" not yet implemented)
602 public static void sortByFeature(List<String> featureTypes,
603 List<String> groups, final int startCol, final int endCol,
604 AlignmentI alignment, String method)
606 if (method != FEATURE_SCORE && method != FEATURE_LABEL
607 && method != FEATURE_DENSITY)
610 .format("Implementation Error - sortByFeature method must be either '%s' or '%s'",
611 FEATURE_SCORE, FEATURE_DENSITY);
612 System.err.println(msg);
616 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol);
618 SequenceI[] seqs = alignment.getSequencesArray();
620 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
622 int hasScores = 0; // number of scores present on set
623 double[] scores = new double[seqs.length];
624 int[] seqScores = new int[seqs.length];
625 Object[][] feats = new Object[seqs.length][];
629 for (int i = 0; i < seqs.length; i++)
632 * get sequence residues overlapping column region
633 * and features for residue positions and specified types
635 String[] types = featureTypes == null ? null : featureTypes
636 .toArray(new String[featureTypes.size()]);
637 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
643 Iterator<SequenceFeature> it = sfs.listIterator();
646 SequenceFeature sf = it.next();
649 * accept all features with null or empty group, otherwise
650 * check group is one of the currently visible groups
652 String featureGroup = sf.getFeatureGroup();
653 if (groups != null && featureGroup != null
654 && !"".equals(featureGroup)
655 && !groups.contains(featureGroup))
661 float score = sf.getScore();
662 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
664 if (seqScores[i] == 0)
671 // take the first instance of this score // ??
676 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
679 if (method == FEATURE_LABEL)
681 // order the labels by alphabet (not yet implemented)
682 String[] labs = new String[sfs.size()];
683 for (int l = 0; l < sfs.size(); l++)
685 SequenceFeature sf = sfs.get(l);
686 String description = sf.getDescription();
687 labs[l] = (description != null ? description : sf.getType());
689 QuickSort.sort(labs, feats[i]);
694 // compute average score
695 scores[i] /= seqScores[i];
696 // update the score bounds.
704 max = Math.max(max, scores[i]);
705 min = Math.min(min, scores[i]);
710 boolean doSort = false;
712 if (FEATURE_SCORE.equals(method))
716 return; // do nothing - no scores present to sort by.
719 if (hasScores < seqs.length)
721 for (int i = 0; i < seqs.length; i++)
725 scores[i] = (max + 1 + i);
729 // int nf = (feats[i] == null) ? 0
730 // : ((SequenceFeature[]) feats[i]).length;
731 // // System.err.println("Sorting on Score: seq " +
733 // + " Feats: " + nf + " Score : " + scores[i]);
739 else if (FEATURE_DENSITY.equals(method))
741 for (int i = 0; i < seqs.length; i++)
743 int featureCount = feats[i] == null ? 0
744 : ((SequenceFeature[]) feats[i]).length;
745 scores[i] = featureCount;
746 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
747 // " Feats: "+featureCount+" Score : "+scores[i]);
753 QuickSort.sortByDouble(scores, seqs, getInstance().sortByFeatureAscending);
755 setOrder(alignment, seqs);
759 * Builds a string hash of criteria for sorting, and if unchanged from last
760 * time, reverse the sort order
763 * @param featureTypes
768 protected static void flipFeatureSortIfUnchanged(String method,
769 List<String> featureTypes, List<String> groups,
770 final int startCol, final int endCol)
772 StringBuilder sb = new StringBuilder(64);
773 sb.append(startCol).append(method).append(endCol);
774 if (featureTypes != null)
776 Collections.sort(featureTypes);
777 sb.append(featureTypes.toString());
781 Collections.sort(groups);
782 sb.append(groups.toString());
784 String scoreCriteria = sb.toString();
787 * if resorting on the same criteria, toggle sort order
789 AlignmentSorter as = getInstance();
790 if (as.sortByFeatureCriteria == null
791 || !scoreCriteria.equals(as.sortByFeatureCriteria))
793 as.sortByFeatureAscending = true;
797 as.sortByFeatureAscending = !as.sortByFeatureAscending;
799 as.sortByFeatureCriteria = scoreCriteria;
802 private static void sort(AlignmentI align, List<SequenceI> tmp,
805 sort(align, vectorSubsetToArray(tmp, align.getSequences()), ascending);
808 private static void sort(AlignmentI align, SequenceI[] seqs,
813 setOrder(align, seqs);
817 setReverseOrder(align, seqs);
824 * Sets the Alignment object with the given sequences
829 * sequences as an array
831 public static void setOrder(AlignmentI align, SequenceI[] seqs)
833 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
834 List<SequenceI> algn = align.getSequences();
837 List<SequenceI> tmp = new ArrayList<>();
839 for (int i = 0; i < seqs.length; i++)
841 if (algn.contains(seqs[i]))
848 // User may have hidden seqs, then clicked undo or redo
849 for (int i = 0; i < tmp.size(); i++)
851 algn.add(tmp.get(i));
857 * Reverse the order of the sort
864 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
866 int nSeq = seqs.length;
868 int len = (nSeq + (nSeq % 2)) / 2;
871 // if ((nSeq % 2) == 0)
877 // len = (nSeq + 1) / 2;
880 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
881 List<SequenceI> asq = align.getSequences();
884 for (int i = 0; i < len; i++)
886 // SequenceI tmp = seqs[i];
887 asq.set(i, seqs[nSeq - i - 1]);
888 asq.set(nSeq - i - 1, seqs[i]);