2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.datamodel.AlignmentAnnotation;
26 import jalview.datamodel.AlignmentI;
27 import jalview.datamodel.AlignmentOrder;
28 import jalview.datamodel.SequenceFeature;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.SequenceNode;
32 import jalview.util.Platform;
33 import jalview.util.QuickSort;
35 import java.util.ArrayList;
36 import java.util.Collections;
37 import java.util.Iterator;
38 import java.util.List;
/**
 * Routines for manipulating the order of a multiple sequence alignment.
 * <p>
 * TODO: this class retains some global state concerning sort order, which
 * should be made attributes of the caller's alignment visualization.
 * <p>
 * TODO: refactor to allow a subset of selected sequences to be sorted within
 * the context of a whole alignment. The sort method template is: (SequenceI[]
 * tobesorted, [input data mapping to each tobesorted element to use],
 * Alignment context of tobesorted that are to be re-ordered, boolean
 * sortinplace, [special data - i.e. sequence to be sorted w.r.t.]).
 * sortinplace implies that the sorted vector resulting from applying the
 * operation to tobesorted should be mapped back to the original positions in
 * the alignment. Otherwise, normal behaviour is to re-order the alignment so
 * that tobesorted is sorted and grouped together, starting from the first
 * tobesorted position in the alignment, e.g. (a,tb2,b,tb1,c,tb3) becomes
 * (a,tb1,tb2,tb3,b,c).
 */
55 public class AlignmentSorter
59 * types of feature ordering: Sort by score : average score - or total score -
60 * over all features in region Sort by feature label text: (or if null -
61 * feature type text) - numerical or alphabetical Sort by feature density:
62 * based on counts - ignoring individual text or scores for each feature
64 public static final String FEATURE_SCORE = "average_score";
66 public static final String FEATURE_LABEL = "text";
68 public static final String FEATURE_DENSITY = "density";
70 static AlignmentSorter instance;
72 public static AlignmentSorter getInstance()
75 // BH 2019.05.08 need to isolate static fields in JavaScript
77 AlignmentSorter i = instance;
78 @SuppressWarnings("unused")
82 g = Thread.currentThread().getThreadGroup();
84 * @j2sNative i = g._jalviewAlignmentSorterInstance;
90 i = new AlignmentSorter();
95 * @j2sNative g._jalviewAlignmentSorterInstance = i;
108 * todo: refactor searches to follow a basic pattern: (search property, last
109 * search state, current sort direction)
111 boolean sortIdAscending = true;
113 int lastGroupHash = 0;
115 boolean sortGroupAscending = true;
117 AlignmentOrder lastOrder = null;
119 boolean sortOrderAscending = true;
121 TreeModel lastTree = null;
123 boolean sortTreeAscending = true;
127 * last Annotation Label used for sort by Annotation score
129 private String lastSortByAnnotation;
132 * string hash of last arguments to sortByFeature (sort order toggles if this
133 * is unchanged between sorts)
135 private String sortByFeatureCriteria;
137 private boolean sortByFeatureAscending = true;
139 private boolean sortLengthAscending;
142 * Sorts sequences in the alignment by Percentage Identity with the given
143 * reference sequence, sorting the highest identity to the top
151 public static void sortByPID(AlignmentI align, SequenceI s)
153 int nSeq = align.getHeight();
155 float[] scores = new float[nSeq];
156 SequenceI[] seqs = new SequenceI[nSeq];
157 String refSeq = s.getSequenceAsString();
159 SimilarityParams pidParams = new SimilarityParams(true, true, true,
161 for (int i = 0; i < nSeq; i++)
163 scores[i] = (float) PIDModel.computePID(
164 align.getSequenceAt(i).getSequenceAsString(), refSeq,
166 seqs[i] = align.getSequenceAt(i);
169 QuickSort.sort(scores, seqs);
171 setReverseOrder(align, seqs);
175 * Reverse the order of the sort
182 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
184 int nSeq = seqs.length;
194 len = (nSeq + 1) / 2;
197 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
198 List<SequenceI> asq = align.getSequences();
201 for (int i = 0; i < len; i++)
203 // SequenceI tmp = seqs[i];
204 asq.set(i, seqs[nSeq - i - 1]);
205 asq.set(nSeq - i - 1, seqs[i]);
211 * Sets the Alignment object with the given sequences
214 * Alignment object to be updated
216 * sequences as a vector
218 private static void setOrder(AlignmentI align, List<SequenceI> tmp)
220 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
224 * Sets the Alignment object with the given sequences
229 * sequences as an array
231 public static void setOrder(AlignmentI align, SequenceI[] seqs)
233 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
234 List<SequenceI> algn = align.getSequences();
237 List<SequenceI> tmp = new ArrayList<>();
239 for (int i = 0; i < seqs.length; i++)
241 if (algn.contains(seqs[i]))
248 // User may have hidden seqs, then clicked undo or redo
249 for (int i = 0; i < tmp.size(); i++)
251 algn.add(tmp.get(i));
257 * Sorts by ID. Numbers are sorted before letters.
260 * The alignment object to sort
262 public static void sortByID(AlignmentI align)
264 int nSeq = align.getHeight();
266 String[] ids = new String[nSeq];
267 SequenceI[] seqs = new SequenceI[nSeq];
269 for (int i = 0; i < nSeq; i++)
271 ids[i] = align.getSequenceAt(i).getName();
272 seqs[i] = align.getSequenceAt(i);
275 QuickSort.sort(ids, seqs);
277 AlignmentSorter as = getInstance();
278 if (as.sortIdAscending)
280 setReverseOrder(align, seqs);
284 setOrder(align, seqs);
287 as.sortIdAscending = !as.sortIdAscending;
291 * Sorts by sequence length
294 * The alignment object to sort
296 public static void sortByLength(AlignmentI align)
298 int nSeq = align.getHeight();
300 float[] length = new float[nSeq];
301 SequenceI[] seqs = new SequenceI[nSeq];
303 for (int i = 0; i < nSeq; i++)
305 seqs[i] = align.getSequenceAt(i);
306 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
309 QuickSort.sort(length, seqs);
311 AlignmentSorter as = getInstance();
313 if (as.sortLengthAscending)
315 setReverseOrder(align, seqs);
319 setOrder(align, seqs);
322 as.sortLengthAscending = !as.sortLengthAscending;
326 * Sorts the alignment by size of group. <br>
327 * Maintains the order of sequences in each group by order in given alignment
331 * sorts the given alignment object by group
333 public static void sortByGroup(AlignmentI align)
335 // MAINTAINS ORIGNAL SEQUENCE ORDER,
336 // ORDERS BY GROUP SIZE
337 List<SequenceGroup> groups = new ArrayList<>();
339 AlignmentSorter as = getInstance();
341 if (groups.hashCode() != as.lastGroupHash)
343 as.sortGroupAscending = true;
344 as.lastGroupHash = groups.hashCode();
348 as.sortGroupAscending = !as.sortGroupAscending;
351 // SORTS GROUPS BY SIZE
352 // ////////////////////
353 for (SequenceGroup sg : align.getGroups())
355 for (int j = 0; j < groups.size(); j++)
357 SequenceGroup sg2 = groups.get(j);
359 if (sg.getSize() > sg2.getSize())
367 if (!groups.contains(sg))
373 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
374 // /////////////////////////////////////////////
375 List<SequenceI> seqs = new ArrayList<>();
377 for (int i = 0; i < groups.size(); i++)
379 SequenceGroup sg = groups.get(i);
380 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
382 for (int j = 0; j < orderedseqs.length; j++)
384 seqs.add(orderedseqs[j]);
388 if (as.sortGroupAscending)
390 setOrder(align, seqs);
394 setReverseOrder(align,
395 vectorSubsetToArray(seqs, align.getSequences()));
400 * Select sequences in order from tmp that is present in mask, and any
401 * remaining sequences in mask not in tmp
404 * thread safe collection of sequences
406 * thread safe collection of sequences
408 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
410 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
411 List<SequenceI> mask)
414 // tmp2 = tmp.retainAll(mask);
415 // return tmp2.addAll(mask.removeAll(tmp2))
417 ArrayList<SequenceI> seqs = new ArrayList<>();
419 boolean[] tmask = new boolean[mask.size()];
421 for (i = 0; i < mask.size(); i++)
426 for (i = 0; i < tmp.size(); i++)
428 SequenceI sq = tmp.get(i);
429 idx = mask.indexOf(sq);
430 if (idx > -1 && tmask[idx])
437 for (i = 0; i < tmask.length; i++)
441 seqs.add(mask.get(i));
445 return seqs.toArray(new SequenceI[seqs.size()]);
449 * Sorts by a given AlignmentOrder object
454 * specified order for alignment
456 public static void sortBy(AlignmentI align, AlignmentOrder order)
458 // Get an ordered vector of sequences which may also be present in align
459 List<SequenceI> tmp = order.getOrder();
461 AlignmentSorter as = getInstance();
463 if (as.lastOrder == order)
465 as.sortOrderAscending = !as.sortOrderAscending;
469 as.sortOrderAscending = true;
472 if (as.sortOrderAscending)
474 setOrder(align, tmp);
478 setReverseOrder(align,
479 vectorSubsetToArray(tmp, align.getSequences()));
491 * @return DOCUMENT ME!
493 private static List<SequenceI> getOrderByTree(AlignmentI align,
496 int nSeq = align.getHeight();
498 List<SequenceI> tmp = new ArrayList<>();
500 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
502 if (tmp.size() != nSeq)
504 // TODO: JBPNote - decide if this is always an error
505 // (eg. not when a tree is associated to another alignment which has more
507 if (tmp.size() != nSeq)
509 addStrays(align, tmp);
512 if (tmp.size() != nSeq)
514 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
516 + " in getOrderByTree - tree contains sequences not in alignment");
524 * Sorts the alignment by a given tree
531 public static void sortByTree(AlignmentI align, TreeModel tree)
533 List<SequenceI> tmp = getOrderByTree(align, tree);
535 AlignmentSorter as = getInstance();
537 // tmp should properly permute align with tree.
538 if (as.lastTree != tree)
540 as.sortTreeAscending = true;
545 as.sortTreeAscending = !as.sortTreeAscending;
548 if (as.sortTreeAscending)
550 setOrder(align, tmp);
554 setReverseOrder(align,
555 vectorSubsetToArray(tmp, align.getSequences()));
567 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
569 int nSeq = align.getHeight();
571 for (int i = 0; i < nSeq; i++)
573 if (!tmp.contains(align.getSequenceAt(i)))
575 tmp.add(align.getSequenceAt(i));
579 if (nSeq != tmp.size())
582 .println("ERROR: Size still not right even after addStrays");
596 * @return DOCUMENT ME!
598 private static List<SequenceI> _sortByTree(SequenceNode node,
599 List<SequenceI> tmp, List<SequenceI> seqset)
606 SequenceNode left = (SequenceNode) node.left();
607 SequenceNode right = (SequenceNode) node.right();
609 if ((left == null) && (right == null))
611 if (!node.isPlaceholder() && (node.element() != null))
613 if (node.element() instanceof SequenceI)
615 if (!tmp.contains(node.element())) // && (seqset==null ||
616 // seqset.size()==0 ||
617 // seqset.contains(tmp)))
619 tmp.add((SequenceI) node.element());
628 _sortByTree(left, tmp, seqset);
629 _sortByTree(right, tmp, seqset);
636 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
641 * recover the order of sequences given by the safe numbering scheme introducd
642 * SeqsetUtils.uniquify.
644 public static void recoverOrder(SequenceI[] alignment)
646 float[] ids = new float[alignment.length];
648 for (int i = 0; i < alignment.length; i++)
650 ids[i] = (new Float(alignment[i].getName().substring(8)))
654 jalview.util.QuickSort.sort(ids, alignment);
658 * Sort sequence in order of increasing score attribute for annotation with a
659 * particular scoreLabel. Or reverse if same label was used previously
662 * exact label for sequence associated AlignmentAnnotation scores to
665 * sequences to be sorted
667 public static void sortByAnnotationScore(String scoreLabel,
668 AlignmentI alignment)
670 SequenceI[] seqs = alignment.getSequencesArray();
671 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
673 int hasScores = 0; // number of scores present on set
674 double[] scores = new double[seqs.length];
675 double min = 0, max = 0;
676 for (int i = 0; i < seqs.length; i++)
678 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
679 if (scoreAnn != null)
683 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
687 max = min = scores[i];
708 return; // do nothing - no scores present to sort by.
710 if (hasScores < seqs.length)
712 for (int i = 0; i < seqs.length; i++)
716 scores[i] = (max + i + 1.0);
721 jalview.util.QuickSort.sort(scores, seqs);
723 AlignmentSorter as = getInstance();
725 if (as.lastSortByAnnotation != scoreLabel)
727 as.lastSortByAnnotation = scoreLabel;
728 setOrder(alignment, seqs);
732 setReverseOrder(alignment, seqs);
737 * Sort sequences by feature score or density, optionally restricted by
738 * feature types, feature groups, or alignment start/end positions.
740 * If the sort is repeated for the same combination of types and groups, sort
743 * @param featureTypes
744 * a list of feature types to include (or null for all)
746 * a list of feature groups to include (or null for all)
748 * start column position to include (base zero)
750 * end column position to include (base zero)
752 * the alignment to be sorted
754 * either "average_score" or "density" ("text" not yet implemented)
756 public static void sortByFeature(List<String> featureTypes,
757 List<String> groups, final int startCol, final int endCol,
758 AlignmentI alignment, String method)
760 if (method != FEATURE_SCORE && method != FEATURE_LABEL
761 && method != FEATURE_DENSITY)
764 .format("Implementation Error - sortByFeature method must be either '%s' or '%s'",
765 FEATURE_SCORE, FEATURE_DENSITY);
766 System.err.println(msg);
770 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol);
772 SequenceI[] seqs = alignment.getSequencesArray();
774 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
776 int hasScores = 0; // number of scores present on set
777 double[] scores = new double[seqs.length];
778 int[] seqScores = new int[seqs.length];
779 Object[][] feats = new Object[seqs.length][];
783 for (int i = 0; i < seqs.length; i++)
786 * get sequence residues overlapping column region
787 * and features for residue positions and specified types
789 String[] types = featureTypes == null ? null : featureTypes
790 .toArray(new String[featureTypes.size()]);
791 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
797 Iterator<SequenceFeature> it = sfs.listIterator();
800 SequenceFeature sf = it.next();
803 * accept all features with null or empty group, otherwise
804 * check group is one of the currently visible groups
806 String featureGroup = sf.getFeatureGroup();
807 if (groups != null && featureGroup != null
808 && !"".equals(featureGroup)
809 && !groups.contains(featureGroup))
815 float score = sf.getScore();
816 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
818 if (seqScores[i] == 0)
825 // take the first instance of this score // ??
830 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
833 if (method == FEATURE_LABEL)
835 // order the labels by alphabet (not yet implemented)
836 String[] labs = new String[sfs.size()];
837 for (int l = 0; l < sfs.size(); l++)
839 SequenceFeature sf = sfs.get(l);
840 String description = sf.getDescription();
841 labs[l] = (description != null ? description : sf.getType());
843 QuickSort.sort(labs, feats[i]);
848 // compute average score
849 scores[i] /= seqScores[i];
850 // update the score bounds.
858 max = Math.max(max, scores[i]);
859 min = Math.min(min, scores[i]);
864 boolean doSort = false;
866 if (FEATURE_SCORE.equals(method))
870 return; // do nothing - no scores present to sort by.
873 if (hasScores < seqs.length)
875 for (int i = 0; i < seqs.length; i++)
879 scores[i] = (max + 1 + i);
883 // int nf = (feats[i] == null) ? 0
884 // : ((SequenceFeature[]) feats[i]).length;
885 // // System.err.println("Sorting on Score: seq " +
887 // + " Feats: " + nf + " Score : " + scores[i]);
893 else if (FEATURE_DENSITY.equals(method))
895 for (int i = 0; i < seqs.length; i++)
897 int featureCount = feats[i] == null ? 0
898 : ((SequenceFeature[]) feats[i]).length;
899 scores[i] = featureCount;
900 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
901 // " Feats: "+featureCount+" Score : "+scores[i]);
907 QuickSort.sortByDouble(scores, seqs, getInstance().sortByFeatureAscending);
909 setOrder(alignment, seqs);
913 * Builds a string hash of criteria for sorting, and if unchanged from last
914 * time, reverse the sort order
917 * @param featureTypes
922 protected static void flipFeatureSortIfUnchanged(String method,
923 List<String> featureTypes, List<String> groups,
924 final int startCol, final int endCol)
926 StringBuilder sb = new StringBuilder(64);
927 sb.append(startCol).append(method).append(endCol);
928 if (featureTypes != null)
930 Collections.sort(featureTypes);
931 sb.append(featureTypes.toString());
935 Collections.sort(groups);
936 sb.append(groups.toString());
938 String scoreCriteria = sb.toString();
941 * if resorting on the same criteria, toggle sort order
943 AlignmentSorter as = getInstance();
944 if (as.sortByFeatureCriteria == null
945 || !scoreCriteria.equals(as.sortByFeatureCriteria))
947 as.sortByFeatureAscending = true;
951 as.sortByFeatureAscending = !as.sortByFeatureAscending;
953 as.sortByFeatureCriteria = scoreCriteria;