2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.datamodel.AlignmentAnnotation;
26 import jalview.datamodel.AlignmentI;
27 import jalview.datamodel.AlignmentOrder;
28 import jalview.datamodel.SequenceFeature;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.SequenceNode;
32 import jalview.util.QuickSort;
34 import java.util.ArrayList;
35 import java.util.Collections;
36 import java.util.Iterator;
37 import java.util.List;
40 * Routines for manipulating the order of a multiple sequence alignment TODO:
41 * this class retains some global states concerning sort-order which should be
42 * made attributes for the caller's alignment visualization. TODO: refactor to
43 * allow a subset of selected sequences to be sorted within the context of a
44 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
45 * data mapping to each tobesorted element to use ], Alignment context of
46 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
47 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
48 * resulting from applying the operation to tobesorted should be mapped back to
49 * the original positions in alignment. Otherwise, normal behaviour is to re
50 * order alignment so that tobesorted is sorted and grouped together starting
51 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
52 * becomes a,tb1,tb2,tb3,b,c)
// Utility class holding only static sort routines. The static fields below
// remember what the previous sort of each kind was, so that repeating a sort
// can flip its direction.
54 public class AlignmentSorter
57 * todo: refactor searches to follow a basic pattern: (search property, last
58 * search state, current sort direction)
// direction flag for sortByID; flipped at the end of every sortByID call
60 static boolean sortIdAscending = true;
// hash value remembered by sortByGroup between calls
62 static int lastGroupHash = 0;
// direction flag for sortByGroup
64 static boolean sortGroupAscending = true;
// order object that sortBy compares (by reference) with the requested order
66 static AlignmentOrder lastOrder = null;
// direction flag for sortBy
68 static boolean sortOrderAscending = true;
// tree that sortByTree compares (by reference) with the requested tree
70 static TreeModel lastTree = null;
// direction flag for sortByTree
72 static boolean sortTreeAscending = true;
75 * last Annotation Label used for sort by Annotation score
77 private static String lastSortByAnnotation;
80 * string hash of last arguments to sortByFeature
81 * (sort order toggles if this is unchanged between sorts)
83 private static String sortByFeatureCriteria;
// direction flag passed by sortByFeature to QuickSort.sortByDouble
85 private static boolean sortByFeatureAscending = true;
// direction flag for sortByLength; has no initialiser, so it starts as false
87 private static boolean sortLengthAscending;
90 * Sorts sequences in the alignment by Percentage Identity with the given
91 * reference sequence, sorting the highest identity to the top
// Scores every row of the alignment against the reference sequence s with
// PIDModel.computePID, sorts the rows by that score, and writes the result
// back to the alignment through setReverseOrder.
99 public static void sortByPID(AlignmentI align, SequenceI s)
101 int nSeq = align.getHeight();
// scores[i] and seqs[i] are parallel arrays: the score of row i and row i itself
103 float[] scores = new float[nSeq];
104 SequenceI[] seqs = new SequenceI[nSeq];
105 String refSeq = s.getSequenceAsString();
// NOTE(review): the end of this constructor call is not visible in this
// extract - confirm the remaining argument(s) before touching this line
107 SimilarityParams pidParams = new SimilarityParams(true, true, true,
109 for (int i = 0; i < nSeq; i++)
// the score is computed as a wider type and narrowed to float for the sort
111 scores[i] = (float) PIDModel.computePID(align.getSequenceAt(i)
112 .getSequenceAsString(), refSeq, pidParams);
113 seqs[i] = align.getSequenceAt(i);
// permutes seqs in step with scores (presumably lowest score first - verify
// against QuickSort)
116 QuickSort.sort(scores, seqs);
// the sorted array is written back last-to-first
118 setReverseOrder(align, seqs);
122 * Reverse the order of the sort
// Writes seqs into the alignment's sequence list in reverse array order:
// list position i receives seqs[nSeq - i - 1] and the mirrored position
// receives seqs[i].
129 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
131 int nSeq = seqs.length;
// len is the number of mirrored pairs written below. Only this assignment is
// visible in this extract; the declaration of len and any other branch are
// not shown - TODO confirm.
141 len = (nSeq + 1) / 2;
144 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
// the list returned by getSequences() is used both as the lock and as the
// target of the writes
146 synchronized (asq = align.getSequences())
148 for (int i = 0; i < len; i++)
150 // SequenceI tmp = seqs[i];
// both values are read from seqs, never from asq, so an earlier write cannot
// be read back by a later one.
// assumes asq already holds at least nSeq entries, since set() replaces
// existing positions - TODO confirm
151 asq.set(i, seqs[nSeq - i - 1]);
152 asq.set(nSeq - i - 1, seqs[i]);
158 * Sets the Alignment object with the given sequences
161 * Alignment object to be updated
163 * sequences as a vector
165 private static void setOrder(AlignmentI align, List<SequenceI> tmp)
167 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
171 * Sets the Alignment object with the given sequences
176 * sequences as an array
// Rebuilds the alignment's sequence list from the given array. Only array
// entries that the alignment currently contains are considered.
178 public static void setOrder(AlignmentI align, SequenceI[] seqs)
180 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
181 List<SequenceI> algn;
// the alignment's own list is used as the lock and is modified directly
182 synchronized (algn = align.getSequences())
// staging list; presumably receives each seqs[i] that passes the contains()
// check below - the statement that fills it is not visible in this extract
184 List<SequenceI> tmp = new ArrayList<SequenceI>();
186 for (int i = 0; i < seqs.length; i++)
// NOTE(review): a contains() call per element makes this pass quadratic if
// algn is a plain list - confirm whether that matters for large alignments
188 if (algn.contains(seqs[i]))
195 // User may have hidden seqs, then clicked undo or redo
// append the staged sequences in their new order (presumably after algn has
// been emptied - that statement is not visible here, TODO confirm)
196 for (int i = 0; i < tmp.size(); i++)
198 algn.add(tmp.get(i));
204 * Sorts by ID. Numbers are sorted before letters.
207 * The alignment object to sort
209 public static void sortByID(AlignmentI align)
211 int nSeq = align.getHeight();
213 String[] ids = new String[nSeq];
214 SequenceI[] seqs = new SequenceI[nSeq];
216 for (int i = 0; i < nSeq; i++)
218 ids[i] = align.getSequenceAt(i).getName();
219 seqs[i] = align.getSequenceAt(i);
222 QuickSort.sort(ids, seqs);
226 setReverseOrder(align, seqs);
230 setOrder(align, seqs);
233 sortIdAscending = !sortIdAscending;
237 * Sorts by sequence length
240 * The alignment object to sort
242 public static void sortByLength(AlignmentI align)
244 int nSeq = align.getHeight();
246 float[] length = new float[nSeq];
247 SequenceI[] seqs = new SequenceI[nSeq];
249 for (int i = 0; i < nSeq; i++)
251 seqs[i] = align.getSequenceAt(i);
252 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
255 QuickSort.sort(length, seqs);
257 if (sortLengthAscending)
259 setReverseOrder(align, seqs);
263 setOrder(align, seqs);
266 sortLengthAscending = !sortLengthAscending;
270 * Sorts the alignment by size of group. <br>
271 * Maintains the order of sequences in each group by order in given alignment
275 * sorts the given alignment object by group
// Orders the alignment by sequence group: groups are ranked by size, and the
// sequences of each group are then emitted in the order returned by
// getSequencesInOrder(align).
277 public static void sortByGroup(AlignmentI align)
279 // MAINTAINS ORIGINAL SEQUENCE ORDER,
280 // ORDERS BY GROUP SIZE
281 List<SequenceGroup> groups = new ArrayList<SequenceGroup>();
// NOTE(review): groups was created empty on the line above, so this hash does
// not depend on the alignment's groups at all - confirm whether
// align.getGroups() was meant to be hashed here
283 if (groups.hashCode() != lastGroupHash)
285 sortGroupAscending = true;
286 lastGroupHash = groups.hashCode();
// hash unchanged since the previous call: flip the direction
290 sortGroupAscending = !sortGroupAscending;
293 // SORTS GROUPS BY SIZE
294 // ////////////////////
295 for (SequenceGroup sg : align.getGroups())
// look for the first already-ranked group that sg is larger than
// (presumably sg is inserted at that position - the insert statement is not
// visible in this extract)
297 for (int j = 0; j < groups.size(); j++)
299 SequenceGroup sg2 = groups.get(j);
301 if (sg.getSize() > sg2.getSize())
// sg was not placed by the scan above (presumably it is appended here -
// TODO confirm)
309 if (!groups.contains(sg))
315 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
316 // /////////////////////////////////////////////
317 List<SequenceI> seqs = new ArrayList<SequenceI>();
319 for (int i = 0; i < groups.size(); i++)
321 SequenceGroup sg = groups.get(i);
322 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
324 for (int j = 0; j < orderedseqs.length; j++)
326 seqs.add(orderedseqs[j]);
// write the collected order back to the alignment in the current direction
330 if (sortGroupAscending)
332 setOrder(align, seqs);
// setReverseOrder needs an array, so the list is first reconciled with the
// alignment's sequences
336 setReverseOrder(align,
337 vectorSubsetToArray(seqs, align.getSequences()));
342 * Selects, in order, the sequences from tmp that are present in mask,
343 * followed by any remaining sequences in mask that are not in tmp
346 * thread safe collection of sequences
348 * thread safe collection of sequences
350 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
// Builds an array from two lists: entries of tmp that also occur in mask,
// in tmp order, followed by entries of mask (see the final loop).
// tmask tracks, per mask position, which entries are still available.
352 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
353 List<SequenceI> mask)
356 // tmp2 = tmp.retainAll(mask);
357 // return tmp2.addAll(mask.removeAll(tmp2))
359 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
// one flag per mask position
361 boolean[] tmask = new boolean[mask.size()];
// first pass over mask (presumably sets every flag - the loop body is not
// visible in this extract)
363 for (i = 0; i < mask.size(); i++)
// second pass: walk tmp in order and look each sequence up in mask
368 for (i = 0; i < tmp.size(); i++)
370 SequenceI sq = tmp.get(i);
// NOTE(review): indexOf per element makes this pass quadratic if mask is a
// plain list - confirm whether that matters for large alignments
371 idx = mask.indexOf(sq);
// accept sq only if it is in mask and its flag is still set
372 if (idx > -1 && tmask[idx])
// third pass: add entries of mask (presumably only those whose flag is still
// set - the guarding condition is not visible here, TODO confirm)
379 for (i = 0; i < tmask.length; i++)
383 seqs.add(mask.get(i));
387 return seqs.toArray(new SequenceI[seqs.size()]);
391 * Sorts by a given AlignmentOrder object
396 * specified order for alignment
398 public static void sortBy(AlignmentI align, AlignmentOrder order)
400 // Get an ordered vector of sequences which may also be present in align
401 List<SequenceI> tmp = order.getOrder();
403 if (lastOrder == order)
405 sortOrderAscending = !sortOrderAscending;
409 sortOrderAscending = true;
412 if (sortOrderAscending)
414 setOrder(align, tmp);
418 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
430 * @return DOCUMENT ME!
// Collects the alignment's sequences in the order given by a tree, starting
// from tree.getTopNode(), and tops the list up with addStrays when the tree
// did not yield exactly one entry per alignment row.
432 private static List<SequenceI> getOrderByTree(AlignmentI align,
435 int nSeq = align.getHeight();
437 List<SequenceI> tmp = new ArrayList<SequenceI>();
// walk the tree from its top node; the walk fills and returns the list
439 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
// the tree produced a different number of sequences than the alignment holds
441 if (tmp.size() != nSeq)
443 // TODO: JBPNote - decide if this is always an error
444 // (eg. not when a tree is associated to another alignment which has more
446 if (tmp.size() != nSeq)
// append alignment sequences that the tree walk did not produce
448 addStrays(align, tmp);
// still mismatched after adding strays: report it
451 if (tmp.size() != nSeq)
// NOTE(review): the print target and the middle of this message are not
// visible in this extract
454 .println("WARNING: tmp.size()="
458 + " in getOrderByTree - tree contains sequences not in alignment");
466 * Sorts the alignment by a given tree
473 public static void sortByTree(AlignmentI align, TreeModel tree)
475 List<SequenceI> tmp = getOrderByTree(align, tree);
477 // tmp should properly permute align with tree.
478 if (lastTree != tree)
480 sortTreeAscending = true;
485 sortTreeAscending = !sortTreeAscending;
488 if (sortTreeAscending)
490 setOrder(align, tmp);
494 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
// Appends to tmp every alignment sequence that tmp does not already contain,
// then reports if the sizes still disagree.
506 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
508 int nSeq = align.getHeight();
510 for (int i = 0; i < nSeq; i++)
// row i is missing from tmp: add it at the end
512 if (!tmp.contains(align.getSequenceAt(i)))
514 tmp.add(align.getSequenceAt(i));
// tmp can still differ in size, e.g. if it holds entries that are not rows
// of the alignment
518 if (nSeq != tmp.size())
// NOTE(review): the print target of this call is not visible in this extract
521 .println("ERROR: Size still not right even after addStrays");
535 * @return DOCUMENT ME!
// Recursive tree walk that appends to tmp the SequenceI element of each leaf
// (a node with neither left nor right child), visiting the left subtree
// before the right one.
// NOTE(review): seqset is only passed through to the recursive calls in the
// visible code; its filtering use is commented out below.
537 private static List<SequenceI> _sortByTree(SequenceNode node,
538 List<SequenceI> tmp, List<SequenceI> seqset)
545 SequenceNode left = (SequenceNode) node.left();
546 SequenceNode right = (SequenceNode) node.right();
// leaf node: no children on either side
548 if ((left == null) && (right == null))
// skip placeholder leaves and leaves without an element
550 if (!node.isPlaceholder() && (node.element() != null))
552 if (node.element() instanceof SequenceI)
// add each sequence once only
554 if (!tmp.contains(node.element())) // && (seqset==null ||
555 // seqset.size()==0 ||
556 // seqset.contains(tmp)))
558 tmp.add((SequenceI) node.element());
// internal node: left subtree first, then right
567 _sortByTree(left, tmp, seqset);
568 _sortByTree(right, tmp, seqset);
575 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
580 * recover the order of sequences given by the safe numbering scheme introducd
581 * SeqsetUtils.uniquify.
583 public static void recoverOrder(SequenceI[] alignment)
585 float[] ids = new float[alignment.length];
587 for (int i = 0; i < alignment.length; i++)
589 ids[i] = (new Float(alignment[i].getName().substring(8)))
593 jalview.util.QuickSort.sort(ids, alignment);
597 * Sort sequence in order of increasing score attribute for annotation with a
598 * particular scoreLabel. Or reverse if same label was used previously
601 * exact label for sequence associated AlignmentAnnotation scores to
604 * sequences to be sorted
606 public static void sortByAnnotationScore(String scoreLabel,
607 AlignmentI alignment)
609 SequenceI[] seqs = alignment.getSequencesArray();
610 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
612 int hasScores = 0; // number of scores present on set
613 double[] scores = new double[seqs.length];
614 double min = 0, max = 0;
615 for (int i = 0; i < seqs.length; i++)
617 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
618 if (scoreAnn != null)
622 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
626 max = min = scores[i];
647 return; // do nothing - no scores present to sort by.
649 if (hasScores < seqs.length)
651 for (int i = 0; i < seqs.length; i++)
655 scores[i] = (max + i + 1.0);
660 jalview.util.QuickSort.sort(scores, seqs);
661 if (lastSortByAnnotation != scoreLabel)
663 lastSortByAnnotation = scoreLabel;
664 setOrder(alignment, seqs);
668 setReverseOrder(alignment, seqs);
673 * types of feature ordering: Sort by score : average score - or total score -
674 * over all features in region Sort by feature label text: (or if null -
675 * feature type text) - numerical or alphabetical Sort by feature density:
676 * based on counts - ignoring individual text or scores for each feature
678 public static String FEATURE_SCORE = "average_score";
680 public static String FEATURE_LABEL = "text";
682 public static String FEATURE_DENSITY = "density";
// Case-insensitive membership test: looks for an entry of labs that equals
// lab ignoring case.
// NOTE(review): the statements between the signature and the loop, and the
// return statements, are not visible in this extract - confirm how null
// arguments are handled before relying on this helper.
684 private static boolean containsIgnoreCase(final String lab,
685 final List<String> labs)
695 for (String label : labs)
// equalsIgnoreCase is called on lab, so lab must be non-null at this point
697 if (lab.equalsIgnoreCase(label))
706 * Sort sequences by feature score or density, optionally restricted by
707 * feature types, feature groups, or alignment start/end positions.
709 * If the sort is repeated for the same combination of types and groups, sort
712 * @param featureTypes
713 * a list of feature types to include (or null for all)
715 * a list of feature groups to include (or null for all)
717 * start column position to include (base zero)
719 * end column position to include (base zero)
721 * the alignment to be sorted
723 * either "average_score" or "density" ("text" not yet implemented)
725 public static void sortByFeature(List<String> featureTypes,
726 List<String> groups, final int startCol, final int endCol,
727 AlignmentI alignment, String method)
729 if (method != FEATURE_SCORE && method != FEATURE_LABEL
730 && method != FEATURE_DENSITY)
733 .format("Implementation Error - sortByFeature method must be either '%s' or '%s'",
734 FEATURE_SCORE, FEATURE_DENSITY);
735 System.err.println(msg);
739 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol);
741 SequenceI[] seqs = alignment.getSequencesArray();
743 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
745 int hasScores = 0; // number of scores present on set
746 double[] scores = new double[seqs.length];
747 int[] seqScores = new int[seqs.length];
748 Object[][] feats = new Object[seqs.length][];
752 for (int i = 0; i < seqs.length; i++)
755 * get sequence residues overlapping column region
756 * and features for residue positions and specified types
758 String[] types = featureTypes == null ? null : featureTypes
759 .toArray(new String[featureTypes.size()]);
760 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
766 Iterator<SequenceFeature> it = sfs.listIterator();
769 SequenceFeature sf = it.next();
772 * accept all features with null or empty group, otherwise
773 * check group is one of the currently visible groups
775 String featureGroup = sf.getFeatureGroup();
776 if (groups != null && featureGroup != null
777 && !"".equals(featureGroup)
778 && !groups.contains(featureGroup))
784 float score = sf.getScore();
785 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
787 if (seqScores[i] == 0)
794 // take the first instance of this score // ??
799 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
802 if (method == FEATURE_LABEL)
804 // order the labels by alphabet (not yet implemented)
805 String[] labs = new String[sfs.size()];
806 for (int l = 0; l < sfs.size(); l++)
808 SequenceFeature sf = sfs.get(l);
809 String description = sf.getDescription();
810 labs[l] = (description != null ? description : sf.getType());
812 QuickSort.sort(labs, feats[i]);
817 // compute average score
818 scores[i] /= seqScores[i];
819 // update the score bounds.
827 max = Math.max(max, scores[i]);
828 min = Math.min(min, scores[i]);
833 if (FEATURE_SCORE.equals(method))
837 return; // do nothing - no scores present to sort by.
840 if (hasScores < seqs.length)
842 for (int i = 0; i < seqs.length; i++)
846 scores[i] = (max + 1 + i);
850 // int nf = (feats[i] == null) ? 0
851 // : ((SequenceFeature[]) feats[i]).length;
852 // // System.err.println("Sorting on Score: seq " +
854 // + " Feats: " + nf + " Score : " + scores[i]);
858 QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
860 else if (FEATURE_DENSITY.equals(method))
862 for (int i = 0; i < seqs.length; i++)
864 int featureCount = feats[i] == null ? 0
865 : ((SequenceFeature[]) feats[i]).length;
866 scores[i] = featureCount;
867 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
868 // " Feats: "+featureCount+" Score : "+scores[i]);
870 QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
873 setOrder(alignment, seqs);
877 * Builds a string hash of criteria for sorting, and if unchanged from last
878 * time, reverse the sort order
881 * @param featureTypes
886 protected static void flipFeatureSortIfUnchanged(String method,
887 List<String> featureTypes, List<String> groups,
888 final int startCol, final int endCol)
890 StringBuilder sb = new StringBuilder(64);
891 sb.append(startCol).append(method).append(endCol);
892 if (featureTypes != null)
894 Collections.sort(featureTypes);
895 sb.append(featureTypes.toString());
899 Collections.sort(groups);
900 sb.append(groups.toString());
902 String scoreCriteria = sb.toString();
905 * if resorting on the same criteria, toggle sort order
907 if (sortByFeatureCriteria == null
908 || !scoreCriteria.equals(sortByFeatureCriteria))
910 sortByFeatureAscending = true;
914 sortByFeatureAscending = !sortByFeatureAscending;
916 sortByFeatureCriteria = scoreCriteria;