2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.AlignmentOrder;
26 import jalview.datamodel.SequenceFeature;
27 import jalview.datamodel.SequenceGroup;
28 import jalview.datamodel.SequenceI;
29 import jalview.datamodel.SequenceNode;
30 import jalview.util.Comparison;
31 import jalview.util.MessageManager;
32 import jalview.util.QuickSort;
34 import java.util.ArrayList;
35 import java.util.List;
36 import java.util.Vector;
39 * Routines for manipulating the order of a multiple sequence alignment TODO:
40 * this class retains some global states concerning sort-order which should be
41 * made attributes for the caller's alignment visualization. TODO: refactor to
42 * allow a subset of selected sequences to be sorted within the context of a
43 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
44 * data mapping to each tobesorted element to use ], Alignment context of
45 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
46 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
47 * resulting from applying the operation to tobesorted should be mapped back to
48 * the original positions in alignment. Otherwise, normal behaviour is to re
49 * order alignment so that tobesorted is sorted and grouped together starting
50 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
51 * becomes a,tb1,tb2,tb3,b,c)
53 public class AlignmentSorter
56 * todo: refactor searches to follow a basic pattern: (search property, last
57 * search state, current sort direction)
59 static boolean sortIdAscending = true;
61 static int lastGroupHash = 0;
63 static boolean sortGroupAscending = true;
65 static AlignmentOrder lastOrder = null;
67 static boolean sortOrderAscending = true;
69 static NJTree lastTree = null;
71 static boolean sortTreeAscending = true;
74 * last Annotation Label used by sortByScore
76 private static String lastSortByScore;
78 private static boolean sortByScoreAscending = true;
81 * compact representation of last arguments to SortByFeatureScore
83 private static String lastSortByFeatureScore;
85 private static boolean sortByFeatureScoreAscending = true;
87 private static boolean sortLengthAscending;
90 * Sort by Percentage Identity w.r.t. s
97 * sequences from align that are to be sorted.
99 public static void sortByPID(AlignmentI align, SequenceI s,
102 sortByPID(align, s, tosort, 0, -1);
106 * Sort by Percentage Identity w.r.t. s
113 * sequences from align that are to be sorted.
115 * start column (0 for beginning
118 public static void sortByPID(AlignmentI align, SequenceI s,
119 SequenceI[] tosort, int start, int end)
121 int nSeq = align.getHeight();
123 float[] scores = new float[nSeq];
124 SequenceI[] seqs = new SequenceI[nSeq];
126 for (int i = 0; i < nSeq; i++)
128 scores[i] = Comparison.PID(align.getSequenceAt(i)
129 .getSequenceAsString(), s.getSequenceAsString());
130 seqs[i] = align.getSequenceAt(i);
133 QuickSort.sort(scores, 0, scores.length - 1, seqs);
135 setReverseOrder(align, seqs);
139 * Reverse the order of the sort
146 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
148 int nSeq = seqs.length;
158 len = (nSeq + 1) / 2;
161 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
163 synchronized (asq = align.getSequences())
165 for (int i = 0; i < len; i++)
167 // SequenceI tmp = seqs[i];
168 asq.set(i, seqs[nSeq - i - 1]);
169 asq.set(nSeq - i - 1, seqs[i]);
175 * Sets the Alignment object with the given sequences
178 * Alignment object to be updated
180 * sequences as a vector
182 private static void setOrder(AlignmentI align, Vector tmp)
184 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
188 * Sets the Alignment object with the given sequences
193 * sequences as an array
195 public static void setOrder(AlignmentI align, SequenceI[] seqs)
197 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
198 List<SequenceI> algn;
199 synchronized (algn = align.getSequences())
201 List<SequenceI> tmp = new ArrayList<SequenceI>();
203 for (int i = 0; i < seqs.length; i++)
205 if (algn.contains(seqs[i]))
212 // User may have hidden seqs, then clicked undo or redo
213 for (int i = 0; i < tmp.size(); i++)
215 algn.add(tmp.get(i));
221 * Sorts by ID. Numbers are sorted before letters.
224 * The alignment object to sort
226 public static void sortByID(AlignmentI align)
228 int nSeq = align.getHeight();
230 String[] ids = new String[nSeq];
231 SequenceI[] seqs = new SequenceI[nSeq];
233 for (int i = 0; i < nSeq; i++)
235 ids[i] = align.getSequenceAt(i).getName();
236 seqs[i] = align.getSequenceAt(i);
239 QuickSort.sort(ids, seqs);
243 setReverseOrder(align, seqs);
247 setOrder(align, seqs);
250 sortIdAscending = !sortIdAscending;
254 * Sorts by sequence length
257 * The alignment object to sort
259 public static void sortByLength(AlignmentI align)
261 int nSeq = align.getHeight();
263 float[] length = new float[nSeq];
264 SequenceI[] seqs = new SequenceI[nSeq];
266 for (int i = 0; i < nSeq; i++)
268 seqs[i] = align.getSequenceAt(i);
269 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
272 QuickSort.sort(length, seqs);
274 if (sortLengthAscending)
276 setReverseOrder(align, seqs);
280 setOrder(align, seqs);
283 sortLengthAscending = !sortLengthAscending;
287 * Sorts the alignment by size of group. <br>
288 * Maintains the order of sequences in each group by order in given alignment
292 * sorts the given alignment object by group
294 public static void sortByGroup(AlignmentI align)
296 // MAINTAINS ORIGNAL SEQUENCE ORDER,
297 // ORDERS BY GROUP SIZE
298 Vector groups = new Vector();
300 if (groups.hashCode() != lastGroupHash)
302 sortGroupAscending = true;
303 lastGroupHash = groups.hashCode();
307 sortGroupAscending = !sortGroupAscending;
310 // SORTS GROUPS BY SIZE
311 // ////////////////////
312 for (SequenceGroup sg : align.getGroups())
314 for (int j = 0; j < groups.size(); j++)
316 SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
318 if (sg.getSize() > sg2.getSize())
320 groups.insertElementAt(sg, j);
326 if (!groups.contains(sg))
328 groups.addElement(sg);
332 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
333 // /////////////////////////////////////////////
334 Vector seqs = new Vector();
336 for (int i = 0; i < groups.size(); i++)
338 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
339 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
341 for (int j = 0; j < orderedseqs.length; j++)
343 seqs.addElement(orderedseqs[j]);
347 if (sortGroupAscending)
349 setOrder(align, seqs);
353 setReverseOrder(align,
354 vectorSubsetToArray(seqs, align.getSequences()));
359 * Converts Vector to array. java 1.18 does not have Vector.toArray()
362 * Vector of SequenceI objects
364 * @return array of Sequence[]
366 private static SequenceI[] vectorToArray(Vector tmp)
368 SequenceI[] seqs = new SequenceI[tmp.size()];
370 for (int i = 0; i < tmp.size(); i++)
372 seqs[i] = (SequenceI) tmp.elementAt(i);
379 * Select sequences in order from tmp that is present in mask, and any
380 * remaining seqeunces in mask not in tmp
383 * thread safe collection of sequences
385 * thread safe collection of sequences
387 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
389 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
390 List<SequenceI> mask)
392 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
394 boolean[] tmask = new boolean[mask.size()];
396 for (i = 0; i < mask.size(); i++)
401 for (i = 0; i < tmp.size(); i++)
403 SequenceI sq = tmp.get(i);
404 idx = mask.indexOf(sq);
405 if (idx > -1 && tmask[idx])
412 for (i = 0; i < tmask.length; i++)
416 seqs.add(mask.get(i));
420 return seqs.toArray(new SequenceI[seqs.size()]);
424 * Sorts by a given AlignmentOrder object
429 * specified order for alignment
431 public static void sortBy(AlignmentI align, AlignmentOrder order)
433 // Get an ordered vector of sequences which may also be present in align
434 Vector tmp = order.getOrder();
436 if (lastOrder == order)
438 sortOrderAscending = !sortOrderAscending;
442 sortOrderAscending = true;
445 if (sortOrderAscending)
447 setOrder(align, tmp);
451 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
463 * @return DOCUMENT ME!
465 private static Vector getOrderByTree(AlignmentI align, NJTree tree)
467 int nSeq = align.getHeight();
469 Vector tmp = new Vector();
471 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
473 if (tmp.size() != nSeq)
475 // TODO: JBPNote - decide if this is always an error
476 // (eg. not when a tree is associated to another alignment which has more
478 if (tmp.size() != nSeq)
480 addStrays(align, tmp);
483 if (tmp.size() != nSeq)
486 .println("WARNING: tmp.size()="
490 + " in getOrderByTree - tree contains sequences not in alignment");
498 * Sorts the alignment by a given tree
505 public static void sortByTree(AlignmentI align, NJTree tree)
507 Vector tmp = getOrderByTree(align, tree);
509 // tmp should properly permute align with tree.
510 if (lastTree != tree)
512 sortTreeAscending = true;
517 sortTreeAscending = !sortTreeAscending;
520 if (sortTreeAscending)
522 setOrder(align, tmp);
526 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
538 private static void addStrays(AlignmentI align, Vector seqs)
540 int nSeq = align.getHeight();
542 for (int i = 0; i < nSeq; i++)
544 if (!seqs.contains(align.getSequenceAt(i)))
546 seqs.addElement(align.getSequenceAt(i));
550 if (nSeq != seqs.size())
553 .println("ERROR: Size still not right even after addStrays");
567 * @return DOCUMENT ME!
569 private static Vector _sortByTree(SequenceNode node, Vector tmp,
570 List<SequenceI> seqset)
577 SequenceNode left = (SequenceNode) node.left();
578 SequenceNode right = (SequenceNode) node.right();
580 if ((left == null) && (right == null))
582 if (!node.isPlaceholder() && (node.element() != null))
584 if (node.element() instanceof SequenceI)
586 if (!tmp.contains(node.element())) // && (seqset==null ||
587 // seqset.size()==0 ||
588 // seqset.contains(tmp)))
590 tmp.addElement(node.element());
599 _sortByTree(left, tmp, seqset);
600 _sortByTree(right, tmp, seqset);
607 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
612 * recover the order of sequences given by the safe numbering scheme introducd
613 * SeqsetUtils.uniquify.
615 public static void recoverOrder(SequenceI[] alignment)
617 float[] ids = new float[alignment.length];
619 for (int i = 0; i < alignment.length; i++)
621 ids[i] = (new Float(alignment[i].getName().substring(8)))
625 jalview.util.QuickSort.sort(ids, alignment);
629 * Sort sequence in order of increasing score attribute for annotation with a
630 * particular scoreLabel. Or reverse if same label was used previously
633 * exact label for sequence associated AlignmentAnnotation scores to
636 * sequences to be sorted
638 public static void sortByAnnotationScore(String scoreLabel,
639 AlignmentI alignment)
641 SequenceI[] seqs = alignment.getSequencesArray();
642 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
644 int hasScores = 0; // number of scores present on set
645 double[] scores = new double[seqs.length];
646 double min = 0, max = 0;
647 for (int i = 0; i < seqs.length; i++)
649 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
650 if (scoreAnn != null)
654 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
658 max = min = scores[i];
679 return; // do nothing - no scores present to sort by.
681 if (hasScores < seqs.length)
683 for (int i = 0; i < seqs.length; i++)
687 scores[i] = (max + i + 1.0);
692 jalview.util.QuickSort.sort(scores, seqs);
693 if (lastSortByScore != scoreLabel)
695 lastSortByScore = scoreLabel;
696 setOrder(alignment, seqs);
700 setReverseOrder(alignment, seqs);
705 * types of feature ordering: Sort by score : average score - or total score -
706 * over all features in region Sort by feature label text: (or if null -
707 * feature type text) - numerical or alphabetical Sort by feature density:
708 * based on counts - ignoring individual text or scores for each feature
710 public static String FEATURE_SCORE = "average_score";
712 public static String FEATURE_LABEL = "text";
714 public static String FEATURE_DENSITY = "density";
717 * sort the alignment using the features on each sequence found between start
718 * and stop with the given featureLabel (and optional group qualifier)
720 * @param featureLabel
725 * (-1 to include non-positional features)
727 * (-1 to only sort on non-positional features)
729 * - aligned sequences containing features
731 * - one of the string constants FEATURE_SCORE, FEATURE_LABEL,
734 public static void sortByFeature(String featureLabel, String groupLabel,
735 int start, int stop, AlignmentI alignment, String method)
737 sortByFeature(featureLabel == null ? null : new String[]
738 { featureLabel }, groupLabel == null ? null : new String[]
739 { groupLabel }, start, stop, alignment, method);
742 private static boolean containsIgnoreCase(final String lab,
753 for (int q = 0; q < labs.length; q++)
755 if (labs[q] != null && lab.equalsIgnoreCase(labs[q]))
763 public static void sortByFeature(String[] featureLabels,
764 String[] groupLabels, int start, int stop, AlignmentI alignment,
767 if (method != FEATURE_SCORE && method != FEATURE_LABEL
768 && method != FEATURE_DENSITY)
770 throw new Error(MessageManager.getString("error.implementation_error_sortbyfeature"));
772 boolean ignoreScore = method != FEATURE_SCORE;
773 StringBuffer scoreLabel = new StringBuffer();
774 scoreLabel.append(start + stop + method);
775 // This doesn't quite work yet - we'd like to have a canonical ordering that
776 // can be preserved from call to call
777 for (int i = 0; featureLabels != null && i < featureLabels.length; i++)
779 scoreLabel.append(featureLabels[i] == null ? "null"
782 for (int i = 0; groupLabels != null && i < groupLabels.length; i++)
784 scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]);
786 SequenceI[] seqs = alignment.getSequencesArray();
788 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
790 int hasScores = 0; // number of scores present on set
791 double[] scores = new double[seqs.length];
792 int[] seqScores = new int[seqs.length];
793 Object[] feats = new Object[seqs.length];
794 double min = 0, max = 0;
795 for (int i = 0; i < seqs.length; i++)
797 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
800 sf = new SequenceFeature[0];
804 SequenceFeature[] tmp = new SequenceFeature[sf.length];
805 for (int s = 0; s < tmp.length; s++)
811 int sstart = (start == -1) ? start : seqs[i].findPosition(start);
812 int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
816 for (int f = 0; f < sf.length; f++)
818 // filter for selection criteria
820 // ignore features outwith alignment start-stop positions.
821 (sf[f].end < sstart || sf[f].begin > sstop) ||
822 // or ignore based on selection criteria
823 (featureLabels != null && !AlignmentSorter
824 .containsIgnoreCase(sf[f].type, featureLabels))
825 || (groupLabels != null
826 // problem here: we cannot eliminate null feature group features
827 && (sf[f].getFeatureGroup() != null && !AlignmentSorter
828 .containsIgnoreCase(sf[f].getFeatureGroup(),
831 // forget about this feature
837 // or, also take a look at the scores if necessary.
838 if (!ignoreScore && sf[f].getScore() != Float.NaN)
840 if (seqScores[i] == 0)
846 scores[i] += sf[f].getScore(); // take the first instance of this
851 SequenceFeature[] fs;
852 feats[i] = fs = new SequenceFeature[n];
856 for (int f = 0; f < sf.length; f++)
860 ((SequenceFeature[]) feats[i])[n++] = sf[f];
863 if (method == FEATURE_LABEL)
865 // order the labels by alphabet
866 String[] labs = new String[fs.length];
867 for (int l = 0; l < labs.length; l++)
869 labs[l] = (fs[l].getDescription() != null ? fs[l]
870 .getDescription() : fs[l].getType());
872 jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
877 // compute average score
878 scores[i] /= seqScores[i];
879 // update the score bounds.
882 max = min = scores[i];
898 if (method == FEATURE_SCORE)
902 return; // do nothing - no scores present to sort by.
905 if (hasScores < seqs.length)
907 for (int i = 0; i < seqs.length; i++)
911 scores[i] = (max + 1 + i);
915 int nf = (feats[i] == null) ? 0
916 : ((SequenceFeature[]) feats[i]).length;
917 // System.err.println("Sorting on Score: seq "+seqs[i].getName()+
918 // " Feats: "+nf+" Score : "+scores[i]);
923 jalview.util.QuickSort.sort(scores, seqs);
925 else if (method == FEATURE_DENSITY)
928 // break ties between equivalent numbers for adjacent sequences by adding
929 // 1/Nseq*i on the original order
930 double fr = 0.9 / (1.0 * seqs.length);
931 for (int i = 0; i < seqs.length; i++)
934 scores[i] = (0.05 + fr * i)
935 + (nf = ((feats[i] == null) ? 0.0
936 : 1.0 * ((SequenceFeature[]) feats[i]).length));
937 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
938 // " Feats: "+nf+" Score : "+scores[i]);
940 jalview.util.QuickSort.sort(scores, seqs);
944 if (method == FEATURE_LABEL)
946 throw new Error(MessageManager.getString("error.not_yet_implemented"));
949 if (lastSortByFeatureScore == null
950 || !scoreLabel.toString().equals(lastSortByFeatureScore))
952 sortByFeatureScoreAscending = true;
956 sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
958 if (sortByFeatureScoreAscending)
960 setOrder(alignment, seqs);
964 setReverseOrder(alignment, seqs);
966 lastSortByFeatureScore = scoreLabel.toString();