2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
17 * The Jalview Authors are detailed in the 'AUTHORS' file.
19 package jalview.analysis;
23 import jalview.datamodel.*;
24 import jalview.util.*;
/**
 * Routines for manipulating the order of a multiple sequence alignment.
 * <p>
 * TODO: this class retains some global states concerning sort-order which
 * should be made attributes for the caller's alignment visualization.
 * <p>
 * TODO: refactor to allow a subset of selected sequences to be sorted within
 * the context of a whole alignment. Sort method template is: (SequenceI[]
 * tobesorted, [input data mapping to each tobesorted element to use],
 * Alignment context of tobesorted that are to be re-ordered, boolean
 * sortinplace, [special data - i.e. sequence to be sorted w.r.t.]).
 * sortinplace implies that the sorted vector resulting from applying the
 * operation to tobesorted should be mapped back to the original positions in
 * alignment. Otherwise, normal behaviour is to re-order alignment so that
 * tobesorted is sorted and grouped together starting from the first tobesorted
 * position in the alignment, e.g. (a,tb2,b,tb1,c,tb3) becomes
 * (a,tb1,tb2,tb3,b,c).
 */
41 public class AlignmentSorter
44 * todo: refactor searches to follow a basic pattern: (search property, last
45 * search state, current sort direction)
47 static boolean sortIdAscending = true;
49 static int lastGroupHash = 0;
51 static boolean sortGroupAscending = true;
53 static AlignmentOrder lastOrder = null;
55 static boolean sortOrderAscending = true;
57 static NJTree lastTree = null;
59 static boolean sortTreeAscending = true;
62 * last Annotation Label used by sortByScore
64 private static String lastSortByScore;
66 private static boolean sortByScoreAscending = true;
69 * compact representation of last arguments to SortByFeatureScore
71 private static String lastSortByFeatureScore;
73 private static boolean sortByFeatureScoreAscending = true;
75 private static boolean sortLengthAscending;
78 * Sort by Percentage Identity w.r.t. s
85 * sequences from align that are to be sorted.
87 public static void sortByPID(AlignmentI align, SequenceI s,
90 sortByPID(align, s, tosort, 0, -1);
94 * Sort by Percentage Identity w.r.t. s
101 * sequences from align that are to be sorted.
103 * start column (0 for beginning
106 public static void sortByPID(AlignmentI align, SequenceI s,
107 SequenceI[] tosort, int start, int end)
109 int nSeq = align.getHeight();
111 float[] scores = new float[nSeq];
112 SequenceI[] seqs = new SequenceI[nSeq];
114 for (int i = 0; i < nSeq; i++)
116 scores[i] = Comparison.PID(align.getSequenceAt(i)
117 .getSequenceAsString(), s.getSequenceAsString());
118 seqs[i] = align.getSequenceAt(i);
121 QuickSort.sort(scores, 0, scores.length - 1, seqs);
123 setReverseOrder(align, seqs);
127 * Reverse the order of the sort
134 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
136 int nSeq = seqs.length;
146 len = (nSeq + 1) / 2;
149 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
151 synchronized (asq = align.getSequences())
153 for (int i = 0; i < len; i++)
155 // SequenceI tmp = seqs[i];
156 asq.set(i, seqs[nSeq - i - 1]);
157 asq.set(nSeq - i - 1, seqs[i]);
163 * Sets the Alignment object with the given sequences
166 * Alignment object to be updated
168 * sequences as a vector
170 private static void setOrder(AlignmentI align, Vector tmp)
172 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
176 * Sets the Alignment object with the given sequences
181 * sequences as an array
183 public static void setOrder(AlignmentI align, SequenceI[] seqs)
185 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
186 List<SequenceI> algn;
187 synchronized (algn = align.getSequences())
189 List<SequenceI> tmp = new ArrayList<SequenceI>();
191 for (int i = 0; i < seqs.length; i++)
193 if (algn.contains(seqs[i]))
200 // User may have hidden seqs, then clicked undo or redo
201 for (int i = 0; i < tmp.size(); i++)
203 algn.add(tmp.get(i));
209 * Sorts by ID. Numbers are sorted before letters.
212 * The alignment object to sort
214 public static void sortByID(AlignmentI align)
216 int nSeq = align.getHeight();
218 String[] ids = new String[nSeq];
219 SequenceI[] seqs = new SequenceI[nSeq];
221 for (int i = 0; i < nSeq; i++)
223 ids[i] = align.getSequenceAt(i).getName();
224 seqs[i] = align.getSequenceAt(i);
227 QuickSort.sort(ids, seqs);
231 setReverseOrder(align, seqs);
235 setOrder(align, seqs);
238 sortIdAscending = !sortIdAscending;
242 * Sorts by sequence length
245 * The alignment object to sort
247 public static void sortByLength(AlignmentI align)
249 int nSeq = align.getHeight();
251 float[] length = new float[nSeq];
252 SequenceI[] seqs = new SequenceI[nSeq];
254 for (int i = 0; i < nSeq; i++)
256 seqs[i] = align.getSequenceAt(i);
257 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
260 QuickSort.sort(length, seqs);
262 if (sortLengthAscending)
264 setReverseOrder(align, seqs);
268 setOrder(align, seqs);
271 sortLengthAscending = !sortLengthAscending;
275 * Sorts the alignment by size of group. <br>
276 * Maintains the order of sequences in each group by order in given alignment
280 * sorts the given alignment object by group
282 public static void sortByGroup(AlignmentI align)
284 // MAINTAINS ORIGNAL SEQUENCE ORDER,
285 // ORDERS BY GROUP SIZE
286 Vector groups = new Vector();
288 if (groups.hashCode() != lastGroupHash)
290 sortGroupAscending = true;
291 lastGroupHash = groups.hashCode();
295 sortGroupAscending = !sortGroupAscending;
298 // SORTS GROUPS BY SIZE
299 // ////////////////////
300 for (SequenceGroup sg : align.getGroups())
302 for (int j = 0; j < groups.size(); j++)
304 SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
306 if (sg.getSize() > sg2.getSize())
308 groups.insertElementAt(sg, j);
314 if (!groups.contains(sg))
316 groups.addElement(sg);
320 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
321 // /////////////////////////////////////////////
322 Vector seqs = new Vector();
324 for (int i = 0; i < groups.size(); i++)
326 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
327 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
329 for (int j = 0; j < orderedseqs.length; j++)
331 seqs.addElement(orderedseqs[j]);
335 if (sortGroupAscending)
337 setOrder(align, seqs);
341 setReverseOrder(align,
342 vectorSubsetToArray(seqs, align.getSequences()));
347 * Converts Vector to array. java 1.18 does not have Vector.toArray()
350 * Vector of SequenceI objects
352 * @return array of Sequence[]
354 private static SequenceI[] vectorToArray(Vector tmp)
356 SequenceI[] seqs = new SequenceI[tmp.size()];
358 for (int i = 0; i < tmp.size(); i++)
360 seqs[i] = (SequenceI) tmp.elementAt(i);
367 * Select sequences in order from tmp that is present in mask, and any
368 * remaining seqeunces in mask not in tmp
371 * thread safe collection of sequences
373 * thread safe collection of sequences
375 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
377 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
378 List<SequenceI> mask)
380 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
382 boolean[] tmask = new boolean[mask.size()];
384 for (i = 0; i < mask.size(); i++)
389 for (i = 0; i < tmp.size(); i++)
391 SequenceI sq = tmp.get(i);
392 idx = mask.indexOf(sq);
393 if (idx > -1 && tmask[idx])
400 for (i = 0; i < tmask.length; i++)
404 seqs.add(mask.get(i));
408 return seqs.toArray(new SequenceI[seqs.size()]);
412 * Sorts by a given AlignmentOrder object
417 * specified order for alignment
419 public static void sortBy(AlignmentI align, AlignmentOrder order)
421 // Get an ordered vector of sequences which may also be present in align
422 Vector tmp = order.getOrder();
424 if (lastOrder == order)
426 sortOrderAscending = !sortOrderAscending;
430 sortOrderAscending = true;
433 if (sortOrderAscending)
435 setOrder(align, tmp);
439 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
451 * @return DOCUMENT ME!
453 private static Vector getOrderByTree(AlignmentI align, NJTree tree)
455 int nSeq = align.getHeight();
457 Vector tmp = new Vector();
459 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
461 if (tmp.size() != nSeq)
463 // TODO: JBPNote - decide if this is always an error
464 // (eg. not when a tree is associated to another alignment which has more
466 if (tmp.size() != nSeq)
468 addStrays(align, tmp);
471 if (tmp.size() != nSeq)
474 .println("WARNING: tmp.size()="
478 + " in getOrderByTree - tree contains sequences not in alignment");
486 * Sorts the alignment by a given tree
493 public static void sortByTree(AlignmentI align, NJTree tree)
495 Vector tmp = getOrderByTree(align, tree);
497 // tmp should properly permute align with tree.
498 if (lastTree != tree)
500 sortTreeAscending = true;
505 sortTreeAscending = !sortTreeAscending;
508 if (sortTreeAscending)
510 setOrder(align, tmp);
514 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
526 private static void addStrays(AlignmentI align, Vector seqs)
528 int nSeq = align.getHeight();
530 for (int i = 0; i < nSeq; i++)
532 if (!seqs.contains(align.getSequenceAt(i)))
534 seqs.addElement(align.getSequenceAt(i));
538 if (nSeq != seqs.size())
541 .println("ERROR: Size still not right even after addStrays");
555 * @return DOCUMENT ME!
557 private static Vector _sortByTree(SequenceNode node, Vector tmp,
558 List<SequenceI> seqset)
565 SequenceNode left = (SequenceNode) node.left();
566 SequenceNode right = (SequenceNode) node.right();
568 if ((left == null) && (right == null))
570 if (!node.isPlaceholder() && (node.element() != null))
572 if (node.element() instanceof SequenceI)
574 if (!tmp.contains(node.element())) // && (seqset==null ||
575 // seqset.size()==0 ||
576 // seqset.contains(tmp)))
578 tmp.addElement(node.element());
587 _sortByTree(left, tmp, seqset);
588 _sortByTree(right, tmp, seqset);
// Ordering objects: Alignment.sortBy(OrderObj) - OrderObj supplies the
// sequence references in the order that is to be applied.
600 * recover the order of sequences given by the safe numbering scheme introducd
601 * SeqsetUtils.uniquify.
603 public static void recoverOrder(SequenceI[] alignment)
605 float[] ids = new float[alignment.length];
607 for (int i = 0; i < alignment.length; i++)
609 ids[i] = (new Float(alignment[i].getName().substring(8)))
613 jalview.util.QuickSort.sort(ids, alignment);
617 * Sort sequence in order of increasing score attribute for annotation with a
618 * particular scoreLabel. Or reverse if same label was used previously
621 * exact label for sequence associated AlignmentAnnotation scores to
624 * sequences to be sorted
626 public static void sortByAnnotationScore(String scoreLabel,
627 AlignmentI alignment)
629 SequenceI[] seqs = alignment.getSequencesArray();
630 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
632 int hasScores = 0; // number of scores present on set
633 double[] scores = new double[seqs.length];
634 double min = 0, max = 0;
635 for (int i = 0; i < seqs.length; i++)
637 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
638 if (scoreAnn != null)
642 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
646 max = min = scores[i];
667 return; // do nothing - no scores present to sort by.
669 if (hasScores < seqs.length)
671 for (int i = 0; i < seqs.length; i++)
675 scores[i] = (max + i + 1.0);
680 jalview.util.QuickSort.sort(scores, seqs);
681 if (lastSortByScore != scoreLabel)
683 lastSortByScore = scoreLabel;
684 setOrder(alignment, seqs);
688 setReverseOrder(alignment, seqs);
693 * types of feature ordering: Sort by score : average score - or total score -
694 * over all features in region Sort by feature label text: (or if null -
695 * feature type text) - numerical or alphabetical Sort by feature density:
696 * based on counts - ignoring individual text or scores for each feature
698 public static String FEATURE_SCORE = "average_score";
700 public static String FEATURE_LABEL = "text";
702 public static String FEATURE_DENSITY = "density";
705 * sort the alignment using the features on each sequence found between start
706 * and stop with the given featureLabel (and optional group qualifier)
708 * @param featureLabel
713 * (-1 to include non-positional features)
715 * (-1 to only sort on non-positional features)
717 * - aligned sequences containing features
719 * - one of the string constants FEATURE_SCORE, FEATURE_LABEL,
722 public static void sortByFeature(String featureLabel, String groupLabel,
723 int start, int stop, AlignmentI alignment, String method)
725 sortByFeature(featureLabel == null ? null : new String[]
726 { featureLabel }, groupLabel == null ? null : new String[]
727 { groupLabel }, start, stop, alignment, method);
730 private static boolean containsIgnoreCase(final String lab,
741 for (int q = 0; q < labs.length; q++)
743 if (labs[q] != null && lab.equalsIgnoreCase(labs[q]))
751 public static void sortByFeature(String[] featureLabels,
752 String[] groupLabels, int start, int stop, AlignmentI alignment,
755 if (method != FEATURE_SCORE && method != FEATURE_LABEL
756 && method != FEATURE_DENSITY)
759 "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY.");
761 boolean ignoreScore = method != FEATURE_SCORE;
762 StringBuffer scoreLabel = new StringBuffer();
763 scoreLabel.append(start + stop + method);
764 // This doesn't quite work yet - we'd like to have a canonical ordering that
765 // can be preserved from call to call
766 for (int i = 0; featureLabels != null && i < featureLabels.length; i++)
768 scoreLabel.append(featureLabels[i] == null ? "null"
771 for (int i = 0; groupLabels != null && i < groupLabels.length; i++)
773 scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]);
775 SequenceI[] seqs = alignment.getSequencesArray();
777 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
779 int hasScores = 0; // number of scores present on set
780 double[] scores = new double[seqs.length];
781 int[] seqScores = new int[seqs.length];
782 Object[] feats = new Object[seqs.length];
783 double min = 0, max = 0;
784 for (int i = 0; i < seqs.length; i++)
786 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
787 if (sf == null && seqs[i].getDatasetSequence() != null)
789 sf = seqs[i].getDatasetSequence().getSequenceFeatures();
793 sf = new SequenceFeature[0];
797 SequenceFeature[] tmp = new SequenceFeature[sf.length];
798 for (int s = 0; s < tmp.length; s++)
804 int sstart = (start == -1) ? start : seqs[i].findPosition(start);
805 int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
809 for (int f = 0; f < sf.length; f++)
811 // filter for selection criteria
813 // ignore features outwith alignment start-stop positions.
814 (sf[f].end < sstart || sf[f].begin > sstop) ||
815 // or ignore based on selection criteria
816 (featureLabels != null && !AlignmentSorter
817 .containsIgnoreCase(sf[f].type, featureLabels))
818 || (groupLabels != null
819 // problem here: we cannot eliminate null feature group features
820 && (sf[f].getFeatureGroup() != null && !AlignmentSorter
821 .containsIgnoreCase(sf[f].getFeatureGroup(),
824 // forget about this feature
830 // or, also take a look at the scores if necessary.
831 if (!ignoreScore && sf[f].getScore() != Float.NaN)
833 if (seqScores[i] == 0)
839 scores[i] += sf[f].getScore(); // take the first instance of this
844 SequenceFeature[] fs;
845 feats[i] = fs = new SequenceFeature[n];
849 for (int f = 0; f < sf.length; f++)
853 ((SequenceFeature[]) feats[i])[n++] = sf[f];
856 if (method == FEATURE_LABEL)
858 // order the labels by alphabet
859 String[] labs = new String[fs.length];
860 for (int l = 0; l < labs.length; l++)
862 labs[l] = (fs[l].getDescription() != null ? fs[l]
863 .getDescription() : fs[l].getType());
865 jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
870 // compute average score
871 scores[i] /= seqScores[i];
872 // update the score bounds.
875 max = min = scores[i];
891 if (method == FEATURE_SCORE)
895 return; // do nothing - no scores present to sort by.
898 if (hasScores < seqs.length)
900 for (int i = 0; i < seqs.length; i++)
904 scores[i] = (max + 1 + i);
908 int nf = (feats[i] == null) ? 0
909 : ((SequenceFeature[]) feats[i]).length;
910 // System.err.println("Sorting on Score: seq "+seqs[i].getName()+
911 // " Feats: "+nf+" Score : "+scores[i]);
916 jalview.util.QuickSort.sort(scores, seqs);
918 else if (method == FEATURE_DENSITY)
921 // break ties between equivalent numbers for adjacent sequences by adding
922 // 1/Nseq*i on the original order
923 double fr = 0.9 / (1.0 * seqs.length);
924 for (int i = 0; i < seqs.length; i++)
927 scores[i] = (0.05 + fr * i)
928 + (nf = ((feats[i] == null) ? 0.0
929 : 1.0 * ((SequenceFeature[]) feats[i]).length));
930 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
931 // " Feats: "+nf+" Score : "+scores[i]);
933 jalview.util.QuickSort.sort(scores, seqs);
937 if (method == FEATURE_LABEL)
939 throw new Error("Not yet implemented.");
942 if (lastSortByFeatureScore == null
943 || !scoreLabel.toString().equals(lastSortByFeatureScore))
945 sortByFeatureScoreAscending = true;
949 sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
951 if (sortByFeatureScoreAscending)
953 setOrder(alignment, seqs);
957 setReverseOrder(alignment, seqs);
959 lastSortByFeatureScore = scoreLabel.toString();