2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
18 package jalview.analysis;
22 import jalview.datamodel.*;
23 import jalview.util.*;
26 * Routines for manipulating the order of a multiple sequence alignment TODO:
27 * this class retains some global states concerning sort-order which should be
28 * made attributes for the caller's alignment visualization. TODO: refactor to
29 * allow a subset of selected sequences to be sorted within the context of a
30 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
31 * data mapping to each tobesorted element to use ], Alignment context of
32 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
33 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
34 * resulting from applying the operation to tobesorted should be mapped back to
35 * the original positions in alignment. Otherwise, normal behaviour is to re
36 * order alignment so that tobesorted is sorted and grouped together starting
37 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
38 * becomes a,tb1,tb2,tb3,b,c)
40 public class AlignmentSorter
/*
 * TODO: refactor searches to follow a basic pattern: (search property, last
 * search state, current sort direction)
 *
 * All of the fields below are static, so the "last sort" state is shared by
 * every caller of this class.
 */

/** Direction flag read by sortByID and flipped at the end of every call. */
static boolean sortIdAscending = true;

/** Hash value remembered by sortByGroup between calls. */
static int lastGroupHash = 0;

/** Direction flag maintained by sortByGroup. */
static boolean sortGroupAscending = true;

/** Compared by reference with the order passed to sortBy. */
static AlignmentOrder lastOrder = null;

/** Direction flag maintained by sortBy. */
static boolean sortOrderAscending = true;

/** Compared by reference with the tree passed to sortByTree. */
static NJTree lastTree = null;

/** Direction flag maintained by sortByTree. */
static boolean sortTreeAscending = true;

/**
 * last Annotation Label used by sortByScore
 */
private static String lastSortByScore;

// NOTE(review): no code visible in this file reads this flag - confirm
// whether it is still needed.
private static boolean sortByScoreAscending = true;

/**
 * compact representation of last arguments to SortByFeatureScore
 */
private static String lastSortByFeatureScore;

/** Direction flag maintained by the array form of sortByFeature. */
private static boolean sortByFeatureScoreAscending = true;

// No initialiser, so this starts as false (unlike the other direction flags,
// which start as true). Read and flipped by sortByLength.
private static boolean sortLengthAscending;
77 * Sort by Percentage Identity w.r.t. s
84 * sequences from align that are to be sorted.
86 public static void sortByPID(AlignmentI align, SequenceI s,
89 sortByPID(align, s, tosort, 0, -1);
93 * Sort by Percentage Identity w.r.t. s
100 * sequences from align that are to be sorted.
102 * start column (0 for beginning
105 public static void sortByPID(AlignmentI align, SequenceI s,
106 SequenceI[] tosort, int start, int end)
108 int nSeq = align.getHeight();
110 float[] scores = new float[nSeq];
111 SequenceI[] seqs = new SequenceI[nSeq];
113 for (int i = 0; i < nSeq; i++)
115 scores[i] = Comparison.PID(align.getSequenceAt(i)
116 .getSequenceAsString(), s.getSequenceAsString());
117 seqs[i] = align.getSequenceAt(i);
120 QuickSort.sort(scores, 0, scores.length - 1, seqs);
122 setReverseOrder(align, seqs);
126 * Reverse the order of the sort
133 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
135 int nSeq = seqs.length;
145 len = (nSeq + 1) / 2;
148 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
150 synchronized (asq = align.getSequences())
152 for (int i = 0; i < len; i++)
154 // SequenceI tmp = seqs[i];
155 asq.set(i, seqs[nSeq - i - 1]);
156 asq.set(nSeq - i - 1, seqs[i]);
162 * Sets the Alignment object with the given sequences
165 * Alignment object to be updated
167 * sequences as a vector
169 private static void setOrder(AlignmentI align, Vector tmp)
171 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
175 * Sets the Alignment object with the given sequences
180 * sequences as an array
182 public static void setOrder(AlignmentI align, SequenceI[] seqs)
184 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
185 List<SequenceI> algn;
186 synchronized (algn = align.getSequences())
188 List<SequenceI> tmp = new ArrayList<SequenceI>();
190 for (int i = 0; i < seqs.length; i++)
192 if (algn.contains(seqs[i]))
199 // User may have hidden seqs, then clicked undo or redo
200 for (int i = 0; i < tmp.size(); i++)
202 algn.add(tmp.get(i));
208 * Sorts by ID. Numbers are sorted before letters.
211 * The alignment object to sort
213 public static void sortByID(AlignmentI align)
215 int nSeq = align.getHeight();
217 String[] ids = new String[nSeq];
218 SequenceI[] seqs = new SequenceI[nSeq];
220 for (int i = 0; i < nSeq; i++)
222 ids[i] = align.getSequenceAt(i).getName();
223 seqs[i] = align.getSequenceAt(i);
226 QuickSort.sort(ids, seqs);
230 setReverseOrder(align, seqs);
234 setOrder(align, seqs);
237 sortIdAscending = !sortIdAscending;
241 * Sorts by sequence length
244 * The alignment object to sort
246 public static void sortByLength(AlignmentI align)
248 int nSeq = align.getHeight();
250 float[] length = new float[nSeq];
251 SequenceI[] seqs = new SequenceI[nSeq];
253 for (int i = 0; i < nSeq; i++)
255 seqs[i] = align.getSequenceAt(i);
256 length[i] = (float) (seqs[i].getEnd() - seqs[i].getStart());
259 QuickSort.sort(length, seqs);
261 if (sortLengthAscending)
263 setReverseOrder(align, seqs);
267 setOrder(align, seqs);
270 sortLengthAscending = !sortLengthAscending;
274 * Sorts the alignment by size of group. <br>
275 * Maintains the order of sequences in each group by order in given alignment
279 * sorts the given alignment object by group
281 public static void sortByGroup(AlignmentI align)
283 // MAINTAINS ORIGNAL SEQUENCE ORDER,
284 // ORDERS BY GROUP SIZE
285 Vector groups = new Vector();
287 if (groups.hashCode() != lastGroupHash)
289 sortGroupAscending = true;
290 lastGroupHash = groups.hashCode();
294 sortGroupAscending = !sortGroupAscending;
297 // SORTS GROUPS BY SIZE
298 // ////////////////////
299 for (SequenceGroup sg:align.getGroups())
301 for (int j = 0; j < groups.size(); j++)
303 SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
305 if (sg.getSize() > sg2.getSize())
307 groups.insertElementAt(sg, j);
313 if (!groups.contains(sg))
315 groups.addElement(sg);
319 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
320 // /////////////////////////////////////////////
321 Vector seqs = new Vector();
323 for (int i = 0; i < groups.size(); i++)
325 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
326 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
328 for (int j = 0; j < orderedseqs.length; j++)
330 seqs.addElement(orderedseqs[j]);
334 if (sortGroupAscending)
336 setOrder(align, seqs);
340 setReverseOrder(align,
341 vectorSubsetToArray(seqs, align.getSequences()));
346 * Converts Vector to array. java 1.18 does not have Vector.toArray()
349 * Vector of SequenceI objects
351 * @return array of Sequence[]
353 private static SequenceI[] vectorToArray(Vector tmp)
355 SequenceI[] seqs = new SequenceI[tmp.size()];
357 for (int i = 0; i < tmp.size(); i++)
359 seqs[i] = (SequenceI) tmp.elementAt(i);
366 * Select sequences in order from tmp that is present in mask, and any
367 * remaining seqeunces in mask not in tmp
370 * thread safe collection of sequences
372 * thread safe collection of sequences
374 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
376 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
377 List<SequenceI> mask)
379 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
381 boolean[] tmask = new boolean[mask.size()];
383 for (i = 0; i < mask.size(); i++)
388 for (i = 0; i < tmp.size(); i++)
390 SequenceI sq = tmp.get(i);
391 idx = mask.indexOf(sq);
392 if (idx > -1 && tmask[idx])
399 for (i = 0; i < tmask.length; i++)
403 seqs.add(mask.get(i));
407 return seqs.toArray(new SequenceI[seqs.size()]);
411 * Sorts by a given AlignmentOrder object
416 * specified order for alignment
418 public static void sortBy(AlignmentI align, AlignmentOrder order)
420 // Get an ordered vector of sequences which may also be present in align
421 Vector tmp = order.getOrder();
423 if (lastOrder == order)
425 sortOrderAscending = !sortOrderAscending;
429 sortOrderAscending = true;
432 if (sortOrderAscending)
434 setOrder(align, tmp);
438 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
450 * @return DOCUMENT ME!
452 private static Vector getOrderByTree(AlignmentI align, NJTree tree)
454 int nSeq = align.getHeight();
456 Vector tmp = new Vector();
458 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
460 if (tmp.size() != nSeq)
462 // TODO: JBPNote - decide if this is always an error
463 // (eg. not when a tree is associated to another alignment which has more
465 if (tmp.size() != nSeq)
467 addStrays(align, tmp);
470 if (tmp.size() != nSeq)
472 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
473 + nSeq + " in getOrderByTree - tree contains sequences not in alignment");
481 * Sorts the alignment by a given tree
488 public static void sortByTree(AlignmentI align, NJTree tree)
490 Vector tmp = getOrderByTree(align, tree);
492 // tmp should properly permute align with tree.
493 if (lastTree != tree)
495 sortTreeAscending = true;
500 sortTreeAscending = !sortTreeAscending;
503 if (sortTreeAscending)
505 setOrder(align, tmp);
509 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
521 private static void addStrays(AlignmentI align, Vector seqs)
523 int nSeq = align.getHeight();
525 for (int i = 0; i < nSeq; i++)
527 if (!seqs.contains(align.getSequenceAt(i)))
529 seqs.addElement(align.getSequenceAt(i));
533 if (nSeq != seqs.size())
536 .println("ERROR: Size still not right even after addStrays");
550 * @return DOCUMENT ME!
552 private static Vector _sortByTree(SequenceNode node, Vector tmp,
553 List<SequenceI> seqset)
560 SequenceNode left = (SequenceNode) node.left();
561 SequenceNode right = (SequenceNode) node.right();
563 if ((left == null) && (right == null))
565 if (!node.isPlaceholder() && (node.element() != null))
567 if (node.element() instanceof SequenceI)
569 if (!tmp.contains(node.element())) // && (seqset==null || seqset.size()==0 || seqset.contains(tmp)))
571 tmp.addElement((SequenceI) node.element());
580 _sortByTree(left, tmp, seqset);
581 _sortByTree(right, tmp, seqset);
588 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
593 * recover the order of sequences given by the safe numbering scheme introducd
594 * SeqsetUtils.uniquify.
596 public static void recoverOrder(SequenceI[] alignment)
598 float[] ids = new float[alignment.length];
600 for (int i = 0; i < alignment.length; i++)
602 ids[i] = (new Float(alignment[i].getName().substring(8)))
606 jalview.util.QuickSort.sort(ids, alignment);
610 * Sort sequence in order of increasing score attribute for annotation with a
611 * particular scoreLabel. Or reverse if same label was used previously
614 * exact label for sequence associated AlignmentAnnotation scores to
617 * sequences to be sorted
619 public static void sortByAnnotationScore(String scoreLabel,
620 AlignmentI alignment)
622 SequenceI[] seqs = alignment.getSequencesArray();
623 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
625 int hasScores = 0; // number of scores present on set
626 double[] scores = new double[seqs.length];
627 double min = 0, max = 0;
628 for (int i = 0; i < seqs.length; i++)
630 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
631 if (scoreAnn != null)
635 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
639 max = min = scores[i];
660 return; // do nothing - no scores present to sort by.
662 if (hasScores < seqs.length)
664 for (int i = 0; i < seqs.length; i++)
668 scores[i] = (max + i + 1.0);
673 jalview.util.QuickSort.sort(scores, seqs);
674 if (lastSortByScore != scoreLabel)
676 lastSortByScore = scoreLabel;
677 setOrder(alignment, seqs);
681 setReverseOrder(alignment, seqs);
/*
 * types of feature ordering: Sort by score : average score - or total score -
 * over all features in region Sort by feature label text: (or if null -
 * feature type text) - numerical or alphabetical Sort by feature density:
 * based on counts - ignoring individual text or scores for each feature
 *
 * The array form of sortByFeature checks its 'method' argument against these
 * three fields and raises an Error for anything else, so callers should pass
 * one of the fields themselves.
 */

/** Method selector: order by the average score of the selected features. */
public static String FEATURE_SCORE = "average_score";

/**
 * Method selector: order by feature label text. sortByFeature currently
 * ends with an Error("Not yet implemented.") for this selector.
 */
public static String FEATURE_LABEL = "text";

/** Method selector: order by the number of selected features. */
public static String FEATURE_DENSITY = "density";
698 * sort the alignment using the features on each sequence found between start
699 * and stop with the given featureLabel (and optional group qualifier)
701 * @param featureLabel
706 * (-1 to include non-positional features)
708 * (-1 to only sort on non-positional features)
710 * - aligned sequences containing features
712 * - one of the string constants FEATURE_SCORE, FEATURE_LABEL,
715 public static void sortByFeature(String featureLabel, String groupLabel,
716 int start, int stop, AlignmentI alignment, String method)
718 sortByFeature(featureLabel == null ? null : new String[]
719 { featureLabel }, groupLabel == null ? null : new String[]
720 { groupLabel }, start, stop, alignment, method);
723 private static boolean containsIgnoreCase(final String lab,
734 for (int q = 0; q < labs.length; q++)
736 if (labs[q] != null && lab.equalsIgnoreCase(labs[q]))
744 public static void sortByFeature(String[] featureLabels,
745 String[] groupLabels, int start, int stop, AlignmentI alignment,
748 if (method != FEATURE_SCORE && method != FEATURE_LABEL
749 && method != FEATURE_DENSITY)
752 "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY.");
754 boolean ignoreScore = method != FEATURE_SCORE;
755 StringBuffer scoreLabel = new StringBuffer();
756 scoreLabel.append(start + stop + method);
757 // This doesn't quite work yet - we'd like to have a canonical ordering that
758 // can be preserved from call to call
759 for (int i = 0; featureLabels != null && i < featureLabels.length; i++)
761 scoreLabel.append(featureLabels[i] == null ? "null"
764 for (int i = 0; groupLabels != null && i < groupLabels.length; i++)
766 scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]);
768 SequenceI[] seqs = alignment.getSequencesArray();
770 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
772 int hasScores = 0; // number of scores present on set
773 double[] scores = new double[seqs.length];
774 int[] seqScores = new int[seqs.length];
775 Object[] feats = new Object[seqs.length];
776 double min = 0, max = 0;
777 for (int i = 0; i < seqs.length; i++)
779 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
780 if (sf == null && seqs[i].getDatasetSequence() != null)
782 sf = seqs[i].getDatasetSequence().getSequenceFeatures();
786 sf = new SequenceFeature[0];
790 SequenceFeature[] tmp = new SequenceFeature[sf.length];
791 for (int s = 0; s < tmp.length; s++)
797 int sstart = (start == -1) ? start : seqs[i].findPosition(start);
798 int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
802 for (int f = 0; f < sf.length; f++)
804 // filter for selection criteria
806 // ignore features outwith alignment start-stop positions.
807 (sf[f].end < sstart || sf[f].begin > sstop) ||
808 // or ignore based on selection criteria
809 (featureLabels != null && !AlignmentSorter
810 .containsIgnoreCase(sf[f].type, featureLabels))
811 || (groupLabels != null
812 // problem here: we cannot eliminate null feature group features
813 && (sf[f].getFeatureGroup() != null && !AlignmentSorter
814 .containsIgnoreCase(sf[f].getFeatureGroup(),
817 // forget about this feature
823 // or, also take a look at the scores if necessary.
824 if (!ignoreScore && sf[f].getScore() != Float.NaN)
826 if (seqScores[i] == 0)
832 scores[i] += sf[f].getScore(); // take the first instance of this
837 SequenceFeature[] fs;
838 feats[i] = fs = new SequenceFeature[n];
842 for (int f = 0; f < sf.length; f++)
846 ((SequenceFeature[]) feats[i])[n++] = sf[f];
849 if (method == FEATURE_LABEL)
851 // order the labels by alphabet
852 String[] labs = new String[fs.length];
853 for (int l = 0; l < labs.length; l++)
855 labs[l] = (fs[l].getDescription() != null ? fs[l]
856 .getDescription() : fs[l].getType());
858 jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
863 // compute average score
864 scores[i] /= seqScores[i];
865 // update the score bounds.
868 max = min = scores[i];
884 if (method == FEATURE_SCORE)
888 return; // do nothing - no scores present to sort by.
891 if (hasScores < seqs.length)
893 for (int i = 0; i < seqs.length; i++)
897 scores[i] = (max + 1 + i);
901 int nf = (feats[i] == null) ? 0
902 : ((SequenceFeature[]) feats[i]).length;
903 // System.err.println("Sorting on Score: seq "+seqs[i].getName()+
904 // " Feats: "+nf+" Score : "+scores[i]);
909 jalview.util.QuickSort.sort(scores, seqs);
911 else if (method == FEATURE_DENSITY)
914 // break ties between equivalent numbers for adjacent sequences by adding
915 // 1/Nseq*i on the original order
916 double fr = 0.9 / (1.0 * seqs.length);
917 for (int i = 0; i < seqs.length; i++)
920 scores[i] = (0.05 + fr * i)
921 + (nf = ((feats[i] == null) ? 0.0
922 : 1.0 * ((SequenceFeature[]) feats[i]).length));
923 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
924 // " Feats: "+nf+" Score : "+scores[i]);
926 jalview.util.QuickSort.sort(scores, seqs);
930 if (method == FEATURE_LABEL)
932 throw new Error("Not yet implemented.");
935 if (lastSortByFeatureScore == null
936 || !scoreLabel.toString().equals(lastSortByFeatureScore))
938 sortByFeatureScoreAscending = true;
942 sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
944 if (sortByFeatureScoreAscending)
946 setOrder(alignment, seqs);
950 setReverseOrder(alignment, seqs);
952 lastSortByFeatureScore = scoreLabel.toString();