2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.AlignmentOrder;
26 import jalview.datamodel.SequenceFeature;
27 import jalview.datamodel.SequenceGroup;
28 import jalview.datamodel.SequenceI;
29 import jalview.datamodel.SequenceNode;
30 import jalview.util.Comparison;
31 import jalview.util.MessageManager;
32 import jalview.util.QuickSort;
34 import java.util.ArrayList;
35 import java.util.List;
38 * Routines for manipulating the order of a multiple sequence alignment TODO:
39 * this class retains some global states concerning sort-order which should be
40 * made attributes for the caller's alignment visualization. TODO: refactor to
41 * allow a subset of selected sequences to be sorted within the context of a
42 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
43 * data mapping to each tobesorted element to use ], Alignment context of
44 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
45 * sequence to be sorted w.r.t.]) sortinplace implies that the sorted vector
46 * resulting from applying the operation to tobesorted should be mapped back to
47 * the original positions in alignment. Otherwise, normal behaviour is to re
48 * order alignment so that tobesorted is sorted and grouped together starting
49 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
50 * becomes a,tb1,tb2,tb3,b,c)
52 public class AlignmentSorter
55 * todo: refactor searches to follow a basic pattern: (search property, last
56 * search state, current sort direction)
// Static (class-wide) sort state. Each sortByXxx method consults and updates
// its own entry so that repeating a sort can flip its direction.
// Direction flag toggled at the end of sortByID.
58 static boolean sortIdAscending = true;
// Hash remembered by sortByGroup to detect a repeated group sort.
60 static int lastGroupHash = 0;
// Direction flag used by sortByGroup.
62 static boolean sortGroupAscending = true;
// Order object compared (by reference) in sortBy.
64 static AlignmentOrder lastOrder = null;
// Direction flag used by sortBy.
66 static boolean sortOrderAscending = true;
// Tree compared (by reference) in sortByTree.
68 static NJTree lastTree = null;
// Direction flag used by sortByTree.
70 static boolean sortTreeAscending = true;
73 * last Annotation Label used by sortByScore
75 private static String lastSortByScore;
// NOTE(review): no line visible in this excerpt reads or writes this flag.
77 private static boolean sortByScoreAscending = true;
80 * compact representation of last arguments to SortByFeatureScore
82 private static String lastSortByFeatureScore;
// Direction flag used by sortByFeature together with lastSortByFeatureScore.
84 private static boolean sortByFeatureScoreAscending = true;
// Direction flag used by sortByLength; no initialiser, so it starts as false.
86 private static boolean sortLengthAscending;
89 * Sort by Percentage Identity w.r.t. s
96 * sequences from align that are to be sorted.
// Convenience overload: forwards to the five-argument sortByPID with
// start = 0 and end = -1.
98 public static void sortByPID(AlignmentI align, SequenceI s,
101 sortByPID(align, s, tosort, 0, -1);
105 * Sort by Percentage Identity w.r.t. s
112 * sequences from align that are to be sorted.
114 * start column (0 for beginning)
// Scores every sequence of align against s with Comparison.PID, sorts the
// sequences on that score and writes the result back through setReverseOrder.
// NOTE(review): tosort, start and end are not referenced by any line visible
// in this excerpt - confirm whether they are meant to restrict the sort.
117 public static void sortByPID(AlignmentI align, SequenceI s,
118 SequenceI[] tosort, int start, int end)
120 int nSeq = align.getHeight();
// scores[i] is the sort key for seqs[i]
122 float[] scores = new float[nSeq];
123 SequenceI[] seqs = new SequenceI[nSeq];
125 for (int i = 0; i < nSeq; i++)
// identity of sequence i against the reference s, using the full strings
127 scores[i] = Comparison.PID(align.getSequenceAt(i)
128 .getSequenceAsString(), s.getSequenceAsString());
129 seqs[i] = align.getSequenceAt(i);
// seqs is reordered alongside scores (sort direction is defined by QuickSort)
132 QuickSort.sort(scores, seqs);
// the sorted array is always written back reversed; there is no toggle here
134 setReverseOrder(align, seqs);
138 * Reverse the order of the sort
// Writes seqs into the alignment's sequence list in reverse order: position i
// receives seqs[nSeq - i - 1] and position nSeq - i - 1 receives seqs[i].
145 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
147 int nSeq = seqs.length;
// number of mirrored pairs to write; only this assignment to len is visible
// in this excerpt
157 len = (nSeq + 1) / 2;
160 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
// the list returned by align.getSequences() is used as the lock while it is
// rewritten
162 synchronized (asq = align.getSequences())
164 for (int i = 0; i < len; i++)
166 // SequenceI tmp = seqs[i];
// both ends are written from seqs (not from asq), so no temporary is needed
167 asq.set(i, seqs[nSeq - i - 1]);
168 asq.set(nSeq - i - 1, seqs[i]);
174 * Sets the Alignment object with the given sequences
177 * Alignment object to be updated
179 * sequences as a vector
// List overload: reconciles tmp with the alignment's current sequences via
// vectorSubsetToArray, then delegates to the array overload of setOrder.
181 private static void setOrder(AlignmentI align, List<SequenceI> tmp)
183 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
187 * Sets the Alignment object with the given sequences
192 * sequences as an array
// Rebuilds the alignment's sequence list in the order given by seqs, while
// holding the list returned by align.getSequences() as a lock.
194 public static void setOrder(AlignmentI align, SequenceI[] seqs)
196 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
197 List<SequenceI> algn;
198 synchronized (algn = align.getSequences())
// staging list for the new order
200 List<SequenceI> tmp = new ArrayList<SequenceI>();
202 for (int i = 0; i < seqs.length; i++)
// only sequences still present in the alignment are considered; the statement
// guarded by this test is not visible in this excerpt (presumably it adds
// seqs[i] to tmp - confirm)
204 if (algn.contains(seqs[i]))
211 // User may have hidden seqs, then clicked undo or redo
212 for (int i = 0; i < tmp.size(); i++)
// append the staged sequences to the alignment list in their new order
214 algn.add(tmp.get(i));
220 * Sorts by ID. Numbers are sorted before letters.
223 * The alignment object to sort
// Sorts the alignment on sequence name and flips sortIdAscending so that the
// next call uses the other direction.
225 public static void sortByID(AlignmentI align)
227 int nSeq = align.getHeight();
// ids[i] is the sort key (name) for seqs[i]
229 String[] ids = new String[nSeq];
230 SequenceI[] seqs = new SequenceI[nSeq];
232 for (int i = 0; i < nSeq; i++)
234 ids[i] = align.getSequenceAt(i).getName();
235 seqs[i] = align.getSequenceAt(i);
// seqs is reordered alongside ids
238 QuickSort.sort(ids, seqs);
// one of the two write-backs below is chosen per call; the selecting
// condition is not visible in this excerpt (presumably sortIdAscending)
242 setReverseOrder(align, seqs);
246 setOrder(align, seqs);
// remember the direction for the next invocation
249 sortIdAscending = !sortIdAscending;
253 * Sorts by sequence length
256 * The alignment object to sort
// Sorts the alignment on (end - start) of each sequence and flips
// sortLengthAscending so that the next call uses the other direction.
258 public static void sortByLength(AlignmentI align)
260 int nSeq = align.getHeight();
// length[i] is the sort key for seqs[i]
262 float[] length = new float[nSeq];
263 SequenceI[] seqs = new SequenceI[nSeq];
265 for (int i = 0; i < nSeq; i++)
267 seqs[i] = align.getSequenceAt(i);
// key is getEnd() - getStart(); the same offset applies to every sequence,
// so the relative ordering is unaffected by the missing "+ 1"
268 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
271 QuickSort.sort(length, seqs);
// when the flag is set the sorted array is written back reversed
273 if (sortLengthAscending)
275 setReverseOrder(align, seqs);
279 setOrder(align, seqs);
282 sortLengthAscending = !sortLengthAscending;
286 * Sorts the alignment by size of group. <br>
287 * Maintains the order of sequences in each group by order in given alignment
291 * sorts the given alignment object by group
// Orders the alignment by group: groups are ranked by getSize(), and the
// members of each group are emitted in their existing alignment order.
293 public static void sortByGroup(AlignmentI align)
295 // MAINTAINS ORIGINAL SEQUENCE ORDER,
296 // ORDERS BY GROUP SIZE
297 List<SequenceGroup> groups = new ArrayList<SequenceGroup>();
// NOTE(review): no line visible here adds anything to groups before this
// test, so the hash appears to be that of a newly created list rather than
// of align.getGroups() - confirm whether that is intended.
299 if (groups.hashCode() != lastGroupHash)
301 sortGroupAscending = true;
302 lastGroupHash = groups.hashCode();
// same hash as last time: flip the direction
306 sortGroupAscending = !sortGroupAscending;
309 // SORTS GROUPS BY SIZE
310 // ////////////////////
311 for (SequenceGroup sg : align.getGroups())
// scan the groups ranked so far for the first one smaller than sg
313 for (int j = 0; j < groups.size(); j++)
315 SequenceGroup sg2 = groups.get(j);
317 if (sg.getSize() > sg2.getSize())
// sg was not placed by the scan above
325 if (!groups.contains(sg))
331 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
332 // /////////////////////////////////////////////
333 List<SequenceI> seqs = new ArrayList<SequenceI>();
335 for (int i = 0; i < groups.size(); i++)
337 SequenceGroup sg = groups.get(i);
// members of this group, in the order they occur in align
338 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
340 for (int j = 0; j < orderedseqs.length; j++)
342 seqs.add(orderedseqs[j]);
346 if (sortGroupAscending)
348 setOrder(align, seqs);
// reversed write-back; vectorSubsetToArray also appends any alignment
// sequences that are not in seqs
352 setReverseOrder(align,
353 vectorSubsetToArray(seqs, align.getSequences()));
358 * Select sequences in order from tmp that is present in mask, and any
359 * remaining sequences in mask not in tmp
362 * thread safe collection of sequences
364 * thread safe collection of sequences
366 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
// Builds an array holding the members of tmp that also occur in mask (in tmp
// order), followed by members of mask selected by the tmask flags.
368 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
369 List<SequenceI> mask)
372 // tmp2 = tmp.retainAll(mask);
373 // return tmp2.addAll(mask.removeAll(tmp2))
375 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
// one flag per element of mask; the statements that set and clear the flags
// are not visible in this excerpt
377 boolean[] tmask = new boolean[mask.size()];
379 for (i = 0; i < mask.size(); i++)
384 for (i = 0; i < tmp.size(); i++)
386 SequenceI sq = tmp.get(i);
// position of sq in mask, or -1 when it is not there
387 idx = mask.indexOf(sq);
// sq is in mask and its flag is still set
388 if (idx > -1 && tmask[idx])
395 for (i = 0; i < tmask.length; i++)
// presumably reached only for flags still set (the guard is not visible) -
// confirm
399 seqs.add(mask.get(i));
403 return seqs.toArray(new SequenceI[seqs.size()]);
407 * Sorts by a given AlignmentOrder object
412 * specified order for alignment
// Applies the ordering held by an AlignmentOrder object, reversing it when
// the same order object (compared by reference) is seen again.
// NOTE(review): no assignment to lastOrder is visible in this excerpt; if
// there is none, the reference test below can only succeed for a null order -
// confirm.
414 public static void sortBy(AlignmentI align, AlignmentOrder order)
416 // Get an ordered vector of sequences which may also be present in align
417 List<SequenceI> tmp = order.getOrder();
419 if (lastOrder == order)
421 sortOrderAscending = !sortOrderAscending;
// a different order object: start again in ascending direction
425 sortOrderAscending = true;
428 if (sortOrderAscending)
430 setOrder(align, tmp);
// reversed write-back of tmp reconciled with the alignment's sequences
434 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
446 * @return DOCUMENT ME!
// Collects the alignment's sequences in the order produced by _sortByTree
// from the tree's top node, then reconciles the result with the alignment
// height.
448 private static List<SequenceI> getOrderByTree(AlignmentI align,
451 int nSeq = align.getHeight();
453 List<SequenceI> tmp = new ArrayList<SequenceI>();
455 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
// the tree did not yield exactly one entry per alignment sequence
457 if (tmp.size() != nSeq)
459 // TODO: JBPNote - decide if this is always an error
460 // (eg. not when a tree is associated to another alignment which has more
462 if (tmp.size() != nSeq)
// append alignment sequences that the tree did not supply
464 addStrays(align, tmp);
// still mismatched after addStrays: report it
467 if (tmp.size() != nSeq)
470 .println("WARNING: tmp.size()="
474 + " in getOrderByTree - tree contains sequences not in alignment");
482 * Sorts the alignment by a given tree
// Orders the alignment by the given tree, reversing the direction when the
// same tree object (compared by reference) is used again.
489 public static void sortByTree(AlignmentI align, NJTree tree)
491 List<SequenceI> tmp = getOrderByTree(align, tree);
493 // tmp should properly permute align with tree.
// a different tree: start in ascending direction
494 if (lastTree != tree)
496 sortTreeAscending = true;
// same tree as the previous call: flip the direction
501 sortTreeAscending = !sortTreeAscending;
504 if (sortTreeAscending)
506 setOrder(align, tmp);
// reversed write-back of tmp reconciled with the alignment's sequences
510 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
// Appends to tmp every alignment sequence it does not already contain, and
// reports when tmp still differs in size from the alignment afterwards.
522 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
524 int nSeq = align.getHeight();
526 for (int i = 0; i < nSeq; i++)
528 if (!tmp.contains(align.getSequenceAt(i)))
530 tmp.add(align.getSequenceAt(i));
// sizes can still differ here, e.g. when tmp holds entries that are not in
// the alignment
534 if (nSeq != tmp.size())
537 .println("ERROR: Size still not right even after addStrays");
551 * @return DOCUMENT ME!
// Recursive walk of the tree below node: a leaf contributes its SequenceI
// element to tmp (once), an internal node recurses into left then right.
// NOTE(review): seqset is only passed through in the lines visible here; the
// membership test against it is commented out.
553 private static List<SequenceI> _sortByTree(SequenceNode node,
555 List<SequenceI> seqset)
562 SequenceNode left = (SequenceNode) node.left();
563 SequenceNode right = (SequenceNode) node.right();
// no children: this node is a leaf
565 if ((left == null) && (right == null))
// skip placeholder leaves and leaves without an element
567 if (!node.isPlaceholder() && (node.element() != null))
569 if (node.element() instanceof SequenceI)
571 if (!tmp.contains(node.element())) // && (seqset==null ||
572 // seqset.size()==0 ||
573 // seqset.contains(tmp)))
575 tmp.add((SequenceI) node.element());
// internal node: left subtree first, then right, appending into the same list
584 _sortByTree(left, tmp, seqset);
585 _sortByTree(right, tmp, seqset);
592 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
597 * recover the order of sequences given by the safe numbering scheme introducd
598 * SeqsetUtils.uniquify.
600 public static void recoverOrder(SequenceI[] alignment)
602 float[] ids = new float[alignment.length];
604 for (int i = 0; i < alignment.length; i++)
606 ids[i] = (new Float(alignment[i].getName().substring(8)))
610 jalview.util.QuickSort.sort(ids, alignment);
614 * Sort sequence in order of increasing score attribute for annotation with a
615 * particular scoreLabel. Or reverse if same label was used previously
618 * exact label for sequence associated AlignmentAnnotation scores to
621 * sequences to be sorted
623 public static void sortByAnnotationScore(String scoreLabel,
624 AlignmentI alignment)
626 SequenceI[] seqs = alignment.getSequencesArray();
627 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
629 int hasScores = 0; // number of scores present on set
630 double[] scores = new double[seqs.length];
631 double min = 0, max = 0;
632 for (int i = 0; i < seqs.length; i++)
634 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
635 if (scoreAnn != null)
639 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
643 max = min = scores[i];
664 return; // do nothing - no scores present to sort by.
666 if (hasScores < seqs.length)
668 for (int i = 0; i < seqs.length; i++)
672 scores[i] = (max + i + 1.0);
677 jalview.util.QuickSort.sort(scores, seqs);
678 if (lastSortByScore != scoreLabel)
680 lastSortByScore = scoreLabel;
681 setOrder(alignment, seqs);
685 setReverseOrder(alignment, seqs);
690 * types of feature ordering: Sort by score : average score - or total score -
691 * over all features in region Sort by feature label text: (or if null -
692 * feature type text) - numerical or alphabetical Sort by feature density:
693 * based on counts - ignoring individual text or scores for each feature
// Values accepted as the 'method' argument of sortByFeature.
// NOTE(review): these are public and not final, so other code could reassign
// them - confirm whether that is intended.
// sort on the average score of the selected features
695 public static String FEATURE_SCORE = "average_score";
// sort on feature label text
697 public static String FEATURE_LABEL = "text";
// sort on the number of selected features
699 public static String FEATURE_DENSITY = "density";
702 * sort the alignment using the features on each sequence found between start
703 * and stop with the given featureLabel (and optional group qualifier)
705 * @param featureLabel
710 * (-1 to include non-positional features)
712 * (-1 to only sort on non-positional features)
714 * - aligned sequences containing features
716 * - one of the string constants FEATURE_SCORE, FEATURE_LABEL, FEATURE_DENSITY
// Single-label convenience overload: wraps each non-null label in a
// one-element array (null stays null) and delegates to the array overload.
719 public static void sortByFeature(String featureLabel, String groupLabel,
720 int start, int stop, AlignmentI alignment, String method)
722 sortByFeature(featureLabel == null ? null : new String[]
723 { featureLabel }, groupLabel == null ? null : new String[]
724 { groupLabel }, start, stop, alignment, method);
// Case-insensitive search for lab among the entries of labs.
// NOTE(review): the lines before the loop and the return statements are not
// visible in this excerpt, so the handling of a null lab or null labs cannot
// be confirmed from here.
727 private static boolean containsIgnoreCase(final String lab,
738 for (int q = 0; q < labs.length; q++)
// null entries in labs are skipped
740 if (labs[q] != null && lab.equalsIgnoreCase(labs[q]))
748 public static void sortByFeature(String[] featureLabels,
749 String[] groupLabels, int start, int stop, AlignmentI alignment,
752 if (method != FEATURE_SCORE && method != FEATURE_LABEL
753 && method != FEATURE_DENSITY)
755 throw new Error(MessageManager.getString("error.implementation_error_sortbyfeature"));
757 boolean ignoreScore = method != FEATURE_SCORE;
758 StringBuffer scoreLabel = new StringBuffer();
759 scoreLabel.append(start + stop + method);
760 // This doesn't quite work yet - we'd like to have a canonical ordering that
761 // can be preserved from call to call
762 for (int i = 0; featureLabels != null && i < featureLabels.length; i++)
764 scoreLabel.append(featureLabels[i] == null ? "null"
767 for (int i = 0; groupLabels != null && i < groupLabels.length; i++)
769 scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]);
771 SequenceI[] seqs = alignment.getSequencesArray();
773 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
775 int hasScores = 0; // number of scores present on set
776 double[] scores = new double[seqs.length];
777 int[] seqScores = new int[seqs.length];
778 Object[] feats = new Object[seqs.length];
779 double min = 0, max = 0;
780 for (int i = 0; i < seqs.length; i++)
782 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
785 sf = new SequenceFeature[0];
789 SequenceFeature[] tmp = new SequenceFeature[sf.length];
790 for (int s = 0; s < tmp.length; s++)
796 int sstart = (start == -1) ? start : seqs[i].findPosition(start);
797 int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
801 for (int f = 0; f < sf.length; f++)
803 // filter for selection criteria
805 // ignore features outwith alignment start-stop positions.
806 (sf[f].end < sstart || sf[f].begin > sstop) ||
807 // or ignore based on selection criteria
808 (featureLabels != null && !AlignmentSorter
809 .containsIgnoreCase(sf[f].type, featureLabels))
810 || (groupLabels != null
811 // problem here: we cannot eliminate null feature group features
812 && (sf[f].getFeatureGroup() != null && !AlignmentSorter
813 .containsIgnoreCase(sf[f].getFeatureGroup(),
816 // forget about this feature
822 // or, also take a look at the scores if necessary.
823 if (!ignoreScore && sf[f].getScore() != Float.NaN)
825 if (seqScores[i] == 0)
831 scores[i] += sf[f].getScore(); // take the first instance of this
836 SequenceFeature[] fs;
837 feats[i] = fs = new SequenceFeature[n];
841 for (int f = 0; f < sf.length; f++)
845 ((SequenceFeature[]) feats[i])[n++] = sf[f];
848 if (method == FEATURE_LABEL)
850 // order the labels by alphabet
851 String[] labs = new String[fs.length];
852 for (int l = 0; l < labs.length; l++)
854 labs[l] = (fs[l].getDescription() != null ? fs[l]
855 .getDescription() : fs[l].getType());
857 jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
862 // compute average score
863 scores[i] /= seqScores[i];
864 // update the score bounds.
867 max = min = scores[i];
883 if (method == FEATURE_SCORE)
887 return; // do nothing - no scores present to sort by.
890 if (hasScores < seqs.length)
892 for (int i = 0; i < seqs.length; i++)
896 scores[i] = (max + 1 + i);
900 int nf = (feats[i] == null) ? 0
901 : ((SequenceFeature[]) feats[i]).length;
902 // System.err.println("Sorting on Score: seq "+seqs[i].getName()+
903 // " Feats: "+nf+" Score : "+scores[i]);
908 jalview.util.QuickSort.sort(scores, seqs);
910 else if (method == FEATURE_DENSITY)
913 // break ties between equivalent numbers for adjacent sequences by adding
914 // 1/Nseq*i on the original order
915 double fr = 0.9 / (1.0 * seqs.length);
916 for (int i = 0; i < seqs.length; i++)
919 scores[i] = (0.05 + fr * i)
920 + (nf = ((feats[i] == null) ? 0.0
921 : 1.0 * ((SequenceFeature[]) feats[i]).length));
922 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
923 // " Feats: "+nf+" Score : "+scores[i]);
925 jalview.util.QuickSort.sort(scores, seqs);
929 if (method == FEATURE_LABEL)
931 throw new Error(MessageManager.getString("error.not_yet_implemented"));
934 if (lastSortByFeatureScore == null
935 || !scoreLabel.toString().equals(lastSortByFeatureScore))
937 sortByFeatureScoreAscending = true;
941 sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
943 if (sortByFeatureScoreAscending)
945 setOrder(alignment, seqs);
949 setReverseOrder(alignment, seqs);
951 lastSortByFeatureScore = scoreLabel.toString();