2 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
23 import jalview.datamodel.*;
24 import jalview.util.*;
27 * Routines for manipulating the order of a multiple sequence alignment TODO:
28 * this class retains some global states concerning sort-order which should be
29 * made attributes for the caller's alignment visualization. TODO: refactor to
30 * allow a subset of selected sequences to be sorted within the context of a
31 * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input
32 * data mapping to each tobesorted element to use ], Alignment context of
33 * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie
34 * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector
35 * resulting from applying the operation to tobesorted should be mapped back to
36 * the original positions in alignment. Otherwise, normal behaviour is to re
37 * order alignment so that tobesorted is sorted and grouped together starting
38 * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3
39 * becomes a,tb1,tb2,tb3,b,c)
41 public class AlignmentSorter
44 * todo: refactor searches to follow a basic pattern:
45 * (search property, last search state, current sort direction)
47 static boolean sortIdAscending = true;
49 static int lastGroupHash = 0;
51 static boolean sortGroupAscending = true;
53 static AlignmentOrder lastOrder = null;
55 static boolean sortOrderAscending = true;
57 static NJTree lastTree = null;
59 static boolean sortTreeAscending = true;
62 * last Annotation Label used by sortByScore
64 private static String lastSortByScore;
65 private static boolean sortByScoreAscending = true;
67 * compact representation of last arguments to SortByFeatureScore
69 private static String lastSortByFeatureScore;
70 private static boolean sortByFeatureScoreAscending = true;
72 private static boolean sortLengthAscending;
75 * Sort by Percentage Identity w.r.t. s
82 * sequences from align that are to be sorted.
84 public static void sortByPID(AlignmentI align, SequenceI s,
87 sortByPID(align,s,tosort,0,-1);
90 * Sort by Percentage Identity w.r.t. s
97 * sequences from align that are to be sorted.
98 * @param start start column (0 for beginning
101 public static void sortByPID(AlignmentI align, SequenceI s,
102 SequenceI[] tosort,int start, int end)
104 int nSeq = align.getHeight();
106 float[] scores = new float[nSeq];
107 SequenceI[] seqs = new SequenceI[nSeq];
109 for (int i = 0; i < nSeq; i++)
111 scores[i] = Comparison.PID(align.getSequenceAt(i)
112 .getSequenceAsString(), s.getSequenceAsString());
113 seqs[i] = align.getSequenceAt(i);
116 QuickSort.sort(scores, 0, scores.length - 1, seqs);
118 setReverseOrder(align, seqs);
122 * Reverse the order of the sort
129 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
131 int nSeq = seqs.length;
141 len = (nSeq + 1) / 2;
144 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
145 for (int i = 0; i < len; i++)
147 // SequenceI tmp = seqs[i];
148 align.getSequences().setElementAt(seqs[nSeq - i - 1], i);
149 align.getSequences().setElementAt(seqs[i], nSeq - i - 1);
154 * Sets the Alignment object with the given sequences
157 * Alignment object to be updated
159 * sequences as a vector
161 private static void setOrder(AlignmentI align, Vector tmp)
163 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
167 * Sets the Alignment object with the given sequences
172 * sequences as an array
174 public static void setOrder(AlignmentI align, SequenceI[] seqs)
176 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
177 Vector algn = align.getSequences();
178 Vector tmp = new Vector();
180 for (int i = 0; i < seqs.length; i++)
182 if (algn.contains(seqs[i]))
184 tmp.addElement(seqs[i]);
188 algn.removeAllElements();
189 // User may have hidden seqs, then clicked undo or redo
190 for (int i = 0; i < tmp.size(); i++)
192 algn.addElement(tmp.elementAt(i));
198 * Sorts by ID. Numbers are sorted before letters.
201 * The alignment object to sort
203 public static void sortByID(AlignmentI align)
205 int nSeq = align.getHeight();
207 String[] ids = new String[nSeq];
208 SequenceI[] seqs = new SequenceI[nSeq];
210 for (int i = 0; i < nSeq; i++)
212 ids[i] = align.getSequenceAt(i).getName();
213 seqs[i] = align.getSequenceAt(i);
216 QuickSort.sort(ids, seqs);
220 setReverseOrder(align, seqs);
224 setOrder(align, seqs);
227 sortIdAscending = !sortIdAscending;
230 * Sorts by sequence length
233 * The alignment object to sort
235 public static void sortByLength(AlignmentI align)
237 int nSeq = align.getHeight();
239 float[] length = new float[nSeq];
240 SequenceI[] seqs = new SequenceI[nSeq];
242 for (int i = 0; i < nSeq; i++)
244 seqs[i] = align.getSequenceAt(i);
245 length[i] = (float) (seqs[i].getEnd()-seqs[i].getStart());
248 QuickSort.sort(length, seqs);
250 if (sortLengthAscending)
252 setReverseOrder(align, seqs);
256 setOrder(align, seqs);
259 sortLengthAscending = !sortLengthAscending;
263 * Sorts the alignment by size of group. <br>
264 * Maintains the order of sequences in each group by order in given alignment
268 * sorts the given alignment object by group
270 public static void sortByGroup(AlignmentI align)
272 // MAINTAINS ORIGNAL SEQUENCE ORDER,
273 // ORDERS BY GROUP SIZE
274 Vector groups = new Vector();
276 if (groups.hashCode() != lastGroupHash)
278 sortGroupAscending = true;
279 lastGroupHash = groups.hashCode();
283 sortGroupAscending = !sortGroupAscending;
286 // SORTS GROUPS BY SIZE
287 // ////////////////////
288 for (int i = 0; i < align.getGroups().size(); i++)
290 SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i);
292 for (int j = 0; j < groups.size(); j++)
294 SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
296 if (sg.getSize() > sg2.getSize())
298 groups.insertElementAt(sg, j);
304 if (!groups.contains(sg))
306 groups.addElement(sg);
310 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
311 // /////////////////////////////////////////////
312 Vector seqs = new Vector();
314 for (int i = 0; i < groups.size(); i++)
316 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
317 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
319 for (int j = 0; j < orderedseqs.length; j++)
321 seqs.addElement(orderedseqs[j]);
325 if (sortGroupAscending)
327 setOrder(align, seqs);
331 setReverseOrder(align,
332 vectorSubsetToArray(seqs, align.getSequences()));
337 * Converts Vector to array. java 1.18 does not have Vector.toArray()
340 * Vector of SequenceI objects
342 * @return array of Sequence[]
344 private static SequenceI[] vectorToArray(Vector tmp)
346 SequenceI[] seqs = new SequenceI[tmp.size()];
348 for (int i = 0; i < tmp.size(); i++)
350 seqs[i] = (SequenceI) tmp.elementAt(i);
364 * @return DOCUMENT ME!
366 private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask)
368 Vector seqs = new Vector();
370 boolean[] tmask = new boolean[mask.size()];
372 for (i = 0; i < mask.size(); i++)
377 for (i = 0; i < tmp.size(); i++)
379 Object sq = tmp.elementAt(i);
380 idx = mask.indexOf(sq);
381 if (idx>-1 && tmask[idx])
388 for (i = 0; i < tmask.length; i++)
392 seqs.addElement(mask.elementAt(i));
396 return vectorToArray(seqs);
400 * Sorts by a given AlignmentOrder object
405 * specified order for alignment
407 public static void sortBy(AlignmentI align, AlignmentOrder order)
409 // Get an ordered vector of sequences which may also be present in align
410 Vector tmp = order.getOrder();
412 if (lastOrder == order)
414 sortOrderAscending = !sortOrderAscending;
418 sortOrderAscending = true;
421 if (sortOrderAscending)
423 setOrder(align, tmp);
427 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
439 * @return DOCUMENT ME!
441 private static Vector getOrderByTree(AlignmentI align, NJTree tree)
443 int nSeq = align.getHeight();
445 Vector tmp = new Vector();
447 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
449 if (tmp.size() != nSeq)
451 // TODO: JBPNote - decide if this is always an error
452 // (eg. not when a tree is associated to another alignment which has more
454 if (tmp.size() < nSeq)
456 addStrays(align, tmp);
459 if (tmp.size() != nSeq)
461 System.err.println("ERROR: tmp.size()=" + tmp.size() + " != nseq="
462 + nSeq + " in getOrderByTree");
470 * Sorts the alignment by a given tree
477 public static void sortByTree(AlignmentI align, NJTree tree)
479 Vector tmp = getOrderByTree(align, tree);
481 // tmp should properly permute align with tree.
482 if (lastTree != tree)
484 sortTreeAscending = true;
489 sortTreeAscending = !sortTreeAscending;
492 if (sortTreeAscending)
494 setOrder(align, tmp);
498 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
510 private static void addStrays(AlignmentI align, Vector seqs)
512 int nSeq = align.getHeight();
514 for (int i = 0; i < nSeq; i++)
516 if (!seqs.contains(align.getSequenceAt(i)))
518 seqs.addElement(align.getSequenceAt(i));
522 if (nSeq != seqs.size())
525 .println("ERROR: Size still not right even after addStrays");
539 * @return DOCUMENT ME!
541 private static Vector _sortByTree(SequenceNode node, Vector tmp,
549 SequenceNode left = (SequenceNode) node.left();
550 SequenceNode right = (SequenceNode) node.right();
552 if ((left == null) && (right == null))
554 if (!node.isPlaceholder() && (node.element() != null))
556 if (node.element() instanceof SequenceI)
558 if (!tmp.contains(node.element()))
560 tmp.addElement((SequenceI) node.element());
569 _sortByTree(left, tmp, seqset);
570 _sortByTree(right, tmp, seqset);
577 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
582 * recover the order of sequences given by the safe numbering scheme introducd
583 * SeqsetUtils.uniquify.
585 public static void recoverOrder(SequenceI[] alignment)
587 float[] ids = new float[alignment.length];
589 for (int i = 0; i < alignment.length; i++)
591 ids[i] = (new Float(alignment[i].getName().substring(8)))
595 jalview.util.QuickSort.sort(ids, alignment);
599 * Sort sequence in order of increasing score attribute for annotation with a
600 * particular scoreLabel. Or reverse if same label was used previously
603 * exact label for sequence associated AlignmentAnnotation
604 * scores to use for sorting.
606 * sequences to be sorted
608 public static void sortByAnnotationScore(String scoreLabel,
609 AlignmentI alignment)
611 SequenceI[] seqs = alignment.getSequencesArray();
612 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
614 int hasScores = 0; // number of scores present on set
615 double[] scores = new double[seqs.length];
616 double min = 0, max = 0;
617 for (int i = 0; i < seqs.length; i++)
619 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
620 if (scoreAnn != null)
624 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
628 max = min = scores[i];
649 return; // do nothing - no scores present to sort by.
651 if (hasScores < seqs.length)
653 for (int i = 0; i < seqs.length; i++)
657 scores[i] = (max + i+1.0);
662 jalview.util.QuickSort.sort(scores, seqs);
663 if (lastSortByScore != scoreLabel)
665 lastSortByScore = scoreLabel;
666 setOrder(alignment, seqs);
670 setReverseOrder(alignment, seqs);
674 * types of feature ordering:
675 * Sort by score : average score - or total score - over all features in region
676 * Sort by feature label text: (or if null - feature type text) - numerical or alphabetical
677 * Sort by feature density: based on counts - ignoring individual text or scores for each feature
679 public static String FEATURE_SCORE="average_score";
680 public static String FEATURE_LABEL="text";
681 public static String FEATURE_DENSITY="density";
684 * sort the alignment using the features on each sequence found between start and stop with the given featureLabel (and optional group qualifier)
685 * @param featureLabel (may not be null)
686 * @param groupLabel (may be null)
687 * @param start (-1 to include non-positional features)
688 * @param stop (-1 to only sort on non-positional features)
689 * @param alignment - aligned sequences containing features
690 * @param method - one of the string constants FEATURE_SCORE, FEATURE_LABEL, FEATURE_DENSITY
692 public static void sortByFeature(String featureLabel, String groupLabel, int start, int stop,
693 AlignmentI alignment, String method)
695 sortByFeature(featureLabel==null ? null : new String[] {featureLabel},
696 groupLabel==null ? null : new String[] {groupLabel}, start, stop, alignment, method);
698 private static boolean containsIgnoreCase(final String lab, final String[] labs)
708 for (int q=0;q<labs.length;q++)
710 if (labs[q]!=null && lab.equalsIgnoreCase(labs[q]))
717 public static void sortByFeature(String[] featureLabels, String[] groupLabels, int start, int stop,
718 AlignmentI alignment, String method)
720 if (method!=FEATURE_SCORE && method!=FEATURE_LABEL && method!=FEATURE_DENSITY)
722 throw new Error("Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY.");
724 boolean ignoreScore=method!=FEATURE_SCORE;
725 StringBuffer scoreLabel = new StringBuffer();
726 scoreLabel.append(start+stop+method);
727 // This doesn't quite work yet - we'd like to have a canonical ordering that can be preserved from call to call
728 for (int i=0;featureLabels!=null && i<featureLabels.length; i++)
730 scoreLabel.append(featureLabels[i]==null ? "null" : featureLabels[i]);
732 for (int i=0;groupLabels!=null && i<groupLabels.length; i++)
734 scoreLabel.append(groupLabels[i]==null ? "null" : groupLabels[i]);
736 SequenceI[] seqs = alignment.getSequencesArray();
738 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
740 int hasScores = 0; // number of scores present on set
741 double[] scores = new double[seqs.length];
742 int[] seqScores = new int[seqs.length];
743 Object[] feats = new Object[seqs.length];
744 double min = 0, max = 0;
745 for (int i = 0; i < seqs.length; i++)
747 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
748 if (sf==null && seqs[i].getDatasetSequence()!=null)
750 sf = seqs[i].getDatasetSequence().getSequenceFeatures();
754 sf = new SequenceFeature[0];
756 SequenceFeature[] tmp = new SequenceFeature[sf.length];
757 for (int s=0; s<tmp.length;s++)
763 int sstart = (start==-1) ? start : seqs[i].findPosition(start);
764 int sstop = (stop==-1) ? stop : seqs[i].findPosition(stop);
768 for (int f=0;f<sf.length;f++)
770 // filter for selection criteria
772 // ignore features outwith alignment start-stop positions.
773 (sf[f].end < sstart || sf[f].begin > sstop)
775 // or ignore based on selection criteria
776 (featureLabels != null && !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels))
777 || (groupLabels != null
778 // problem here: we cannot eliminate null feature group features
779 && (sf[f].getFeatureGroup() != null
780 && !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(), groupLabels))))
782 // forget about this feature
786 // or, also take a look at the scores if necessary.
787 if (!ignoreScore && sf[f].getScore()!=Float.NaN)
795 scores[i] += sf[f].getScore(); // take the first instance of this
800 SequenceFeature[] fs;
801 feats[i] = fs = new SequenceFeature[n];
805 for (int f=0;f<sf.length;f++)
809 ((SequenceFeature[]) feats[i])[n++] = sf[f];
812 if (method==FEATURE_LABEL)
814 // order the labels by alphabet
815 String[] labs = new String[fs.length];
816 for (int l=0;l<labs.length; l++)
818 labs[l] = (fs[l].getDescription()!=null ? fs[l].getDescription() : fs[l].getType());
820 jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
825 // compute average score
826 scores[i]/=seqScores[i];
827 // update the score bounds.
830 max = min = scores[i];
846 if (method==FEATURE_SCORE)
850 return; // do nothing - no scores present to sort by.
853 if (hasScores < seqs.length)
855 for (int i = 0; i < seqs.length; i++)
859 scores[i] = (max + 1+i);
861 int nf=(feats[i]==null) ? 0 :((SequenceFeature[]) feats[i]).length;
862 // System.err.println("Sorting on Score: seq "+seqs[i].getName()+ " Feats: "+nf+" Score : "+scores[i]);
867 jalview.util.QuickSort.sort(scores, seqs);
870 if (method==FEATURE_DENSITY)
873 // break ties between equivalent numbers for adjacent sequences by adding 1/Nseq*i on the original order
874 double fr = 0.9/(1.0*seqs.length);
875 for (int i=0;i<seqs.length; i++)
878 scores[i] = (0.05+fr*i)+(nf=((feats[i]==null) ? 0.0 :1.0*((SequenceFeature[]) feats[i]).length));
879 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ " Feats: "+nf+" Score : "+scores[i]);
881 jalview.util.QuickSort.sort(scores, seqs);
884 if (method==FEATURE_LABEL)
886 throw new Error("Not yet implemented.");
889 if (lastSortByFeatureScore ==null || !scoreLabel.toString().equals(lastSortByFeatureScore))
891 sortByFeatureScoreAscending = true;
893 sortByFeatureScoreAscending = !sortByFeatureScoreAscending;
895 if (sortByFeatureScoreAscending) {
896 setOrder(alignment, seqs);
900 setReverseOrder(alignment, seqs);
902 lastSortByFeatureScore = scoreLabel.toString();