/*
 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
19 package jalview.analysis;
23 import jalview.datamodel.*;
24 import jalview.util.*;
/**
 * Routines for manipulating the order of a multiple sequence alignment.
 * 
 * TODO: this class retains some global state concerning sort order, which
 * should be made attributes of the caller's alignment visualization.
 * 
 * TODO: refactor to allow a subset of selected sequences to be sorted within
 * the context of a whole alignment. The sort method template is: (SequenceI[]
 * tobesorted, [input data mapping to each tobesorted element to use],
 * Alignment context of tobesorted that are to be re-ordered, boolean
 * sortinplace, [special data - i.e. the sequence to be sorted w.r.t.]).
 * sortinplace implies that the sorted vector resulting from applying the
 * operation to tobesorted should be mapped back to the original positions in
 * the alignment. Otherwise, the normal behaviour is to re-order the alignment
 * so that tobesorted is sorted and grouped together starting from the first
 * tobesorted position in the alignment, e.g. (a,tb2,b,tb1,c,tb3) becomes
 * (a,tb1,tb2,tb3,b,c).
 */
41 public class AlignmentSorter
43 static boolean sortIdAscending = true;
45 static int lastGroupHash = 0;
47 static boolean sortGroupAscending = true;
49 static AlignmentOrder lastOrder = null;
51 static boolean sortOrderAscending = true;
53 static NJTree lastTree = null;
55 static boolean sortTreeAscending = true;
58 * last Annotation Label used by sortByScore
60 private static String lastSortByScore;
63 * compact representation of last arguments to SortByFeatureScore
65 private static String lastSortByFeatureScore;
67 private static boolean sortLengthAscending;
70 * Sort by Percentage Identity w.r.t. s
77 * sequences from align that are to be sorted.
79 public static void sortByPID(AlignmentI align, SequenceI s,
82 sortByPID(align,s,tosort,0,-1);
85 * Sort by Percentage Identity w.r.t. s
92 * sequences from align that are to be sorted.
93 * @param start start column (0 for beginning
96 public static void sortByPID(AlignmentI align, SequenceI s,
97 SequenceI[] tosort,int start, int end)
99 int nSeq = align.getHeight();
101 float[] scores = new float[nSeq];
102 SequenceI[] seqs = new SequenceI[nSeq];
104 for (int i = 0; i < nSeq; i++)
106 scores[i] = Comparison.PID(align.getSequenceAt(i)
107 .getSequenceAsString(), s.getSequenceAsString());
108 seqs[i] = align.getSequenceAt(i);
111 QuickSort.sort(scores, 0, scores.length - 1, seqs);
113 setReverseOrder(align, seqs);
117 * Reverse the order of the sort
124 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
126 int nSeq = seqs.length;
136 len = (nSeq + 1) / 2;
139 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
140 for (int i = 0; i < len; i++)
142 // SequenceI tmp = seqs[i];
143 align.getSequences().setElementAt(seqs[nSeq - i - 1], i);
144 align.getSequences().setElementAt(seqs[i], nSeq - i - 1);
149 * Sets the Alignment object with the given sequences
152 * Alignment object to be updated
154 * sequences as a vector
156 private static void setOrder(AlignmentI align, Vector tmp)
158 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
162 * Sets the Alignment object with the given sequences
167 * sequences as an array
169 public static void setOrder(AlignmentI align, SequenceI[] seqs)
171 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
172 Vector algn = align.getSequences();
173 Vector tmp = new Vector();
175 for (int i = 0; i < seqs.length; i++)
177 if (algn.contains(seqs[i]))
179 tmp.addElement(seqs[i]);
183 algn.removeAllElements();
184 // User may have hidden seqs, then clicked undo or redo
185 for (int i = 0; i < tmp.size(); i++)
187 algn.addElement(tmp.elementAt(i));
193 * Sorts by ID. Numbers are sorted before letters.
196 * The alignment object to sort
198 public static void sortByID(AlignmentI align)
200 int nSeq = align.getHeight();
202 String[] ids = new String[nSeq];
203 SequenceI[] seqs = new SequenceI[nSeq];
205 for (int i = 0; i < nSeq; i++)
207 ids[i] = align.getSequenceAt(i).getName();
208 seqs[i] = align.getSequenceAt(i);
211 QuickSort.sort(ids, seqs);
215 setReverseOrder(align, seqs);
219 setOrder(align, seqs);
222 sortIdAscending = !sortIdAscending;
225 * Sorts by sequence length
228 * The alignment object to sort
230 public static void sortByLength(AlignmentI align)
232 int nSeq = align.getHeight();
234 float[] length = new float[nSeq];
235 SequenceI[] seqs = new SequenceI[nSeq];
237 for (int i = 0; i < nSeq; i++)
239 seqs[i] = align.getSequenceAt(i);
240 length[i] = (float) (seqs[i].getEnd()-seqs[i].getStart());
243 QuickSort.sort(length, seqs);
245 if (sortLengthAscending)
247 setReverseOrder(align, seqs);
251 setOrder(align, seqs);
254 sortLengthAscending = !sortLengthAscending;
258 * Sorts the alignment by size of group. <br>
259 * Maintains the order of sequences in each group by order in given alignment
263 * sorts the given alignment object by group
265 public static void sortByGroup(AlignmentI align)
267 // MAINTAINS ORIGNAL SEQUENCE ORDER,
268 // ORDERS BY GROUP SIZE
269 Vector groups = new Vector();
271 if (groups.hashCode() != lastGroupHash)
273 sortGroupAscending = true;
274 lastGroupHash = groups.hashCode();
278 sortGroupAscending = !sortGroupAscending;
281 // SORTS GROUPS BY SIZE
282 // ////////////////////
283 for (int i = 0; i < align.getGroups().size(); i++)
285 SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i);
287 for (int j = 0; j < groups.size(); j++)
289 SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j);
291 if (sg.getSize() > sg2.getSize())
293 groups.insertElementAt(sg, j);
299 if (!groups.contains(sg))
301 groups.addElement(sg);
305 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
306 // /////////////////////////////////////////////
307 Vector seqs = new Vector();
309 for (int i = 0; i < groups.size(); i++)
311 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
312 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
314 for (int j = 0; j < orderedseqs.length; j++)
316 seqs.addElement(orderedseqs[j]);
320 if (sortGroupAscending)
322 setOrder(align, seqs);
326 setReverseOrder(align,
327 vectorSubsetToArray(seqs, align.getSequences()));
332 * Converts Vector to array. java 1.18 does not have Vector.toArray()
335 * Vector of SequenceI objects
337 * @return array of Sequence[]
339 private static SequenceI[] vectorToArray(Vector tmp)
341 SequenceI[] seqs = new SequenceI[tmp.size()];
343 for (int i = 0; i < tmp.size(); i++)
345 seqs[i] = (SequenceI) tmp.elementAt(i);
359 * @return DOCUMENT ME!
361 private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask)
363 Vector seqs = new Vector();
365 boolean[] tmask = new boolean[mask.size()];
367 for (i = 0; i < mask.size(); i++)
372 for (i = 0; i < tmp.size(); i++)
374 Object sq = tmp.elementAt(i);
376 if (mask.contains(sq) && tmask[mask.indexOf(sq)])
378 tmask[mask.indexOf(sq)] = false;
383 for (i = 0; i < tmask.length; i++)
387 seqs.addElement(mask.elementAt(i));
391 return vectorToArray(seqs);
395 * Sorts by a given AlignmentOrder object
400 * specified order for alignment
402 public static void sortBy(AlignmentI align, AlignmentOrder order)
404 // Get an ordered vector of sequences which may also be present in align
405 Vector tmp = order.getOrder();
407 if (lastOrder == order)
409 sortOrderAscending = !sortOrderAscending;
413 sortOrderAscending = true;
416 if (sortOrderAscending)
418 setOrder(align, tmp);
422 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
434 * @return DOCUMENT ME!
436 private static Vector getOrderByTree(AlignmentI align, NJTree tree)
438 int nSeq = align.getHeight();
440 Vector tmp = new Vector();
442 tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences());
444 if (tmp.size() != nSeq)
446 // TODO: JBPNote - decide if this is always an error
447 // (eg. not when a tree is associated to another alignment which has more
449 if (tmp.size() < nSeq)
451 addStrays(align, tmp);
454 if (tmp.size() != nSeq)
456 System.err.println("ERROR: tmp.size()=" + tmp.size() + " != nseq="
457 + nSeq + " in getOrderByTree");
465 * Sorts the alignment by a given tree
472 public static void sortByTree(AlignmentI align, NJTree tree)
474 Vector tmp = getOrderByTree(align, tree);
476 // tmp should properly permute align with tree.
477 if (lastTree != tree)
479 sortTreeAscending = true;
484 sortTreeAscending = !sortTreeAscending;
487 if (sortTreeAscending)
489 setOrder(align, tmp);
493 setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
505 private static void addStrays(AlignmentI align, Vector seqs)
507 int nSeq = align.getHeight();
509 for (int i = 0; i < nSeq; i++)
511 if (!seqs.contains(align.getSequenceAt(i)))
513 seqs.addElement(align.getSequenceAt(i));
517 if (nSeq != seqs.size())
520 .println("ERROR: Size still not right even after addStrays");
534 * @return DOCUMENT ME!
536 private static Vector _sortByTree(SequenceNode node, Vector tmp,
544 SequenceNode left = (SequenceNode) node.left();
545 SequenceNode right = (SequenceNode) node.right();
547 if ((left == null) && (right == null))
549 if (!node.isPlaceholder() && (node.element() != null))
551 if (node.element() instanceof SequenceI)
553 if (!tmp.contains(node.element()))
555 tmp.addElement((SequenceI) node.element());
564 _sortByTree(left, tmp, seqset);
565 _sortByTree(right, tmp, seqset);
572 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
577 * recover the order of sequences given by the safe numbering scheme introducd
578 * SeqsetUtils.uniquify.
580 public static void recoverOrder(SequenceI[] alignment)
582 float[] ids = new float[alignment.length];
584 for (int i = 0; i < alignment.length; i++)
586 ids[i] = (new Float(alignment[i].getName().substring(8)))
590 jalview.util.QuickSort.sort(ids, alignment);
594 * Sort sequence in order of increasing score attribute for annotation with a
595 * particular scoreLabel. Or reverse if same label was used previously
598 * exact label for sequence associated AlignmentAnnotation
599 * scores to use for sorting.
601 * sequences to be sorted
603 public static void sortByAnnotationScore(String scoreLabel,
604 AlignmentI alignment)
606 SequenceI[] seqs = alignment.getSequencesArray();
607 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
609 int hasScores = 0; // number of scores present on set
610 double[] scores = new double[seqs.length];
611 double min = 0, max = 0;
612 for (int i = 0; i < seqs.length; i++)
614 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
615 if (scoreAnn != null)
619 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
623 max = min = scores[i];
644 return; // do nothing - no scores present to sort by.
646 if (hasScores < seqs.length)
648 for (int i = 0; i < seqs.length; i++)
652 scores[i] = (max + i+1.0);
657 jalview.util.QuickSort.sort(scores, seqs);
658 if (lastSortByScore != scoreLabel)
660 lastSortByScore = scoreLabel;
661 setOrder(alignment, seqs);
665 setReverseOrder(alignment, seqs);
669 * types of feature ordering:
670 * Sort by score : average score - or total score - over all features in region
671 * Sort by feature label text: (or if null - feature type text) - numerical or alphabetical
672 * Sort by feature density: based on counts - ignoring individual text or scores for each feature
674 public static String FEATURE_SCORE="average_score";
675 public static String FEATURE_LABEL="text";
676 public static String FEATURE_DENSITY="density";
679 * sort the alignment using the features on each sequence found between start and stop with the given featureLabel (and optional group qualifier)
680 * @param featureLabel (may not be null)
681 * @param groupLabel (may be null)
682 * @param start (-1 to include non-positional features)
683 * @param stop (-1 to only sort on non-positional features)
684 * @param alignment - aligned sequences containing features
685 * @param method - one of the string constants FEATURE_SCORE, FEATURE_LABEL, FEATURE_DENSITY
687 public static void sortByFeature(String featureLabel, String groupLabel, int start, int stop,
688 AlignmentI alignment, String method)
690 sortByFeature(featureLabel==null ? null : new String[] {featureLabel},
691 groupLabel==null ? null : new String[] {groupLabel}, start, stop, alignment, method);
693 private static boolean containsIgnoreCase(final String lab, final String[] labs)
703 for (int q=0;q<labs.length;q++)
705 if (labs[q]!=null && lab.equalsIgnoreCase(labs[q]))
712 public static void sortByFeature(String[] featureLabels, String[] groupLabels, int start, int stop,
713 AlignmentI alignment, String method)
715 if (method!=FEATURE_SCORE && method!=FEATURE_LABEL && method!=FEATURE_DENSITY)
717 throw new Error("Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY.");
719 boolean ignoreScore=method!=FEATURE_SCORE;
720 StringBuffer scoreLabel = new StringBuffer();
721 scoreLabel.append(start+stop+method);
722 // This doesn't work yet - we'd like to have a canonical ordering that can be preserved from call to call
723 for (int i=0;featureLabels!=null && i<featureLabels.length; i++)
725 scoreLabel.append(featureLabels[i]==null ? "null" : featureLabels[i]);
727 for (int i=0;groupLabels!=null && i<groupLabels.length; i++)
729 scoreLabel.append(groupLabels[i]==null ? "null" : groupLabels[i]);
731 SequenceI[] seqs = alignment.getSequencesArray();
733 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
735 int hasScores = 0; // number of scores present on set
736 double[] scores = new double[seqs.length];
737 int[] seqScores = new int[seqs.length];
738 Object[] feats = new Object[seqs.length];
739 double min = 0, max = 0;
740 for (int i = 0; i < seqs.length; i++)
742 SequenceFeature[] sf = seqs[i].getSequenceFeatures();
743 if (sf==null && seqs[i].getDatasetSequence()!=null)
745 sf = seqs[i].getDatasetSequence().getSequenceFeatures();
749 sf = new SequenceFeature[0];
751 SequenceFeature[] tmp = new SequenceFeature[sf.length];
752 for (int s=0; s<tmp.length;s++)
758 int sstart = (start==-1) ? start : seqs[i].findPosition(start);
759 int sstop = (stop==-1) ? stop : seqs[i].findPosition(stop);
763 for (int f=0;f<sf.length;f++)
765 // filter for selection criteria
767 // ignore features outwith alignment start-stop positions.
768 (sf[f].end < sstart || sf[f].begin > sstop)
770 // or ignore based on selection criteria
771 (featureLabels != null && !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels))
772 || (groupLabels != null
773 // problem here: we cannot eliminate null feature group features
774 && (sf[f].getFeatureGroup() != null
775 && !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(), groupLabels))))
777 // forget about this feature
781 // or, also take a look at the scores if necessary.
782 if (!ignoreScore && sf[f].getScore()!=Float.NaN)
790 scores[i] += sf[f].getScore(); // take the first instance of this
795 SequenceFeature[] fs;
796 feats[i] = fs = new SequenceFeature[n];
800 for (int f=0;f<sf.length;f++)
804 ((SequenceFeature[]) feats[i])[n++] = sf[f];
807 if (method==FEATURE_LABEL)
809 // order the labels by alphabet
810 String[] labs = new String[fs.length];
811 for (int l=0;l<labs.length; l++)
813 labs[l] = (fs[l].getDescription()!=null ? fs[l].getDescription() : fs[l].getType());
815 jalview.util.QuickSort.sort(labs, ((Object[]) feats[i]));
820 // compute average score
821 scores[i]/=seqScores[i];
822 // update the score bounds.
825 max = min = scores[i];
841 if (method==FEATURE_SCORE)
845 return; // do nothing - no scores present to sort by.
848 if (hasScores < seqs.length)
850 for (int i = 0; i < seqs.length; i++)
854 scores[i] = (max + i);
856 int nf=(feats[i]==null) ? 0 :((SequenceFeature[]) feats[i]).length;
857 System.err.println("Sorting on Score: seq "+seqs[i].getName()+ " Feats: "+nf+" Score : "+scores[i]);
862 jalview.util.QuickSort.sort(scores, seqs);
865 if (method==FEATURE_DENSITY)
868 // break ties between equivalent numbers for adjacent sequences by adding 1/Nseq*i on the original order
869 double fr = 0.9/(1.0*seqs.length);
870 for (int i=0;i<seqs.length; i++)
873 scores[i] = (0.05+fr*i)+(nf=((feats[i]==null) ? 0.0 :1.0*((SequenceFeature[]) feats[i]).length));
874 System.err.println("Sorting on Density: seq "+seqs[i].getName()+ " Feats: "+nf+" Score : "+scores[i]);
876 jalview.util.QuickSort.sort(scores, seqs);
879 if (method==FEATURE_LABEL)
881 throw new Error("Not yet implemented.");
884 if (lastSortByFeatureScore ==null || scoreLabel.equals(lastSortByFeatureScore))
886 setOrder(alignment, seqs);
890 setReverseOrder(alignment, seqs);
892 lastSortByFeatureScore = scoreLabel.toString();