2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.analysis.scoremodels.PIDModel;
24 import jalview.analysis.scoremodels.SimilarityParams;
25 import jalview.datamodel.AlignmentAnnotation;
26 import jalview.datamodel.AlignmentI;
27 import jalview.datamodel.AlignmentOrder;
28 import jalview.datamodel.SequenceFeature;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.SequenceNode;
32 import jalview.util.QuickSort;
34 import java.util.ArrayList;
35 import java.util.Collections;
36 import java.util.Iterator;
37 import java.util.List;
40 import org.forester.phylogeny.Phylogeny;
41 import org.forester.phylogeny.PhylogenyNode;
42 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
/**
 * Routines for manipulating the order of a multiple sequence alignment.
 * <p>
 * TODO: this class retains some global state concerning sort order, which
 * should be made attributes of the caller's alignment visualization.
 * <p>
 * TODO: refactor to allow a subset of selected sequences to be sorted within
 * the context of a whole alignment. Sort method template is: (SequenceI[]
 * tobesorted, [ input data mapping to each tobesorted element to use ],
 * Alignment context of tobesorted that are to be re-ordered, boolean
 * sortinplace, [special data - ie sequence to be sorted w.r.t.]).
 * <p>
 * sortinplace implies that the sorted vector resulting from applying the
 * operation to tobesorted should be mapped back to the original positions in
 * alignment. Otherwise, normal behaviour is to re-order alignment so that
 * tobesorted is sorted and grouped together starting from the first tobesorted
 * position in the alignment, e.g. (a,tb2,b,tb1,c,tb3) becomes
 * (a,tb1,tb2,tb3,b,c).
 */
59 public class AlignmentSorter
62 * todo: refactor searches to follow a basic pattern: (search property, last
63 * search state, current sort direction)
65 static boolean sortIdAscending = true;
67 static int lastGroupHash = 0;
69 static boolean sortGroupAscending = true;
71 static AlignmentOrder lastOrder = null;
73 static boolean sortOrderAscending = true;
75 static TreeModel lastTree = null;
77 static Phylogeny lastAptxTree = null;
79 static boolean sortTreeAscending = true;
82 * last Annotation Label used for sort by Annotation score
84 private static String lastSortByAnnotation;
87 * string hash of last arguments to sortByFeature
88 * (sort order toggles if this is unchanged between sorts)
90 private static String sortByFeatureCriteria;
92 private static boolean sortByFeatureAscending = true;
94 private static boolean sortLengthAscending;
97 * Sorts sequences in the alignment by Percentage Identity with the given
98 * reference sequence, sorting the highest identity to the top
106 public static void sortByPID(AlignmentI align, SequenceI s)
108 int nSeq = align.getHeight();
110 float[] scores = new float[nSeq];
111 SequenceI[] seqs = new SequenceI[nSeq];
112 String refSeq = s.getSequenceAsString();
114 SimilarityParams pidParams = new SimilarityParams(true, true, true,
116 for (int i = 0; i < nSeq; i++)
118 scores[i] = (float) PIDModel.computePID(
119 align.getSequenceAt(i).getSequenceAsString(), refSeq,
121 seqs[i] = align.getSequenceAt(i);
124 QuickSort.sort(scores, seqs);
126 setReverseOrder(align, seqs);
130 * Reverse the order of the sort
137 private static void setReverseOrder(AlignmentI align, SequenceI[] seqs)
139 int nSeq = seqs.length;
149 len = (nSeq + 1) / 2;
152 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
154 synchronized (asq = align.getSequences())
156 for (int i = 0; i < len; i++)
158 // SequenceI tmp = seqs[i];
159 asq.set(i, seqs[nSeq - i - 1]);
160 asq.set(nSeq - i - 1, seqs[i]);
166 * Sets the Alignment object with the given sequences
169 * Alignment object to be updated
171 * sequences as a vector
173 private static void setOrder(AlignmentI align, List<SequenceI> tmp)
175 setOrder(align, vectorSubsetToArray(tmp, align.getSequences()));
179 * Sets the Alignment object with the given sequences
184 * sequences as an array
186 public static void setOrder(AlignmentI align, SequenceI[] seqs)
188 // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work
189 List<SequenceI> algn;
190 synchronized (algn = align.getSequences())
192 List<SequenceI> tmp = new ArrayList<>();
194 for (int i = 0; i < seqs.length; i++)
196 if (algn.contains(seqs[i]))
203 // User may have hidden seqs, then clicked undo or redo
204 for (int i = 0; i < tmp.size(); i++)
206 algn.add(tmp.get(i));
212 * Sorts by ID. Numbers are sorted before letters.
215 * The alignment object to sort
217 public static void sortByID(AlignmentI align)
219 int nSeq = align.getHeight();
221 String[] ids = new String[nSeq];
222 SequenceI[] seqs = new SequenceI[nSeq];
224 for (int i = 0; i < nSeq; i++)
226 ids[i] = align.getSequenceAt(i).getName();
227 seqs[i] = align.getSequenceAt(i);
230 QuickSort.sort(ids, seqs);
234 setReverseOrder(align, seqs);
238 setOrder(align, seqs);
241 sortIdAscending = !sortIdAscending;
245 * Sorts by sequence length
248 * The alignment object to sort
250 public static void sortByLength(AlignmentI align)
252 int nSeq = align.getHeight();
254 float[] length = new float[nSeq];
255 SequenceI[] seqs = new SequenceI[nSeq];
257 for (int i = 0; i < nSeq; i++)
259 seqs[i] = align.getSequenceAt(i);
260 length[i] = (seqs[i].getEnd() - seqs[i].getStart());
263 QuickSort.sort(length, seqs);
265 if (sortLengthAscending)
267 setReverseOrder(align, seqs);
271 setOrder(align, seqs);
274 sortLengthAscending = !sortLengthAscending;
278 * Sorts the alignment by size of group. <br>
279 * Maintains the order of sequences in each group by order in given alignment
283 * sorts the given alignment object by group
285 public static void sortByGroup(AlignmentI align)
287 // MAINTAINS ORIGNAL SEQUENCE ORDER,
288 // ORDERS BY GROUP SIZE
289 List<SequenceGroup> groups = new ArrayList<>();
291 if (groups.hashCode() != lastGroupHash)
293 sortGroupAscending = true;
294 lastGroupHash = groups.hashCode();
298 sortGroupAscending = !sortGroupAscending;
301 // SORTS GROUPS BY SIZE
302 // ////////////////////
303 for (SequenceGroup sg : align.getGroups())
305 for (int j = 0; j < groups.size(); j++)
307 SequenceGroup sg2 = groups.get(j);
309 if (sg.getSize() > sg2.getSize())
317 if (!groups.contains(sg))
323 // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER
324 // /////////////////////////////////////////////
325 List<SequenceI> seqs = new ArrayList<>();
327 for (int i = 0; i < groups.size(); i++)
329 SequenceGroup sg = groups.get(i);
330 SequenceI[] orderedseqs = sg.getSequencesInOrder(align);
332 for (int j = 0; j < orderedseqs.length; j++)
334 seqs.add(orderedseqs[j]);
338 if (sortGroupAscending)
340 setOrder(align, seqs);
344 setReverseOrder(align,
345 vectorSubsetToArray(seqs, align.getSequences()));
350 * Select sequences in order from tmp that is present in mask, and any
351 * remaining sequences in mask not in tmp
354 * thread safe collection of sequences
356 * thread safe collection of sequences
358 * @return intersect(tmp,mask)+intersect(complement(tmp),mask)
360 private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp,
361 List<SequenceI> mask)
364 // tmp2 = tmp.retainAll(mask);
365 // return tmp2.addAll(mask.removeAll(tmp2))
367 ArrayList<SequenceI> seqs = new ArrayList<>();
369 boolean[] tmask = new boolean[mask.size()];
371 for (i = 0; i < mask.size(); i++)
376 for (i = 0; i < tmp.size(); i++)
378 SequenceI sq = tmp.get(i);
379 idx = mask.indexOf(sq);
380 if (idx > -1 && tmask[idx])
387 for (i = 0; i < tmask.length; i++)
391 seqs.add(mask.get(i));
395 return seqs.toArray(new SequenceI[seqs.size()]);
399 * Sorts by a given AlignmentOrder object
404 * specified order for alignment
406 public static void sortBy(AlignmentI align, AlignmentOrder order)
408 // Get an ordered vector of sequences which may also be present in align
409 List<SequenceI> tmp = order.getOrder();
411 if (lastOrder == order)
413 sortOrderAscending = !sortOrderAscending;
417 sortOrderAscending = true;
420 if (sortOrderAscending)
422 setOrder(align, tmp);
426 setReverseOrder(align,
427 vectorSubsetToArray(tmp, align.getSequences()));
439 * @return DOCUMENT ME!
441 private static List<SequenceI> getOrderByTree(AlignmentI align,
444 int nSeq = align.getHeight();
446 List<SequenceI> tmp = new ArrayList<>();
448 tmp = _sortByTree(tree.getTopNode(), tmp);
450 if (tmp.size() != nSeq)
452 // TODO: JBPNote - decide if this is always an error
453 // (eg. not when a tree is associated to another alignment which has more
455 if (tmp.size() != nSeq)
457 addStrays(align, tmp);
460 if (tmp.size() != nSeq)
462 System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq="
464 + " in getOrderByTree - tree contains sequences not in alignment");
473 private static List<SequenceI> getOrderByTree(Phylogeny aptxTree,
474 Map<PhylogenyNode, SequenceI> nodesWithBoundSeqs)
476 List<SequenceI> seqsByTreeOrder = new ArrayList<>();
477 if (!aptxTree.isEmpty())
479 for (final PhylogenyNodeIterator iter = aptxTree
480 .iteratorPreorder(); iter.hasNext();)
482 PhylogenyNode treeNode = iter.next();
483 seqsByTreeOrder.add(nodesWithBoundSeqs.get(treeNode));
487 return seqsByTreeOrder;
493 * Sorts the alignment by a given tree
500 public static void sortByTree(AlignmentI align, TreeModel tree)
502 List<SequenceI> tmp = getOrderByTree(align, tree);
504 // tmp should properly permute align with tree.
505 if (lastTree != tree)
507 sortTreeAscending = true;
512 sortTreeAscending = !sortTreeAscending;
515 if (sortTreeAscending)
517 setOrder(align, tmp);
521 setReverseOrder(align,
522 vectorSubsetToArray(tmp, align.getSequences()));
527 * Sorts the alignment by a given tree from Archaeopteryx
534 public static void sortByTree(AlignmentI align,
535 Map<PhylogenyNode, SequenceI> aptxNodesWithSeqs,
536 Phylogeny aptxTree) throws IllegalArgumentException
538 List<SequenceI> tmp = getOrderByTree(aptxTree, aptxNodesWithSeqs);
542 if (lastAptxTree != aptxTree)
544 sortTreeAscending = true;
545 lastAptxTree = aptxTree;
549 sortTreeAscending = !sortTreeAscending;
552 if (sortTreeAscending)
554 setOrder(align, tmp);
558 setReverseOrder(align,
559 vectorSubsetToArray(tmp, align.getSequences()));
564 throw new IllegalArgumentException();
576 private static void addStrays(AlignmentI align, List<SequenceI> tmp)
578 int nSeq = align.getHeight();
580 for (int i = 0; i < nSeq; i++)
582 if (!tmp.contains(align.getSequenceAt(i)))
584 tmp.add(align.getSequenceAt(i));
588 if (nSeq != tmp.size())
591 .println("ERROR: Size still not right even after addStrays");
605 * @return DOCUMENT ME!
607 private static List<SequenceI> _sortByTree(SequenceNode node,
615 SequenceNode left = (SequenceNode) node.left();
616 SequenceNode right = (SequenceNode) node.right();
618 if ((left == null) && (right == null))
620 if (!node.isPlaceholder() && (node.element() != null))
622 if (node.element() instanceof SequenceI)
624 if (!tmp.contains(node.element())) // && (seqset==null ||
625 // seqset.size()==0 ||
626 // seqset.contains(tmp)))
628 tmp.add((SequenceI) node.element());
637 _sortByTree(left, tmp);
638 _sortByTree(right, tmp);
647 // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in
652 * recover the order of sequences given by the safe numbering scheme introducd
653 * SeqsetUtils.uniquify.
655 public static void recoverOrder(SequenceI[] alignment)
657 float[] ids = new float[alignment.length];
659 for (int i = 0; i < alignment.length; i++)
661 ids[i] = (new Float(alignment[i].getName().substring(8)))
665 jalview.util.QuickSort.sort(ids, alignment);
669 * Sort sequence in order of increasing score attribute for annotation with a
670 * particular scoreLabel. Or reverse if same label was used previously
673 * exact label for sequence associated AlignmentAnnotation scores to
676 * sequences to be sorted
678 public static void sortByAnnotationScore(String scoreLabel,
679 AlignmentI alignment)
681 SequenceI[] seqs = alignment.getSequencesArray();
682 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
684 int hasScores = 0; // number of scores present on set
685 double[] scores = new double[seqs.length];
686 double min = 0, max = 0;
687 for (int i = 0; i < seqs.length; i++)
689 AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel);
690 if (scoreAnn != null)
694 scores[i] = scoreAnn[0].getScore(); // take the first instance of this
698 max = min = scores[i];
719 return; // do nothing - no scores present to sort by.
721 if (hasScores < seqs.length)
723 for (int i = 0; i < seqs.length; i++)
727 scores[i] = (max + i + 1.0);
732 jalview.util.QuickSort.sort(scores, seqs);
733 if (lastSortByAnnotation != scoreLabel)
735 lastSortByAnnotation = scoreLabel;
736 setOrder(alignment, seqs);
740 setReverseOrder(alignment, seqs);
745 * types of feature ordering: Sort by score : average score - or total score -
746 * over all features in region Sort by feature label text: (or if null -
747 * feature type text) - numerical or alphabetical Sort by feature density:
748 * based on counts - ignoring individual text or scores for each feature
750 public static String FEATURE_SCORE = "average_score";
752 public static String FEATURE_LABEL = "text";
754 public static String FEATURE_DENSITY = "density";
757 * Sort sequences by feature score or density, optionally restricted by
758 * feature types, feature groups, or alignment start/end positions.
760 * If the sort is repeated for the same combination of types and groups, sort
763 * @param featureTypes
764 * a list of feature types to include (or null for all)
766 * a list of feature groups to include (or null for all)
768 * start column position to include (base zero)
770 * end column position to include (base zero)
772 * the alignment to be sorted
774 * either "average_score" or "density" ("text" not yet implemented)
776 public static void sortByFeature(List<String> featureTypes,
777 List<String> groups, final int startCol, final int endCol,
778 AlignmentI alignment, String method)
780 if (method != FEATURE_SCORE && method != FEATURE_LABEL
781 && method != FEATURE_DENSITY)
784 .format("Implementation Error - sortByFeature method must be either '%s' or '%s'",
785 FEATURE_SCORE, FEATURE_DENSITY);
786 System.err.println(msg);
790 flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol);
792 SequenceI[] seqs = alignment.getSequencesArray();
794 boolean[] hasScore = new boolean[seqs.length]; // per sequence score
796 int hasScores = 0; // number of scores present on set
797 double[] scores = new double[seqs.length];
798 int[] seqScores = new int[seqs.length];
799 Object[][] feats = new Object[seqs.length][];
803 for (int i = 0; i < seqs.length; i++)
806 * get sequence residues overlapping column region
807 * and features for residue positions and specified types
809 String[] types = featureTypes == null ? null : featureTypes
810 .toArray(new String[featureTypes.size()]);
811 List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1,
817 Iterator<SequenceFeature> it = sfs.listIterator();
820 SequenceFeature sf = it.next();
823 * accept all features with null or empty group, otherwise
824 * check group is one of the currently visible groups
826 String featureGroup = sf.getFeatureGroup();
827 if (groups != null && featureGroup != null
828 && !"".equals(featureGroup)
829 && !groups.contains(featureGroup))
835 float score = sf.getScore();
836 if (FEATURE_SCORE.equals(method) && !Float.isNaN(score))
838 if (seqScores[i] == 0)
845 // take the first instance of this score // ??
850 feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]);
853 if (method == FEATURE_LABEL)
855 // order the labels by alphabet (not yet implemented)
856 String[] labs = new String[sfs.size()];
857 for (int l = 0; l < sfs.size(); l++)
859 SequenceFeature sf = sfs.get(l);
860 String description = sf.getDescription();
861 labs[l] = (description != null ? description : sf.getType());
863 QuickSort.sort(labs, feats[i]);
868 // compute average score
869 scores[i] /= seqScores[i];
870 // update the score bounds.
878 max = Math.max(max, scores[i]);
879 min = Math.min(min, scores[i]);
884 if (FEATURE_SCORE.equals(method))
888 return; // do nothing - no scores present to sort by.
891 if (hasScores < seqs.length)
893 for (int i = 0; i < seqs.length; i++)
897 scores[i] = (max + 1 + i);
901 // int nf = (feats[i] == null) ? 0
902 // : ((SequenceFeature[]) feats[i]).length;
903 // // System.err.println("Sorting on Score: seq " +
905 // + " Feats: " + nf + " Score : " + scores[i]);
909 QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
911 else if (FEATURE_DENSITY.equals(method))
913 for (int i = 0; i < seqs.length; i++)
915 int featureCount = feats[i] == null ? 0
916 : ((SequenceFeature[]) feats[i]).length;
917 scores[i] = featureCount;
918 // System.err.println("Sorting on Density: seq "+seqs[i].getName()+
919 // " Feats: "+featureCount+" Score : "+scores[i]);
921 QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending);
924 setOrder(alignment, seqs);
928 * Builds a string hash of criteria for sorting, and if unchanged from last
929 * time, reverse the sort order
932 * @param featureTypes
937 protected static void flipFeatureSortIfUnchanged(String method,
938 List<String> featureTypes, List<String> groups,
939 final int startCol, final int endCol)
941 StringBuilder sb = new StringBuilder(64);
942 sb.append(startCol).append(method).append(endCol);
943 if (featureTypes != null)
945 Collections.sort(featureTypes);
946 sb.append(featureTypes.toString());
950 Collections.sort(groups);
951 sb.append(groups.toString());
953 String scoreCriteria = sb.toString();
956 * if resorting on the same criteria, toggle sort order
958 if (sortByFeatureCriteria == null
959 || !scoreCriteria.equals(sortByFeatureCriteria))
961 sortByFeatureAscending = true;
965 sortByFeatureAscending = !sortByFeatureAscending;
967 sortByFeatureCriteria = scoreCriteria;