/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.analysis; import jalview.analysis.scoremodels.PIDModel; import jalview.analysis.scoremodels.SimilarityParams; import jalview.bin.ApplicationSingletonProvider; import jalview.bin.ApplicationSingletonProvider.ApplicationSingletonI; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; import jalview.util.QuickSort; import java.util.ArrayList; import java.util.BitSet; import java.util.Collections; import java.util.Iterator; import java.util.List; /** * Routines for manipulating the order of a multiple sequence alignment TODO: * this class retains some global states concerning sort-order which should be * made attributes for the caller's alignment visualization. TODO: refactor to * allow a subset of selected sequences to be sorted within the context of a * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input * data mapping to each tobesorted element to use ], Alignment context of * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector * resulting from applying the operation to tobesorted should be mapped back to * the original positions in alignment. Otherwise, normal behaviour is to re * order alignment so that tobesorted is sorted and grouped together starting * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3 * becomes a,tb1,tb2,tb3,b,c) */ public class AlignmentSorter implements ApplicationSingletonI { private AlignmentSorter() { // private singleton } public static AlignmentSorter getInstance() { return (AlignmentSorter) ApplicationSingletonProvider .getInstance(AlignmentSorter.class); } /** * types of feature ordering: Sort by score : average score - or total score - * over all features in region Sort by feature label text: (or if null - * feature type text) - numerical or alphabetical Sort by feature density: * based on counts - ignoring individual text or scores for each feature */ public static final String FEATURE_SCORE = "average_score"; public static final String FEATURE_LABEL = "text"; public static final String FEATURE_DENSITY = "density"; /* * todo: refactor searches to follow a basic pattern: (search property, last * search state, current sort direction) */ boolean sortIdAscending = true; int lastGroupHash = 0; boolean sortGroupAscending = true; AlignmentOrder lastOrder = null; boolean sortOrderAscending = true; TreeModel lastTree = null; boolean sortTreeAscending = true; /** * last Annotation Label used for sort by Annotation score */ private String lastSortByAnnotation; /** * string hash of last arguments to sortByFeature (sort order toggles if this * is unchanged between sorts) */ private String sortByFeatureCriteria; private boolean sortByFeatureAscending = true; private boolean sortLengthAscending; private static boolean sortEValueAscending; private static boolean sortBitScoreAscending; /** * Sorts sequences in the alignment by Percentage Identity with the given * reference sequence, sorting the highest identity to the top * * @param align * AlignmentI * @param s * SequenceI * @param end */ public static void sortByPID(AlignmentI align, SequenceI s) { int nSeq = align.getHeight(); float[] scores = new float[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; String refSeq = s.getSequenceAsString(); SimilarityParams pidParams = new SimilarityParams(true, true, true, true); for (int i = 0; i < nSeq; i++) { scores[i] = (float) PIDModel.computePID( align.getSequenceAt(i).getSequenceAsString(), refSeq, pidParams); seqs[i] = align.getSequenceAt(i); } QuickSort.sort(scores, seqs); setReverseOrder(align, seqs); } /** * Sorts by ID. Numbers are sorted before letters. * * @param align * The alignment object to sort */ public static void sortByID(AlignmentI align) { int nSeq = align.getHeight(); String[] ids = new String[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { ids[i] = align.getSequenceAt(i).getName(); seqs[i] = align.getSequenceAt(i); } QuickSort.sort(ids, seqs); AlignmentSorter as = getInstance(); as.sortIdAscending = !as.sortIdAscending; set(align, seqs, as.sortIdAscending); } /** * Sorts by sequence length * * @param align * The alignment object to sort */ public static void sortByLength(AlignmentI align) { int nSeq = align.getHeight(); float[] length = new float[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { seqs[i] = align.getSequenceAt(i); length[i] = (seqs[i].getEnd() - seqs[i].getStart()); } QuickSort.sort(length, seqs); AlignmentSorter as = getInstance(); as.sortLengthAscending = !as.sortLengthAscending; set(align, seqs, as.sortLengthAscending); } /** * Sorts by sequence evalue. Currently moves all sequences without an evalue to * the top of the alignment. * * @param align * The alignment object to sort */ public static void sortByEValue(AlignmentI align) { int nSeq = align.getHeight(); double[] evalue = new double[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { seqs[i] = align.getSequenceAt(i); AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores"); if (ann != null) { evalue[i] = ann[0].getEValue(); } else { evalue[i] = -1; } } QuickSort.sort(evalue, seqs); if (sortEValueAscending) { setReverseOrder(align, seqs); } else { setOrder(align, seqs); } sortEValueAscending = !sortEValueAscending; } /** * Sorts by sequence bit score. Currently moves all sequences without a bit * score to the top of the alignment * * @param align * The alignment object to sort */ public static void sortByBitScore(AlignmentI align) { int nSeq = align.getHeight(); double[] score = new double[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { seqs[i] = align.getSequenceAt(i); AlignmentAnnotation[] ann = seqs[i].getAnnotation("Search Scores"); if (ann != null) { score[i] = ann[0].getEValue(); } else { score[i] = -1; } } QuickSort.sort(score, seqs); if (sortBitScoreAscending) { setReverseOrder(align, seqs); } else { setOrder(align, seqs); } sortBitScoreAscending = !sortBitScoreAscending; } /** * Sorts the alignment by size of group.
* Maintains the order of sequences in each group by order in given alignment * object. * * @param align * sorts the given alignment object by group */ public static void sortByGroup(AlignmentI align) { // MAINTAINS ORIGNAL SEQUENCE ORDER, // ORDERS BY GROUP SIZE List groups = new ArrayList<>(); AlignmentSorter as = getInstance(); if (groups.hashCode() != as.lastGroupHash) { as.sortGroupAscending = true; as.lastGroupHash = groups.hashCode(); } else { as.sortGroupAscending = !as.sortGroupAscending; } // SORTS GROUPS BY SIZE // //////////////////// for (SequenceGroup sg : align.getGroups()) { for (int j = 0; j < groups.size(); j++) { SequenceGroup sg2 = groups.get(j); if (sg.getSize() > sg2.getSize()) { groups.add(j, sg); break; } } if (!groups.contains(sg)) { groups.add(sg); } } // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER // ///////////////////////////////////////////// List tmp = new ArrayList<>(); for (int i = 0; i < groups.size(); i++) { SequenceGroup sg = groups.get(i); SequenceI[] orderedseqs = sg.getSequencesInOrder(align); for (int j = 0; j < orderedseqs.length; j++) { tmp.add(orderedseqs[j]); } } set(align, tmp, as.sortGroupAscending); } /** * Sorts by a given AlignmentOrder object * * @param align * Alignment to order * @param order * specified order for alignment */ public static void sortBy(AlignmentI align, AlignmentOrder order) { // Get an ordered vector of sequences which may also be present in align List tmp = order.getOrder(); AlignmentSorter as = getInstance(); if (as.lastOrder == order) { as.sortOrderAscending = !as.sortOrderAscending; } else { as.sortOrderAscending = true; } set(align, tmp, as.sortOrderAscending); } /** * DOCUMENT ME! * * @param align * alignment to order * @param tree * tree which has * * @return DOCUMENT ME! */ private static List getOrderByTree(AlignmentI align, TreeModel tree) { int nSeq = align.getHeight(); List tmp = new ArrayList<>(); tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences()); if (tmp.size() != nSeq) { // TODO: JBPNote - decide if this is always an error // (eg. not when a tree is associated to another alignment which has more // sequences) if (tmp.size() != nSeq) { addStrays(align, tmp); } if (tmp.size() != nSeq) { System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq=" + nSeq + " in getOrderByTree - tree contains sequences not in alignment"); } } return tmp; } /** * Sorts the alignment by a given tree * * @param align * alignment to order * @param tree * tree which has */ public static void sortByTree(AlignmentI align, TreeModel tree) { List tmp = getOrderByTree(align, tree); AlignmentSorter as = getInstance(); // tmp should properly permute align with tree. if (as.lastTree != tree) { as.sortTreeAscending = true; as.lastTree = tree; } else { as.sortTreeAscending = !as.sortTreeAscending; } set(align, tmp, as.sortTreeAscending); } /** * DOCUMENT ME! * * @param align * DOCUMENT ME! * @param tmp * DOCUMENT ME! */ private static void addStrays(AlignmentI align, List tmp) { int nSeq = align.getHeight(); for (int i = 0; i < nSeq; i++) { if (!tmp.contains(align.getSequenceAt(i))) { tmp.add(align.getSequenceAt(i)); } } if (nSeq != tmp.size()) { System.err .println("ERROR: Size still not right even after addStrays"); } } /** * DOCUMENT ME! * * @param node * DOCUMENT ME! * @param tmp * DOCUMENT ME! * @param seqset * DOCUMENT ME! * * @return DOCUMENT ME! */ private static List _sortByTree(SequenceNode node, List tmp, List seqset) { if (node == null) { return tmp; } SequenceNode left = (SequenceNode) node.left(); SequenceNode right = (SequenceNode) node.right(); if ((left == null) && (right == null)) { if (!node.isPlaceholder() && (node.element() != null)) { if (node.element() instanceof SequenceI) { if (!tmp.contains(node.element())) // && (seqset==null || // seqset.size()==0 || // seqset.contains(tmp))) { tmp.add((SequenceI) node.element()); } } } return tmp; } else { _sortByTree(left, tmp, seqset); _sortByTree(right, tmp, seqset); } return tmp; } // Ordering Objects // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in // appropriate order // /** * recover the order of sequences given by the safe numbering scheme introducd * SeqsetUtils.uniquify. */ public static void recoverOrder(SequenceI[] alignment) { float[] ids = new float[alignment.length]; for (int i = 0; i < alignment.length; i++) { ids[i] = (Float.valueOf(alignment[i].getName().substring(8))) .floatValue(); } jalview.util.QuickSort.sort(ids, alignment); } /** * Sort sequence in order of increasing score attribute for annotation with a * particular scoreLabel. Or reverse if same label was used previously * * @param scoreLabel * exact label for sequence associated AlignmentAnnotation scores to * use for sorting. * @param alignment * sequences to be sorted */ public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment) { SequenceI[] seqs = alignment.getSequencesArray(); boolean[] hasScore = new boolean[seqs.length]; // per sequence score // presence int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; double min = 0, max = 0; for (int i = 0; i < seqs.length; i++) { AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel); if (scoreAnn != null) { hasScores++; hasScore[i] = true; scores[i] = scoreAnn[0].getScore(); // take the first instance of this // score. if (hasScores == 1) { max = min = scores[i]; } else { if (max < scores[i]) { max = scores[i]; } if (min > scores[i]) { min = scores[i]; } } } else { hasScore[i] = false; } } if (hasScores == 0) { return; // do nothing - no scores present to sort by. } if (hasScores < seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i]) { scores[i] = (max + i + 1.0); } } } jalview.util.QuickSort.sort(scores, seqs); AlignmentSorter as = getInstance(); if (as.lastSortByAnnotation != scoreLabel) { as.lastSortByAnnotation = scoreLabel; setOrder(alignment, seqs); } else { setReverseOrder(alignment, seqs); } } /** * Sort sequences by feature score or density, optionally restricted by * feature types, feature groups, or alignment start/end positions. *

* If the sort is repeated for the same combination of types and groups, sort * order is reversed. * * @param featureTypes * a list of feature types to include (or null for all) * @param groups * a list of feature groups to include (or null for all) * @param startCol * start column position to include (base zero) * @param endCol * end column position to include (base zero) * @param alignment * the alignment to be sorted * @param method * either "average_score" or "density" ("text" not yet implemented) */ public static void sortByFeature(List featureTypes, List groups, final int startCol, final int endCol, AlignmentI alignment, String method) { if (method != FEATURE_SCORE && method != FEATURE_LABEL && method != FEATURE_DENSITY) { String msg = String.format( "Implementation Error - sortByFeature method must be either '%s' or '%s'", FEATURE_SCORE, FEATURE_DENSITY); System.err.println(msg); return; } flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol); SequenceI[] seqs = alignment.getSequencesArray(); boolean[] hasScore = new boolean[seqs.length]; // per sequence score // presence int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; int[] seqScores = new int[seqs.length]; Object[][] feats = new Object[seqs.length][]; double min = 0d; double max = 0d; for (int i = 0; i < seqs.length; i++) { /* * get sequence residues overlapping column region * and features for residue positions and specified types */ String[] types = featureTypes == null ? null : featureTypes.toArray(new String[featureTypes.size()]); List sfs = seqs[i].findFeatures(startCol + 1, endCol + 1, types); seqScores[i] = 0; scores[i] = 0.0; Iterator it = sfs.listIterator(); while (it.hasNext()) { SequenceFeature sf = it.next(); /* * accept all features with null or empty group, otherwise * check group is one of the currently visible groups */ String featureGroup = sf.getFeatureGroup(); if (groups != null && featureGroup != null && !"".equals(featureGroup) && !groups.contains(featureGroup)) { it.remove(); } else { float score = sf.getScore(); if (FEATURE_SCORE.equals(method) && !Float.isNaN(score)) { if (seqScores[i] == 0) { hasScores++; } seqScores[i]++; hasScore[i] = true; scores[i] += score; // take the first instance of this score // ?? } } } feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]); if (!sfs.isEmpty()) { if (method == FEATURE_LABEL) { // order the labels by alphabet (not yet implemented) String[] labs = new String[sfs.size()]; for (int l = 0; l < sfs.size(); l++) { SequenceFeature sf = sfs.get(l); String description = sf.getDescription(); labs[l] = (description != null ? description : sf.getType()); } QuickSort.sort(labs, feats[i]); } } if (hasScore[i]) { // compute average score scores[i] /= seqScores[i]; // update the score bounds. if (hasScores == 1) { min = scores[i]; max = min; } else { max = Math.max(max, scores[i]); min = Math.min(min, scores[i]); } } } boolean doSort = false; if (FEATURE_SCORE.equals(method)) { if (hasScores == 0) { return; // do nothing - no scores present to sort by. } // pad score matrix if (hasScores < seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i]) { scores[i] = (max + 1 + i); } else { // int nf = (feats[i] == null) ? 0 // : ((SequenceFeature[]) feats[i]).length; // // System.err.println("Sorting on Score: seq " + // seqs[i].getName() // + " Feats: " + nf + " Score : " + scores[i]); } } } doSort = true; } else if (FEATURE_DENSITY.equals(method)) { for (int i = 0; i < seqs.length; i++) { int featureCount = feats[i] == null ? 0 : ((SequenceFeature[]) feats[i]).length; scores[i] = featureCount; // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ // " Feats: "+featureCount+" Score : "+scores[i]); } doSort = true; } if (doSort) { QuickSort.sortByDouble(scores, seqs, getInstance().sortByFeatureAscending); } setOrder(alignment, seqs); } /** * Builds a string hash of criteria for sorting, and if unchanged from last * time, reverse the sort order * * @param method * @param featureTypes * @param groups * @param startCol * @param endCol */ protected static void flipFeatureSortIfUnchanged(String method, List featureTypes, List groups, final int startCol, final int endCol) { StringBuilder sb = new StringBuilder(64); sb.append(startCol).append(method).append(endCol); if (featureTypes != null) { Collections.sort(featureTypes); sb.append(featureTypes.toString()); } if (groups != null) { Collections.sort(groups); sb.append(groups.toString()); } String scoreCriteria = sb.toString(); /* * if resorting on the same criteria, toggle sort order */ AlignmentSorter as = getInstance(); if (as.sortByFeatureCriteria == null || !scoreCriteria.equals(as.sortByFeatureCriteria)) { as.sortByFeatureAscending = true; } else { as.sortByFeatureAscending = !as.sortByFeatureAscending; } as.sortByFeatureCriteria = scoreCriteria; } /** * Set the alignment's sequences list to contain the sequences from a * temporary list, first adding all the elements from the tmp list, then adding all sequences in the alignment that * are not in the list. Option to do the final sort either in order or in reverse order. * * @param align The alignment being sorted * @param tmp * the temporary sequence list * @param ascending * false for reversed order; only sequences already in * the alignment will be used (which is actually already guaranteed * by vectorSubsetToArray) */ private static void set(AlignmentI align, List tmp, boolean ascending) { set(align, vectorSubsetToArray(align.getSequences(), tmp), ascending); } /** * Set the alignment's sequences list to contain these sequences, either in * this order or its reverse. * * @param align * @param seqs * the new sequence array * @param ascending * false for reversed order; if ascending, only sequences already in * the alignment will be used; if descending, then a direct 1:1 * replacement is made */ private static void set(AlignmentI align, SequenceI[] seqs, boolean ascending) { if (ascending) { setOrder(align, seqs); } else { setReverseOrder(align, seqs); } } /** * Replace the alignment's sequences with values in an array, clearing the * alignment's sequence list and filtering for sequences that are actually in * the alignment already. * * @param align * the Alignment * @param seqs * the array of replacement values, of any length */ public static void setOrder(AlignmentI align, SequenceI[] seqs) { // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work List seqList = align.getSequences(); synchronized (seqList) { List tmp = new ArrayList<>(); for (int i = 0; i < seqs.length; i++) { if (seqList.contains(seqs[i])) { tmp.add(seqs[i]); } } seqList.clear(); // User may have hidden seqs, then clicked undo or redo for (int i = 0; i < tmp.size(); i++) { seqList.add(tmp.get(i)); } } } /** * Replace the alignment's sequences or a subset of those sequences with * values in an array in reverse order. All sequences are replaced; no check * is made that these sequences are in the alignment already. * * @param align * the Alignment * @param seqs * the array of replacement values, length must be less than or equal * to Alignment.sequences.size() */ private static void setReverseOrder(AlignmentI align, SequenceI[] seqs) { int nSeq = seqs.length; int len = (nSeq + (nSeq % 2)) / 2; // int len = 0; // // if ((nSeq % 2) == 0) // { // len = nSeq / 2; // } // else // { // len = (nSeq + 1) / 2; // } // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work List seqList = align.getSequences(); synchronized (seqList) { for (int i = 0; i < len; i++) { // SequenceI tmp = seqs[i]; seqList.set(i, seqs[nSeq - i - 1]); seqList.set(nSeq - i - 1, seqs[i]); } } } /** * Create and array of reordered sequences in order first from tmp that are * present in seqList already, then, after that, any remaining sequences in * seqList not in tmp. Any sequences in tmp that are not in seqList already * are discarded. * * @param seqList * thread safe collection of sequences originally in the alignment * @param tmp * thread safe collection of sequences or subsequences possibly in * seqList * * @return intersect(tmp,seqList)+intersect(complement(tmp),seqList) */ private static SequenceI[] vectorSubsetToArray(List seqList, List tmp) { ArrayList seqs = new ArrayList<>(); int n = seqList.size(); BitSet bs = new BitSet(n); bs.set(0, n); for (int i = 0, nt = tmp.size(); i < nt; i++) { SequenceI sq = tmp.get(i); int idx = seqList.indexOf(sq); if (idx >= 0 && bs.get(idx)) { seqs.add(sq); bs.clear(idx); } } for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { seqs.add(seqList.get(i)); } return seqs.toArray(new SequenceI[seqs.size()]); } }