/* * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ package jalview.analysis; import java.util.*; import jalview.datamodel.*; import jalview.util.*; /** * Routines for manipulating the order of a multiple sequence alignment TODO: * this class retains some global states concerning sort-order which should be * made attributes for the caller's alignment visualization. TODO: refactor to * allow a subset of selected sequences to be sorted within the context of a * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input * data mapping to each tobesorted element to use ], Alignment context of * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector * resulting from applying the operation to tobesorted should be mapped back to * the original positions in alignment. Otherwise, normal behaviour is to re * order alignment so that tobesorted is sorted and grouped together starting * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3 * becomes a,tb1,tb2,tb3,b,c) */ public class AlignmentSorter { /** * todo: refactor searches to follow a basic pattern: (search property, last * search state, current sort direction) */ static boolean sortIdAscending = true; static int lastGroupHash = 0; static boolean sortGroupAscending = true; static AlignmentOrder lastOrder = null; static boolean sortOrderAscending = true; static NJTree lastTree = null; static boolean sortTreeAscending = true; /** * last Annotation Label used by sortByScore */ private static String lastSortByScore; private static boolean sortByScoreAscending = true; /** * compact representation of last arguments to SortByFeatureScore */ private static String lastSortByFeatureScore; private static boolean sortByFeatureScoreAscending = true; private static boolean sortLengthAscending; /** * Sort by Percentage Identity w.r.t. s * * @param align * AlignmentI * @param s * SequenceI * @param tosort * sequences from align that are to be sorted. */ public static void sortByPID(AlignmentI align, SequenceI s, SequenceI[] tosort) { sortByPID(align, s, tosort, 0, -1); } /** * Sort by Percentage Identity w.r.t. s * * @param align * AlignmentI * @param s * SequenceI * @param tosort * sequences from align that are to be sorted. * @param start * start column (0 for beginning * @param end */ public static void sortByPID(AlignmentI align, SequenceI s, SequenceI[] tosort, int start, int end) { int nSeq = align.getHeight(); float[] scores = new float[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { scores[i] = Comparison.PID(align.getSequenceAt(i) .getSequenceAsString(), s.getSequenceAsString()); seqs[i] = align.getSequenceAt(i); } QuickSort.sort(scores, 0, scores.length - 1, seqs); setReverseOrder(align, seqs); } /** * Reverse the order of the sort * * @param align * DOCUMENT ME! * @param seqs * DOCUMENT ME! */ private static void setReverseOrder(AlignmentI align, SequenceI[] seqs) { int nSeq = seqs.length; int len = 0; if ((nSeq % 2) == 0) { len = nSeq / 2; } else { len = (nSeq + 1) / 2; } // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work for (int i = 0; i < len; i++) { // SequenceI tmp = seqs[i]; align.getSequences().setElementAt(seqs[nSeq - i - 1], i); align.getSequences().setElementAt(seqs[i], nSeq - i - 1); } } /** * Sets the Alignment object with the given sequences * * @param align * Alignment object to be updated * @param tmp * sequences as a vector */ private static void setOrder(AlignmentI align, Vector tmp) { setOrder(align, vectorSubsetToArray(tmp, align.getSequences())); } /** * Sets the Alignment object with the given sequences * * @param align * DOCUMENT ME! * @param seqs * sequences as an array */ public static void setOrder(AlignmentI align, SequenceI[] seqs) { // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work Vector algn = align.getSequences(); Vector tmp = new Vector(); for (int i = 0; i < seqs.length; i++) { if (algn.contains(seqs[i])) { tmp.addElement(seqs[i]); } } algn.removeAllElements(); // User may have hidden seqs, then clicked undo or redo for (int i = 0; i < tmp.size(); i++) { algn.addElement(tmp.elementAt(i)); } } /** * Sorts by ID. Numbers are sorted before letters. * * @param align * The alignment object to sort */ public static void sortByID(AlignmentI align) { int nSeq = align.getHeight(); String[] ids = new String[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { ids[i] = align.getSequenceAt(i).getName(); seqs[i] = align.getSequenceAt(i); } QuickSort.sort(ids, seqs); if (sortIdAscending) { setReverseOrder(align, seqs); } else { setOrder(align, seqs); } sortIdAscending = !sortIdAscending; } /** * Sorts by sequence length * * @param align * The alignment object to sort */ public static void sortByLength(AlignmentI align) { int nSeq = align.getHeight(); float[] length = new float[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; for (int i = 0; i < nSeq; i++) { seqs[i] = align.getSequenceAt(i); length[i] = (float) (seqs[i].getEnd() - seqs[i].getStart()); } QuickSort.sort(length, seqs); if (sortLengthAscending) { setReverseOrder(align, seqs); } else { setOrder(align, seqs); } sortLengthAscending = !sortLengthAscending; } /** * Sorts the alignment by size of group.
* Maintains the order of sequences in each group by order in given alignment * object. * * @param align * sorts the given alignment object by group */ public static void sortByGroup(AlignmentI align) { // MAINTAINS ORIGNAL SEQUENCE ORDER, // ORDERS BY GROUP SIZE Vector groups = new Vector(); if (groups.hashCode() != lastGroupHash) { sortGroupAscending = true; lastGroupHash = groups.hashCode(); } else { sortGroupAscending = !sortGroupAscending; } // SORTS GROUPS BY SIZE // //////////////////// for (int i = 0; i < align.getGroups().size(); i++) { SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i); for (int j = 0; j < groups.size(); j++) { SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j); if (sg.getSize() > sg2.getSize()) { groups.insertElementAt(sg, j); break; } } if (!groups.contains(sg)) { groups.addElement(sg); } } // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER // ///////////////////////////////////////////// Vector seqs = new Vector(); for (int i = 0; i < groups.size(); i++) { SequenceGroup sg = (SequenceGroup) groups.elementAt(i); SequenceI[] orderedseqs = sg.getSequencesInOrder(align); for (int j = 0; j < orderedseqs.length; j++) { seqs.addElement(orderedseqs[j]); } } if (sortGroupAscending) { setOrder(align, seqs); } else { setReverseOrder(align, vectorSubsetToArray(seqs, align.getSequences())); } } /** * Converts Vector to array. java 1.18 does not have Vector.toArray() * * @param tmp * Vector of SequenceI objects * * @return array of Sequence[] */ private static SequenceI[] vectorToArray(Vector tmp) { SequenceI[] seqs = new SequenceI[tmp.size()]; for (int i = 0; i < tmp.size(); i++) { seqs[i] = (SequenceI) tmp.elementAt(i); } return seqs; } /** * DOCUMENT ME! * * @param tmp * DOCUMENT ME! * @param mask * DOCUMENT ME! * * @return DOCUMENT ME! */ private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask) { Vector seqs = new Vector(); int i, idx; boolean[] tmask = new boolean[mask.size()]; for (i = 0; i < mask.size(); i++) { tmask[i] = true; } for (i = 0; i < tmp.size(); i++) { Object sq = tmp.elementAt(i); idx = mask.indexOf(sq); if (idx > -1 && tmask[idx]) { tmask[idx] = false; seqs.addElement(sq); } } for (i = 0; i < tmask.length; i++) { if (tmask[i]) { seqs.addElement(mask.elementAt(i)); } } return vectorToArray(seqs); } /** * Sorts by a given AlignmentOrder object * * @param align * Alignment to order * @param order * specified order for alignment */ public static void sortBy(AlignmentI align, AlignmentOrder order) { // Get an ordered vector of sequences which may also be present in align Vector tmp = order.getOrder(); if (lastOrder == order) { sortOrderAscending = !sortOrderAscending; } else { sortOrderAscending = true; } if (sortOrderAscending) { setOrder(align, tmp); } else { setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences())); } } /** * DOCUMENT ME! * * @param align * alignment to order * @param tree * tree which has * * @return DOCUMENT ME! */ private static Vector getOrderByTree(AlignmentI align, NJTree tree) { int nSeq = align.getHeight(); Vector tmp = new Vector(); tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences()); if (tmp.size() != nSeq) { // TODO: JBPNote - decide if this is always an error // (eg. not when a tree is associated to another alignment which has more // sequences) if (tmp.size() != nSeq) { addStrays(align, tmp); } if (tmp.size() != nSeq) { System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq=" + nSeq + " in getOrderByTree - tree contains sequences not in alignment"); } } return tmp; } /** * Sorts the alignment by a given tree * * @param align * alignment to order * @param tree * tree which has */ public static void sortByTree(AlignmentI align, NJTree tree) { Vector tmp = getOrderByTree(align, tree); // tmp should properly permute align with tree. if (lastTree != tree) { sortTreeAscending = true; lastTree = tree; } else { sortTreeAscending = !sortTreeAscending; } if (sortTreeAscending) { setOrder(align, tmp); } else { setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences())); } } /** * DOCUMENT ME! * * @param align * DOCUMENT ME! * @param seqs * DOCUMENT ME! */ private static void addStrays(AlignmentI align, Vector seqs) { int nSeq = align.getHeight(); for (int i = 0; i < nSeq; i++) { if (!seqs.contains(align.getSequenceAt(i))) { seqs.addElement(align.getSequenceAt(i)); } } if (nSeq != seqs.size()) { System.err .println("ERROR: Size still not right even after addStrays"); } } /** * DOCUMENT ME! * * @param node * DOCUMENT ME! * @param tmp * DOCUMENT ME! * @param seqset * DOCUMENT ME! * * @return DOCUMENT ME! */ private static Vector _sortByTree(SequenceNode node, Vector tmp, Vector seqset) { if (node == null) { return tmp; } SequenceNode left = (SequenceNode) node.left(); SequenceNode right = (SequenceNode) node.right(); if ((left == null) && (right == null)) { if (!node.isPlaceholder() && (node.element() != null)) { if (node.element() instanceof SequenceI) { if (!tmp.contains(node.element())) // && (seqset==null || seqset.size()==0 || seqset.contains(tmp))) { tmp.addElement((SequenceI) node.element()); } } } return tmp; } else { _sortByTree(left, tmp, seqset); _sortByTree(right, tmp, seqset); } return tmp; } // Ordering Objects // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in // appropriate order // /** * recover the order of sequences given by the safe numbering scheme introducd * SeqsetUtils.uniquify. */ public static void recoverOrder(SequenceI[] alignment) { float[] ids = new float[alignment.length]; for (int i = 0; i < alignment.length; i++) { ids[i] = (new Float(alignment[i].getName().substring(8))) .floatValue(); } jalview.util.QuickSort.sort(ids, alignment); } /** * Sort sequence in order of increasing score attribute for annotation with a * particular scoreLabel. Or reverse if same label was used previously * * @param scoreLabel * exact label for sequence associated AlignmentAnnotation scores to * use for sorting. * @param alignment * sequences to be sorted */ public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment) { SequenceI[] seqs = alignment.getSequencesArray(); boolean[] hasScore = new boolean[seqs.length]; // per sequence score // presence int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; double min = 0, max = 0; for (int i = 0; i < seqs.length; i++) { AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel); if (scoreAnn != null) { hasScores++; hasScore[i] = true; scores[i] = scoreAnn[0].getScore(); // take the first instance of this // score. if (hasScores == 1) { max = min = scores[i]; } else { if (max < scores[i]) { max = scores[i]; } if (min > scores[i]) { min = scores[i]; } } } else { hasScore[i] = false; } } if (hasScores == 0) { return; // do nothing - no scores present to sort by. } if (hasScores < seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i]) { scores[i] = (max + i + 1.0); } } } jalview.util.QuickSort.sort(scores, seqs); if (lastSortByScore != scoreLabel) { lastSortByScore = scoreLabel; setOrder(alignment, seqs); } else { setReverseOrder(alignment, seqs); } } /** * types of feature ordering: Sort by score : average score - or total score - * over all features in region Sort by feature label text: (or if null - * feature type text) - numerical or alphabetical Sort by feature density: * based on counts - ignoring individual text or scores for each feature */ public static String FEATURE_SCORE = "average_score"; public static String FEATURE_LABEL = "text"; public static String FEATURE_DENSITY = "density"; /** * sort the alignment using the features on each sequence found between start * and stop with the given featureLabel (and optional group qualifier) * * @param featureLabel * (may not be null) * @param groupLabel * (may be null) * @param start * (-1 to include non-positional features) * @param stop * (-1 to only sort on non-positional features) * @param alignment * - aligned sequences containing features * @param method * - one of the string constants FEATURE_SCORE, FEATURE_LABEL, * FEATURE_DENSITY */ public static void sortByFeature(String featureLabel, String groupLabel, int start, int stop, AlignmentI alignment, String method) { sortByFeature(featureLabel == null ? null : new String[] { featureLabel }, groupLabel == null ? null : new String[] { groupLabel }, start, stop, alignment, method); } private static boolean containsIgnoreCase(final String lab, final String[] labs) { if (labs == null) { return true; } if (lab == null) { return false; } for (int q = 0; q < labs.length; q++) { if (labs[q] != null && lab.equalsIgnoreCase(labs[q])) { return true; } } return false; } public static void sortByFeature(String[] featureLabels, String[] groupLabels, int start, int stop, AlignmentI alignment, String method) { if (method != FEATURE_SCORE && method != FEATURE_LABEL && method != FEATURE_DENSITY) { throw new Error( "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY."); } boolean ignoreScore = method != FEATURE_SCORE; StringBuffer scoreLabel = new StringBuffer(); scoreLabel.append(start + stop + method); // This doesn't quite work yet - we'd like to have a canonical ordering that // can be preserved from call to call for (int i = 0; featureLabels != null && i < featureLabels.length; i++) { scoreLabel.append(featureLabels[i] == null ? "null" : featureLabels[i]); } for (int i = 0; groupLabels != null && i < groupLabels.length; i++) { scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]); } SequenceI[] seqs = alignment.getSequencesArray(); boolean[] hasScore = new boolean[seqs.length]; // per sequence score // presence int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; int[] seqScores = new int[seqs.length]; Object[] feats = new Object[seqs.length]; double min = 0, max = 0; for (int i = 0; i < seqs.length; i++) { SequenceFeature[] sf = seqs[i].getSequenceFeatures(); if (sf == null && seqs[i].getDatasetSequence() != null) { sf = seqs[i].getDatasetSequence().getSequenceFeatures(); } if (sf == null) { sf = new SequenceFeature[0]; } else { SequenceFeature[] tmp = new SequenceFeature[sf.length]; for (int s = 0; s < tmp.length; s++) { tmp[s] = sf[s]; } sf = tmp; } int sstart = (start == -1) ? start : seqs[i].findPosition(start); int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop); seqScores[i] = 0; scores[i] = 0.0; int n = sf.length; for (int f = 0; f < sf.length; f++) { // filter for selection criteria if ( // ignore features outwith alignment start-stop positions. (sf[f].end < sstart || sf[f].begin > sstop) || // or ignore based on selection criteria (featureLabels != null && !AlignmentSorter .containsIgnoreCase(sf[f].type, featureLabels)) || (groupLabels != null // problem here: we cannot eliminate null feature group features && (sf[f].getFeatureGroup() != null && !AlignmentSorter .containsIgnoreCase(sf[f].getFeatureGroup(), groupLabels)))) { // forget about this feature sf[f] = null; n--; } else { // or, also take a look at the scores if necessary. if (!ignoreScore && sf[f].getScore() != Float.NaN) { if (seqScores[i] == 0) { hasScores++; } seqScores[i]++; hasScore[i] = true; scores[i] += sf[f].getScore(); // take the first instance of this // score. } } } SequenceFeature[] fs; feats[i] = fs = new SequenceFeature[n]; if (n > 0) { n = 0; for (int f = 0; f < sf.length; f++) { if (sf[f] != null) { ((SequenceFeature[]) feats[i])[n++] = sf[f]; } } if (method == FEATURE_LABEL) { // order the labels by alphabet String[] labs = new String[fs.length]; for (int l = 0; l < labs.length; l++) { labs[l] = (fs[l].getDescription() != null ? fs[l] .getDescription() : fs[l].getType()); } jalview.util.QuickSort.sort(labs, ((Object[]) feats[i])); } } if (hasScore[i]) { // compute average score scores[i] /= seqScores[i]; // update the score bounds. if (hasScores == 1) { max = min = scores[i]; } else { if (max < scores[i]) { max = scores[i]; } if (min > scores[i]) { min = scores[i]; } } } } if (method == FEATURE_SCORE) { if (hasScores == 0) { return; // do nothing - no scores present to sort by. } // pad score matrix if (hasScores < seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i]) { scores[i] = (max + 1 + i); } else { int nf = (feats[i] == null) ? 0 : ((SequenceFeature[]) feats[i]).length; // System.err.println("Sorting on Score: seq "+seqs[i].getName()+ // " Feats: "+nf+" Score : "+scores[i]); } } } jalview.util.QuickSort.sort(scores, seqs); } else if (method == FEATURE_DENSITY) { // break ties between equivalent numbers for adjacent sequences by adding // 1/Nseq*i on the original order double fr = 0.9 / (1.0 * seqs.length); for (int i = 0; i < seqs.length; i++) { double nf; scores[i] = (0.05 + fr * i) + (nf = ((feats[i] == null) ? 0.0 : 1.0 * ((SequenceFeature[]) feats[i]).length)); // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ // " Feats: "+nf+" Score : "+scores[i]); } jalview.util.QuickSort.sort(scores, seqs); } else { if (method == FEATURE_LABEL) { throw new Error("Not yet implemented."); } } if (lastSortByFeatureScore == null || !scoreLabel.toString().equals(lastSortByFeatureScore)) { sortByFeatureScoreAscending = true; } else { sortByFeatureScoreAscending = !sortByFeatureScoreAscending; } if (sortByFeatureScoreAscending) { setOrder(alignment, seqs); } else { setReverseOrder(alignment, seqs); } lastSortByFeatureScore = scoreLabel.toString(); } }