X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentSorter.java;h=25717d03bcfc77f8842648e3cbe2f6395e56eb4f;hb=797df64fa2a0a30773d0f48f5494d4155e5a8be3;hp=a3493f7d69363c7b7a274b84f3a44aaeb419e584;hpb=198a4921d78d176afc0cb709bdbcb4627afd1e8b;p=jalview.git diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java index a3493f7..25717d0 100755 --- a/src/jalview/analysis/AlignmentSorter.java +++ b/src/jalview/analysis/AlignmentSorter.java @@ -1,20 +1,19 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ package jalview.analysis; @@ -23,34 +22,88 @@ import java.util.*; import jalview.datamodel.*; import jalview.util.*; -/** - * Routines for manipulating the order of a multiple sequence alignment - * TODO: this class retains some global states concerning sort-order which should be made attributes for the caller's alignment visualization. - * TODO: refactor to allow a subset of selected sequences to be sorted within the context of a whole alignment. - * Sort method template is: SequenceI[] tobesorted, [ input data mapping to each tobesorted element to use ], Alignment context of tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie seuqence to be sorted w.r.t.]) - * sortinplace implies that the sorted vector resulting from applying the operation to tobesorted should be mapped back to the original positions in alignment. - * Otherwise, normal behaviour is to re order alignment so that tobesorted is sorted and grouped together starting from the first tobesorted position in the alignment. - * e.g. (a,tb2,b,tb1,c,tb3 becomes a,tb1,tb2,tb3,b,c) +/** + * Routines for manipulating the order of a multiple sequence alignment TODO: + * this class retains some global states concerning sort-order which should be + * made attributes for the caller's alignment visualization. TODO: refactor to + * allow a subset of selected sequences to be sorted within the context of a + * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input + * data mapping to each tobesorted element to use ], Alignment context of + * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie + * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector + * resulting from applying the operation to tobesorted should be mapped back to + * the original positions in alignment. Otherwise, normal behaviour is to re + * order alignment so that tobesorted is sorted and grouped together starting + * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3 + * becomes a,tb1,tb2,tb3,b,c) */ public class AlignmentSorter { + /** + * todo: refactor searches to follow a basic pattern: (search property, last + * search state, current sort direction) + */ static boolean sortIdAscending = true; + static int lastGroupHash = 0; + static boolean sortGroupAscending = true; + static AlignmentOrder lastOrder = null; + static boolean sortOrderAscending = true; + static NJTree lastTree = null; + static boolean sortTreeAscending = true; + + /** + * last Annotation Label used by sortByScore + */ private static String lastSortByScore; + private static boolean sortByScoreAscending = true; + + /** + * compact representation of last arguments to SortByFeatureScore + */ + private static String lastSortByFeatureScore; + + private static boolean sortByFeatureScoreAscending = true; + + private static boolean sortLengthAscending; + /** * Sort by Percentage Identity w.r.t. s - * - * @param align AlignmentI - * @param s SequenceI - * @param tosort sequences from align that are to be sorted. + * + * @param align + * AlignmentI + * @param s + * SequenceI + * @param tosort + * sequences from align that are to be sorted. */ - public static void sortByPID(AlignmentI align, SequenceI s, SequenceI[] tosort) + public static void sortByPID(AlignmentI align, SequenceI s, + SequenceI[] tosort) + { + sortByPID(align, s, tosort, 0, -1); + } + + /** + * Sort by Percentage Identity w.r.t. s + * + * @param align + * AlignmentI + * @param s + * SequenceI + * @param tosort + * sequences from align that are to be sorted. + * @param start + * start column (0 for beginning + * @param end + */ + public static void sortByPID(AlignmentI align, SequenceI s, + SequenceI[] tosort, int start, int end) { int nSeq = align.getHeight(); @@ -59,8 +112,8 @@ public class AlignmentSorter for (int i = 0; i < nSeq; i++) { - scores[i] = Comparison.PID(align.getSequenceAt(i).getSequenceAsString(), - s.getSequenceAsString()); + scores[i] = Comparison.PID(align.getSequenceAt(i) + .getSequenceAsString(), s.getSequenceAsString()); seqs[i] = align.getSequenceAt(i); } @@ -71,9 +124,11 @@ public class AlignmentSorter /** * Reverse the order of the sort - * - * @param align DOCUMENT ME! - * @param seqs DOCUMENT ME! + * + * @param align + * DOCUMENT ME! + * @param seqs + * DOCUMENT ME! */ private static void setReverseOrder(AlignmentI align, SequenceI[] seqs) { @@ -81,7 +136,7 @@ public class AlignmentSorter int len = 0; - if ( (nSeq % 2) == 0) + if ((nSeq % 2) == 0) { len = nSeq / 2; } @@ -93,7 +148,7 @@ public class AlignmentSorter // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work for (int i = 0; i < len; i++) { - //SequenceI tmp = seqs[i]; + // SequenceI tmp = seqs[i]; align.getSequences().setElementAt(seqs[nSeq - i - 1], i); align.getSequences().setElementAt(seqs[i], nSeq - i - 1); } @@ -101,9 +156,11 @@ public class AlignmentSorter /** * Sets the Alignment object with the given sequences - * - * @param align Alignment object to be updated - * @param tmp sequences as a vector + * + * @param align + * Alignment object to be updated + * @param tmp + * sequences as a vector */ private static void setOrder(AlignmentI align, Vector tmp) { @@ -112,9 +169,11 @@ public class AlignmentSorter /** * Sets the Alignment object with the given sequences - * - * @param align DOCUMENT ME! - * @param seqs sequences as an array + * + * @param align + * DOCUMENT ME! + * @param seqs + * sequences as an array */ public static void setOrder(AlignmentI align, SequenceI[] seqs) { @@ -131,7 +190,7 @@ public class AlignmentSorter } algn.removeAllElements(); - //User may have hidden seqs, then clicked undo or redo + // User may have hidden seqs, then clicked undo or redo for (int i = 0; i < tmp.size(); i++) { algn.addElement(tmp.elementAt(i)); @@ -141,8 +200,9 @@ public class AlignmentSorter /** * Sorts by ID. Numbers are sorted before letters. - * - * @param align The alignment object to sort + * + * @param align + * The alignment object to sort */ public static void sortByID(AlignmentI align) { @@ -172,16 +232,50 @@ public class AlignmentSorter } /** - * Sorts the alignment by size of group. - *
Maintains the order of sequences in each group - * by order in given alignment object. - * - * @param align sorts the given alignment object by group + * Sorts by sequence length + * + * @param align + * The alignment object to sort + */ + public static void sortByLength(AlignmentI align) + { + int nSeq = align.getHeight(); + + float[] length = new float[nSeq]; + SequenceI[] seqs = new SequenceI[nSeq]; + + for (int i = 0; i < nSeq; i++) + { + seqs[i] = align.getSequenceAt(i); + length[i] = (float) (seqs[i].getEnd() - seqs[i].getStart()); + } + + QuickSort.sort(length, seqs); + + if (sortLengthAscending) + { + setReverseOrder(align, seqs); + } + else + { + setOrder(align, seqs); + } + + sortLengthAscending = !sortLengthAscending; + } + + /** + * Sorts the alignment by size of group.
+ * Maintains the order of sequences in each group by order in given alignment + * object. + * + * @param align + * sorts the given alignment object by group */ public static void sortByGroup(AlignmentI align) { - //MAINTAINS ORIGNAL SEQUENCE ORDER, - //ORDERS BY GROUP SIZE + // MAINTAINS ORIGNAL SEQUENCE ORDER, + // ORDERS BY GROUP SIZE Vector groups = new Vector(); if (groups.hashCode() != lastGroupHash) @@ -194,8 +288,8 @@ public class AlignmentSorter sortGroupAscending = !sortGroupAscending; } - //SORTS GROUPS BY SIZE - ////////////////////// + // SORTS GROUPS BY SIZE + // //////////////////// for (int i = 0; i < align.getGroups().size(); i++) { SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i); @@ -218,8 +312,8 @@ public class AlignmentSorter } } - //NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER - /////////////////////////////////////////////// + // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER + // ///////////////////////////////////////////// Vector seqs = new Vector(); for (int i = 0; i < groups.size(); i++) @@ -240,16 +334,16 @@ public class AlignmentSorter else { setReverseOrder(align, - vectorSubsetToArray(seqs, align.getSequences())); + vectorSubsetToArray(seqs, align.getSequences())); } } /** - * Converts Vector to array. - * java 1.18 does not have Vector.toArray() - * - * @param tmp Vector of SequenceI objects - * + * Converts Vector to array. java 1.18 does not have Vector.toArray() + * + * @param tmp + * Vector of SequenceI objects + * * @return array of Sequence[] */ private static SequenceI[] vectorToArray(Vector tmp) @@ -266,16 +360,18 @@ public class AlignmentSorter /** * DOCUMENT ME! - * - * @param tmp DOCUMENT ME! - * @param mask DOCUMENT ME! - * + * + * @param tmp + * DOCUMENT ME! + * @param mask + * DOCUMENT ME! + * * @return DOCUMENT ME! */ private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask) { Vector seqs = new Vector(); - int i; + int i, idx; boolean[] tmask = new boolean[mask.size()]; for (i = 0; i < mask.size(); i++) @@ -286,10 +382,10 @@ public class AlignmentSorter for (i = 0; i < tmp.size(); i++) { Object sq = tmp.elementAt(i); - - if (mask.contains(sq) && tmask[mask.indexOf(sq)]) + idx = mask.indexOf(sq); + if (idx > -1 && tmask[idx]) { - tmask[mask.indexOf(sq)] = false; + tmask[idx] = false; seqs.addElement(sq); } } @@ -307,9 +403,11 @@ public class AlignmentSorter /** * Sorts by a given AlignmentOrder object - * - * @param align Alignment to order - * @param order specified order for alignment + * + * @param align + * Alignment to order + * @param order + * specified order for alignment */ public static void sortBy(AlignmentI align, AlignmentOrder order) { @@ -331,17 +429,18 @@ public class AlignmentSorter } else { - setReverseOrder(align, - vectorSubsetToArray(tmp, align.getSequences())); + setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences())); } } /** * DOCUMENT ME! - * - * @param align alignment to order - * @param tree tree which has - * + * + * @param align + * alignment to order + * @param tree + * tree which has + * * @return DOCUMENT ME! */ private static Vector getOrderByTree(AlignmentI align, NJTree tree) @@ -356,16 +455,16 @@ public class AlignmentSorter { // TODO: JBPNote - decide if this is always an error // (eg. not when a tree is associated to another alignment which has more - // sequences) - if (tmp.size() < nSeq) + // sequences) + if (tmp.size() != nSeq) { addStrays(align, tmp); } if (tmp.size() != nSeq) { - System.err.println("ERROR: tmp.size()=" + tmp.size() + - " != nseq=" + nSeq + " in getOrderByTree"); + System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq=" + + nSeq + " in getOrderByTree - tree contains sequences not in alignment"); } } @@ -374,9 +473,11 @@ public class AlignmentSorter /** * Sorts the alignment by a given tree - * - * @param align alignment to order - * @param tree tree which has + * + * @param align + * alignment to order + * @param tree + * tree which has */ public static void sortByTree(AlignmentI align, NJTree tree) { @@ -399,16 +500,17 @@ public class AlignmentSorter } else { - setReverseOrder(align, - vectorSubsetToArray(tmp, align.getSequences())); + setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences())); } } /** * DOCUMENT ME! - * - * @param align DOCUMENT ME! - * @param seqs DOCUMENT ME! + * + * @param align + * DOCUMENT ME! + * @param seqs + * DOCUMENT ME! */ private static void addStrays(AlignmentI align, Vector seqs) { @@ -424,22 +526,25 @@ public class AlignmentSorter if (nSeq != seqs.size()) { - System.err.println( - "ERROR: Size still not right even after addStrays"); + System.err + .println("ERROR: Size still not right even after addStrays"); } } /** * DOCUMENT ME! - * - * @param node DOCUMENT ME! - * @param tmp DOCUMENT ME! - * @param seqset DOCUMENT ME! - * + * + * @param node + * DOCUMENT ME! + * @param tmp + * DOCUMENT ME! + * @param seqset + * DOCUMENT ME! + * * @return DOCUMENT ME! */ private static Vector _sortByTree(SequenceNode node, Vector tmp, - Vector seqset) + Vector seqset) { if (node == null) { @@ -449,15 +554,15 @@ public class AlignmentSorter SequenceNode left = (SequenceNode) node.left(); SequenceNode right = (SequenceNode) node.right(); - if ( (left == null) && (right == null)) + if ((left == null) && (right == null)) { if (!node.isPlaceholder() && (node.element() != null)) { if (node.element() instanceof SequenceI) { - if (!tmp.contains(node.element())) + if (!tmp.contains(node.element())) // && (seqset==null || seqset.size()==0 || seqset.contains(tmp))) { - tmp.addElement( (SequenceI) node.element()); + tmp.addElement((SequenceI) node.element()); } } } @@ -474,7 +579,8 @@ public class AlignmentSorter } // Ordering Objects - // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in appropriate order + // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in + // appropriate order // /** @@ -487,42 +593,52 @@ public class AlignmentSorter for (int i = 0; i < alignment.length; i++) { - ids[i] = (new Float(alignment[i].getName().substring(8))).floatValue(); + ids[i] = (new Float(alignment[i].getName().substring(8))) + .floatValue(); } jalview.util.QuickSort.sort(ids, alignment); } + /** - * Sort sequence in order of increasing score attribute for annotation with a particular - * scoreLabel. Or reverse if same label was used previously - * @param scoreLabel exact label for sequence associated AlignmentAnnotation scores to use for sorting. - * @param alignment sequences to be sorted + * Sort sequence in order of increasing score attribute for annotation with a + * particular scoreLabel. Or reverse if same label was used previously + * + * @param scoreLabel + * exact label for sequence associated AlignmentAnnotation scores to + * use for sorting. + * @param alignment + * sequences to be sorted */ - public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment) + public static void sortByAnnotationScore(String scoreLabel, + AlignmentI alignment) { SequenceI[] seqs = alignment.getSequencesArray(); - boolean[] hasScore = new boolean[seqs.length]; // per sequence score presence - int hasScores=0; // number of scores present on set + boolean[] hasScore = new boolean[seqs.length]; // per sequence score + // presence + int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; - double min=0,max=0; + double min = 0, max = 0; for (int i = 0; i < seqs.length; i++) { AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel); - if (scoreAnn!=null) + if (scoreAnn != null) { hasScores++; hasScore[i] = true; - scores[i] = scoreAnn[0].getScore(); // take the first instance of this score. - if (hasScores==1) + scores[i] = scoreAnn[0].getScore(); // take the first instance of this + // score. + if (hasScores == 1) { max = min = scores[i]; - } else + } + else { - if (maxscores[i]) + if (min > scores[i]) { min = scores[i]; } @@ -533,28 +649,301 @@ public class AlignmentSorter hasScore[i] = false; } } - if (hasScores==0) + if (hasScores == 0) { return; // do nothing - no scores present to sort by. } - if (hasScores sstop) || + // or ignore based on selection criteria + (featureLabels != null && !AlignmentSorter + .containsIgnoreCase(sf[f].type, featureLabels)) + || (groupLabels != null + // problem here: we cannot eliminate null feature group features + && (sf[f].getFeatureGroup() != null && !AlignmentSorter + .containsIgnoreCase(sf[f].getFeatureGroup(), + groupLabels)))) + { + // forget about this feature + sf[f] = null; + n--; + } + else + { + // or, also take a look at the scores if necessary. + if (!ignoreScore && sf[f].getScore() != Float.NaN) + { + if (seqScores[i] == 0) + { + hasScores++; + } + seqScores[i]++; + hasScore[i] = true; + scores[i] += sf[f].getScore(); // take the first instance of this + // score. + } + } + } + SequenceFeature[] fs; + feats[i] = fs = new SequenceFeature[n]; + if (n > 0) + { + n = 0; + for (int f = 0; f < sf.length; f++) + { + if (sf[f] != null) + { + ((SequenceFeature[]) feats[i])[n++] = sf[f]; + } + } + if (method == FEATURE_LABEL) + { + // order the labels by alphabet + String[] labs = new String[fs.length]; + for (int l = 0; l < labs.length; l++) + { + labs[l] = (fs[l].getDescription() != null ? fs[l] + .getDescription() : fs[l].getType()); + } + jalview.util.QuickSort.sort(labs, ((Object[]) feats[i])); + } + } + if (hasScore[i]) + { + // compute average score + scores[i] /= seqScores[i]; + // update the score bounds. + if (hasScores == 1) + { + max = min = scores[i]; + } + else + { + if (max < scores[i]) + { + max = scores[i]; + } + if (min > scores[i]) + { + min = scores[i]; + } + } + } + } + + if (method == FEATURE_SCORE) + { + if (hasScores == 0) + { + return; // do nothing - no scores present to sort by. + } + // pad score matrix + if (hasScores < seqs.length) + { + for (int i = 0; i < seqs.length; i++) + { + if (!hasScore[i]) + { + scores[i] = (max + 1 + i); + } + else + { + int nf = (feats[i] == null) ? 0 + : ((SequenceFeature[]) feats[i]).length; + // System.err.println("Sorting on Score: seq "+seqs[i].getName()+ + // " Feats: "+nf+" Score : "+scores[i]); + } + } + } + + jalview.util.QuickSort.sort(scores, seqs); + } + else if (method == FEATURE_DENSITY) + { + + // break ties between equivalent numbers for adjacent sequences by adding + // 1/Nseq*i on the original order + double fr = 0.9 / (1.0 * seqs.length); + for (int i = 0; i < seqs.length; i++) + { + double nf; + scores[i] = (0.05 + fr * i) + + (nf = ((feats[i] == null) ? 0.0 + : 1.0 * ((SequenceFeature[]) feats[i]).length)); + // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ + // " Feats: "+nf+" Score : "+scores[i]); + } + jalview.util.QuickSort.sort(scores, seqs); + } + else + { + if (method == FEATURE_LABEL) + { + throw new Error("Not yet implemented."); + } + } + if (lastSortByFeatureScore == null + || !scoreLabel.toString().equals(lastSortByFeatureScore)) + { + sortByFeatureScoreAscending = true; + } + else + { + sortByFeatureScoreAscending = !sortByFeatureScoreAscending; + } + if (sortByFeatureScoreAscending) + { + setOrder(alignment, seqs); + } + else + { + setReverseOrder(alignment, seqs); + } + lastSortByFeatureScore = scoreLabel.toString(); + } + }