src/jalview/analysis/Grouping.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
   3  * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  10  *
  11  * Jalview is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty
  13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  14  * PURPOSE.  See the GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  17  */
  18 package jalview.analysis;
  19
  20 import jalview.datamodel.AlignmentI;
  21 import jalview.datamodel.SequenceFeature;
  22 import jalview.datamodel.SequenceGroup;
  23 import jalview.datamodel.SequenceI;
  24
  25 import java.util.Enumeration;
  26 import java.util.Hashtable;
  27 import java.util.Vector;
  28
  29 /**
  30  * various methods for defining groups on an alignment based on some other
  31  * properties
  32  *
  33  * @author JimP
  34  *
  35  */
  36 public class Grouping
  37 {
  38   /**
  39    * Divide the given sequences based on the equivalence of their corresponding
  40    * selectedChars string. If exgroups is provided, existing groups will be
  41    * subdivided.
  42    *
  43    * @param sequences
  44    * @param selectedChars
  45    * @param exgroups
  46    * @return
  47    */
  48   public static SequenceGroup[] makeGroupsFrom(SequenceI[] sequences,
  49           String[] selectedChars, Vector exgroups)
  50   {
  51     // TODO: determine how to get/recover input data for group generation
  52     Hashtable gps = new Hashtable();
  53     int width = 0, i;
  54     Hashtable pgroup = new Hashtable();
  55     if (exgroups != null)
  56     {
  57       SequenceGroup sg;
  58       for (Enumeration g = exgroups.elements(); g.hasMoreElements();)
  59       {
  60         sg = (SequenceGroup) g.nextElement();
  61         for (Enumeration sq = sg.getSequences(null).elements(); sq
  62                 .hasMoreElements();)
  63           pgroup.put(sq.nextElement().toString(), sg);
  64       }
  65     }
  66     for (i = 0; i < sequences.length; i++)
  67     {
  68       String schar = selectedChars[i];
  69       SequenceGroup pgp = (SequenceGroup) pgroup
  70               .get(((Object) sequences[i]).toString());
  71       if (pgp != null)
  72       {
  73         schar = pgp.getName() + ":" + schar;
  74       }
  75       Vector svec = (Vector) gps.get(schar);
  76       if (svec == null)
  77       {
  78         svec = new Vector();
  79         gps.put(schar, svec);
  80       }
  81       if (width < sequences[i].getLength())
  82       {
  83         width = sequences[i].getLength();
  84       }
  85       svec.addElement(sequences[i]);
  86     }
  87     // make some groups
  88     java.util.Enumeration sge = gps.keys();
  89     SequenceGroup[] groups = new SequenceGroup[gps.size()];
  90     i = 0;
  91     while (sge.hasMoreElements())
  92     {
  93       String key = (String) sge.nextElement();
  94       SequenceGroup group = new SequenceGroup((Vector) gps.get(key),
  95               "Subseq: " + key, null, true, true, false, 0, width - 1);
  96
  97       groups[i++] = group;
  98     }
  99     gps.clear();
 100     pgroup.clear();
 101     return groups;
 102   }
 103
 104   /**
 105    * subdivide the given sequences based on the distribution of features
 106    *
 107    * @param featureLabels
 108    *          - null or one or more feature types to filter on.
 109    * @param groupLabels
 110    *          - null or set of groups to filter features on
 111    * @param start
 112    *          - range for feature filter
 113    * @param stop
 114    *          - range for feature filter
 115    * @param sequences
 116    *          - sequences to be divided
 117    * @param exgroups
 118    *          - existing groups to be subdivided
 119    * @param method
 120    *          - density, description, score
 121    */
 122   public static void divideByFeature(String[] featureLabels,
 123           String[] groupLabels, int start, int stop, SequenceI[] sequences,
 124           Vector exgroups, String method)
 125   {
 126     // TODO implement divideByFeature
 127     /*
 128      * if (method!=AlignmentSorter.FEATURE_SCORE &&
 129      * method!=AlignmentSorter.FEATURE_LABEL &&
 130      * method!=AlignmentSorter.FEATURE_DENSITY) { throw newError(
 131      * "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY."
 132      * ); } boolean ignoreScore=method!=AlignmentSorter.FEATURE_SCORE;
 133      * StringBuffer scoreLabel = new StringBuffer();
 134      * scoreLabel.append(start+stop+method); // This doesn't work yet - we'd
 135      * like to have a canonical ordering that can be preserved from call to call
 136      * for (int i=0;featureLabels!=null && i<featureLabels.length; i++) {
 137      * scoreLabel.append(featureLabels[i]==null ? "null" : featureLabels[i]); }
 138      * for (int i=0;groupLabels!=null && i<groupLabels.length; i++) {
 139      * scoreLabel.append(groupLabels[i]==null ? "null" : groupLabels[i]); }
 140      * SequenceI[] seqs = alignment.getSequencesArray();
 141      *
 142      * boolean[] hasScore = new boolean[seqs.length]; // per sequence score //
 143      * presence int hasScores = 0; // number of scores present on set double[]
 144      * scores = new double[seqs.length]; int[] seqScores = new int[seqs.length];
 145      * Object[] feats = new Object[seqs.length]; double min = 0, max = 0; for
 146      * (int i = 0; i < seqs.length; i++) { SequenceFeature[] sf =
 147      * seqs[i].getSequenceFeatures(); if (sf==null &&
 148      * seqs[i].getDatasetSequence()!=null) { sf =
 149      * seqs[i].getDatasetSequence().getSequenceFeatures(); } if (sf==null) { sf
 150      * = new SequenceFeature[0]; } else { SequenceFeature[] tmp = new
 151      * SequenceFeature[sf.length]; for (int s=0; s<tmp.length;s++) { tmp[s] =
 152      * sf[s]; } sf = tmp; } int sstart = (start==-1) ? start :
 153      * seqs[i].findPosition(start); int sstop = (stop==-1) ? stop :
 154      * seqs[i].findPosition(stop); seqScores[i]=0; scores[i]=0.0; int
 155      * n=sf.length; for (int f=0;f<sf.length;f++) { // filter for selection
 156      * criteria if ( // ignore features outwith alignment start-stop positions.
 157      * (sf[f].end < sstart || sf[f].begin > sstop) || // or ignore based on
 158      * selection criteria (featureLabels != null &&
 159      * !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels)) ||
 160      * (groupLabels != null // problem here: we cannot eliminate null feature
 161      * group features && (sf[f].getFeatureGroup() != null &&
 162      * !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(),
 163      * groupLabels)))) { // forget about this feature sf[f] = null; n--; } else
 164      * { // or, also take a look at the scores if necessary. if (!ignoreScore &&
 165      * sf[f].getScore()!=Float.NaN) { if (seqScores[i]==0) { hasScores++; }
 166      * seqScores[i]++; hasScore[i] = true; scores[i] += sf[f].getScore(); //
 167      * take the first instance of this // score. } } } SequenceFeature[] fs;
 168      * feats[i] = fs = new SequenceFeature[n]; if (n>0) { n=0; for (int
 169      * f=0;f<sf.length;f++) { if (sf[f]!=null) { ((SequenceFeature[])
 170      * feats[i])[n++] = sf[f]; } } if (method==FEATURE_LABEL) { // order the
 171      * labels by alphabet String[] labs = new String[fs.length]; for (int
 172      * l=0;l<labs.length; l++) { labs[l] = (fs[l].getDescription()!=null ?
 173      * fs[l].getDescription() : fs[l].getType()); }
 174      * jalview.util.QuickSort.sort(labs, ((Object[]) feats[i])); } } if
 175      * (hasScore[i]) { // compute average score scores[i]/=seqScores[i]; //
 176      * update the score bounds. if (hasScores == 1) { max = min = scores[i]; }
 177      * else { if (max < scores[i]) { max = scores[i]; } if (min > scores[i]) {
 178      * min = scores[i]; } } } }
 179      *
 180      * if (method==FEATURE_SCORE) { if (hasScores == 0) { return; // do nothing
 181      * - no scores present to sort by. } // pad score matrix if (hasScores <
 182      * seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i])
 183      * { scores[i] = (max + i); } else { int nf=(feats[i]==null) ? 0
 184      * :((SequenceFeature[]) feats[i]).length;
 185      * System.err.println("Sorting on Score: seq "+seqs[i].getName()+
 186      * " Feats: "+nf+" Score : "+scores[i]); } } }
 187      *
 188      * jalview.util.QuickSort.sort(scores, seqs); } else if
 189      * (method==FEATURE_DENSITY) {
 190      *
 191      * // break ties between equivalent numbers for adjacent sequences by adding
 192      * 1/Nseq*i on the original order double fr = 0.9/(1.0*seqs.length); for
 193      * (int i=0;i<seqs.length; i++) { double nf; scores[i] =
 194      * (0.05+fr*i)+(nf=((feats[i]==null) ? 0.0 :1.0*((SequenceFeature[])
 195      * feats[i]).length));
 196      * System.err.println("Sorting on Density: seq "+seqs[i].getName()+
 197      * " Feats: "+nf+" Score : "+scores[i]); }
 198      * jalview.util.QuickSort.sort(scores, seqs); } else { if
 199      * (method==FEATURE_LABEL) { throw new Error("Not yet implemented."); } } if
 200      * (lastSortByFeatureScore ==null ||
 201      * scoreLabel.equals(lastSortByFeatureScore)) { setOrder(alignment, seqs); }
 202      * else { setReverseOrder(alignment, seqs); } lastSortByFeatureScore =
 203      * scoreLabel.toString();
 204      */
 205   }
 206
 207 }