src/jalview/analysis/AAFrequency.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.analysis;
  22
  23 import jalview.datamodel.AlignedCodonFrame;
  24 import jalview.datamodel.AlignmentAnnotation;
  25 import jalview.datamodel.AlignmentI;
  26 import jalview.datamodel.Annotation;
  27 import jalview.datamodel.SequenceI;
  28 import jalview.ext.android.SparseIntArray;
  29 import jalview.util.Comparison;
  30 import jalview.util.Format;
  31 import jalview.util.MappingUtils;
  32 import jalview.util.QuickSort;
  33
  34 import java.util.Arrays;
  35 import java.util.Hashtable;
  36 import java.util.List;
  37
  38 /**
  39  * Takes in a vector or array of sequences and column start and column end and
  40  * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied.
  41  * This class is used extensively in calculating alignment colourschemes that
  42  * depend on the amount of conservation in each alignment column.
  43  *
  44  * @author $author$
  45  * @version $Revision$
  46  */
  47 public class AAFrequency
  48 {
  49   private static final int TO_UPPER_CASE = 'A' - 'a'; // -32
  50
  51   public static final String MAXCOUNT = "C";
  52
  53   public static final String MAXRESIDUE = "R";
  54
  55   public static final String PID_GAPS = "G";
  56
  57   public static final String PID_NOGAPS = "N";
  58
  59   public static final String PROFILE = "P";
  60
  61   public static final String ENCODED_CHARS = "E";
  62
  63   /*
  64    * Quick look-up of String value of char 'A' to 'Z'
  65    */
  66   private static final String[] CHARS = new String['Z' - 'A' + 1];
  67
  68   static
  69   {
  70     for (char c = 'A'; c <= 'Z'; c++)
  71     {
  72       CHARS[c - 'A'] = String.valueOf(c);
  73     }
  74   }
  75
  76   public static final Hashtable[] calculate(List<SequenceI> list,
  77           int start, int end)
  78   {
  79     return calculate(list, start, end, false);
  80   }
  81
  82   public static final Hashtable[] calculate(List<SequenceI> sequences,
  83           int start, int end, boolean profile)
  84   {
  85     SequenceI[] seqs = new SequenceI[sequences.size()];
  86     int width = 0;
  87     synchronized (sequences)
  88     {
  89       for (int i = 0; i < sequences.size(); i++)
  90       {
  91         seqs[i] = sequences.get(i);
  92         if (seqs[i].getLength() > width)
  93         {
  94           width = seqs[i].getLength();
  95         }
  96       }
  97
  98       Hashtable[] reply = new Hashtable[width];
  99
 100       if (end >= width)
 101       {
 102         end = width;
 103       }
 104
 105       calculate(seqs, start, end, reply, profile);
 106       return reply;
 107     }
 108   }
 109
 110   public static final void calculate(SequenceI[] sequences, int start,
 111           int end, Hashtable[] result, boolean profile)
 112   {
 113     long now = System.currentTimeMillis();
 114     Hashtable residueHash;
 115     int seqCount = sequences.length;
 116     char c = '-';
 117     SparseIntArray profileSizes = new SparseIntArray();
 118
 119     for (int column = start; column < end; column++)
 120     {
 121       residueHash = new Hashtable();
 122       int maxCount = 0;
 123       String maxResidue = "";
 124       int nongap = 0;
 125       // int [] values = new int[255];
 126       int guessProfileSize = estimateProfileSize(profileSizes);
 127       SparseIntArray values = new SparseIntArray(guessProfileSize);
 128
 129       for (int row = 0; row < seqCount; row++)
 130       {
 131         if (sequences[row] == null)
 132         {
 133           System.err
 134                   .println("WARNING: Consensus skipping null sequence - possible race condition.");
 135           continue;
 136         }
 137         char[] seq = sequences[row].getSequence();
 138         if (seq.length > column)
 139         {
 140           c = seq[column];
 141
 142           if (Comparison.isGap(c))
 143           {
 144             // values['-']++;
 145             // values.put('-', values.get('-') + 1);
 146             values.add('-', 1);
 147             continue;
 148           }
 149           else if ('a' <= c && c <= 'z')
 150           {
 151             c += TO_UPPER_CASE;
 152           }
 153
 154           nongap++;
 155           // values[c]++;
 156           // values.put(c, values.get(c) + 1);
 157           values.add(c, 1);
 158         }
 159         else
 160         {
 161           /*
 162            * here we count as a gap if the sequence doesn't
 163            * reach this column (is that correct?)
 164            */
 165           // values['-']++;
 166           // values.put('-', values.get('-') + 1);
 167           values.add('-', 1);
 168         }
 169       }
 170       if (seqCount == 1)
 171       {
 172         maxResidue = String.valueOf(c);
 173         maxCount = 1;
 174       }
 175       else
 176       {
 177         // iterate over values keys not alphabet
 178         // for (int v = 'A'; v <= 'Z'; v++)
 179         for (int k = 0; k < values.size(); k++)
 180         {
 181           int v = values.keyAt(k);
 182           int count = values.valueAt(k); // values[v];
 183           if (v == '-' || count < 1 || count < maxCount)
 184           {
 185             continue;
 186           }
 187
 188           if (count > maxCount)
 189           {
 190             maxResidue = String.valueOf((char) v);// CHARS[v - 'A'];
 191           }
 192           else if (count == maxCount)
 193           {
 194             maxResidue += String.valueOf((char) v); // CHARS[v - 'A'];
 195           }
 196           maxCount = count;
 197         }
 198       }
 199       if (maxResidue.length() == 0)
 200       {
 201         maxResidue = "-";
 202       }
 203       if (profile)
 204       {
 205         // residueHash.put(PROFILE, new int[][] { values,
 206         // new int[] { jSize, nongap } });
 207         residueHash.put(PROFILE, new Profile(values, seqCount, nongap));
 208       }
 209       residueHash.put(MAXCOUNT, new Integer(maxCount));
 210       residueHash.put(MAXRESIDUE, maxResidue);
 211
 212       float percentage = ((float) maxCount * 100) / seqCount;
 213       residueHash.put(PID_GAPS, new Float(percentage));
 214
 215       if (nongap > 0)
 216       {
 217         // calculate for non-gapped too
 218         percentage = ((float) maxCount * 100) / nongap;
 219       }
 220       residueHash.put(PID_NOGAPS, new Float(percentage));
 221
 222       result[column] = residueHash;
 223
 224       profileSizes.add(values.size(), 1);
 225     }
 226     long elapsed = System.currentTimeMillis() - now;
 227     System.out.println(elapsed);
 228   }
 229
 230   /**
 231    * Make an estimate of the profile size we are going to compute i.e. how many
 232    * different characters may be present in it. Overestimating has a cost of
 233    * using more memory than necessary. Underestimating has a cost of needing to
 234    * extend the SparseIntArray holding the profile counts.
 235    *
 236    * @param profileSizes
 237    *          counts of sizes of profiles so far encountered
 238    * @return
 239    */
 240   static int estimateProfileSize(SparseIntArray profileSizes)
 241   {
 242     if (profileSizes.size() == 0)
 243     {
 244       return 4;
 245     }
 246
 247     /*
 248      * could do a statistical heuristic here e.g. 75%ile
 249      * for now just return the largest value
 250      */
 251     return profileSizes.keyAt(profileSizes.size() - 1);
 252   }
 253
 254   /**
 255    * Compute all or part of the annotation row from the given consensus
 256    * hashtable
 257    *
 258    * @param consensus
 259    *          - pre-allocated annotation row
 260    * @param hconsensus
 261    * @param iStart
 262    * @param width
 263    * @param ignoreGapsInConsensusCalculation
 264    * @param includeAllConsSymbols
 265    * @param nseq
 266    */
 267   public static void completeConsensus(AlignmentAnnotation consensus,
 268           Hashtable[] hconsensus, int iStart, int width,
 269           boolean ignoreGapsInConsensusCalculation,
 270           boolean includeAllConsSymbols, long nseq)
 271   {
 272     completeConsensus(consensus, hconsensus, iStart, width,
 273             ignoreGapsInConsensusCalculation, includeAllConsSymbols, null,
 274             nseq);
 275   }
 276
 277   /**
 278    * Derive the consensus annotations to be added to the alignment for display.
 279    * This does not recompute the raw data, but may be called on a change in
 280    * display options, such as 'show logo', which may in turn result in a change
 281    * in the derived values.
 282    *
 283    * @param consensus
 284    *          the annotation row to add annotations to
 285    * @param hconsensus
 286    *          the source consensus data
 287    * @param iStart
 288    *          start column
 289    * @param width
 290    *          end column
 291    * @param ignoreGapsInConsensusCalculation
 292    *          if true, use the consensus calculated ignoring gaps
 293    * @param includeAllConsSymbols
 294    *          if true include all consensus symbols, else just show modal
 295    *          residue
 296    * @param alphabet
 297    * @param nseq
 298    *          number of sequences
 299    */
 300   public static void completeConsensus(AlignmentAnnotation consensus,
 301           Hashtable[] hconsensus, int iStart, int width,
 302           boolean ignoreGapsInConsensusCalculation,
 303           boolean includeAllConsSymbols, char[] alphabet, long nseq)
 304   {
 305      long now = System.currentTimeMillis();
 306     if (consensus == null || consensus.annotations == null
 307             || consensus.annotations.length < width)
 308     {
 309       // called with a bad alignment annotation row - wait for it to be
 310       // initialised properly
 311       return;
 312     }
 313
 314     final Format fmt = getPercentageFormat(nseq);
 315
 316     for (int i = iStart; i < width; i++)
 317     {
 318       Hashtable hci;
 319       if (i >= hconsensus.length || ((hci = hconsensus[i]) == null))
 320       {
 321         // happens if sequences calculated over were shorter than alignment
 322         // width
 323         consensus.annotations[i] = null;
 324         continue;
 325       }
 326       Float fv = (Float) hci
 327               .get(ignoreGapsInConsensusCalculation ? PID_NOGAPS : PID_GAPS);
 328       if (fv == null)
 329       {
 330         consensus.annotations[i] = null;
 331         // data has changed below us .. give up and
 332         continue;
 333       }
 334       float value = fv.floatValue();
 335       String maxRes = hci.get(AAFrequency.MAXRESIDUE).toString();
 336       StringBuilder mouseOver = new StringBuilder(64);
 337       if (maxRes.length() > 1)
 338       {
 339         mouseOver.append("[").append(maxRes).append("] ");
 340         maxRes = "+";
 341       }
 342       else
 343       {
 344         mouseOver.append(hci.get(AAFrequency.MAXRESIDUE) + " ");
 345       }
 346       // int[][] profile = (int[][]) hci.get(AAFrequency.PROFILE);
 347       Profile profile = (Profile) hci.get(AAFrequency.PROFILE);
 348       if (profile != null && includeAllConsSymbols)
 349       {
 350         int sequenceCount = profile.height;// profile[1][0];
 351         int nonGappedCount = profile.nonGapped;// [1][1];
 352         int normalisedBy = ignoreGapsInConsensusCalculation ? nonGappedCount
 353                 : sequenceCount;
 354         mouseOver.setLength(0);
 355         // TODO do this sort once only in calculate()?
 356         // char[][] ca = new char[profile[0].length][];
 357         // /int length = profile[0].length;
 358         int length = profile.profile.size();
 359         char[] ca = new char[length];
 360         // float[] vl = new float[length];
 361         int[] vl = new int[length];
 362         for (int c = 0; c < ca.length; c++)
 363         {
 364           int theChar = profile.profile.keyAt(c);
 365           ca[c] = (char) theChar;// c;
 366           // ca[c] = new char[]
 367           // { (char) c };
 368           vl[c] = profile.profile.valueAt(c);// profile[0][c];
 369         }
 370
 371         /*
 372          * sort characters into ascending order of their counts
 373          */
 374         QuickSort.sort(vl, ca);
 375
 376         /*
 377          * traverse in reverse order (highest count first) to build tooltip
 378          */
 379         // for (int p = 0, c = ca.length - 1; profile[0][ca[c]] > 0; c--)
 380         for (int p = 0, c = ca.length - 1; c >= 0; c--)
 381         {
 382           final char residue = ca[c];
 383           if (residue != '-')
 384           {
 385             // float tval = profile[0][residue] * 100f / normalisedBy;
 386             // float tval = profile[0][residue] * 100f / normalisedBy;
 387             float tval = (vl[c] * 100f) / normalisedBy;
 388             mouseOver
 389                     .append((((p == 0) ? "" : "; ")))
 390                     .append(residue)
 391                     .append(" ")
 392                     .append(((fmt != null) ? fmt.form(tval) : ((int) tval)))
 393                     .append("%");
 394             p++;
 395           }
 396         }
 397       }
 398       else
 399       {
 400         mouseOver.append(
 401                 (((fmt != null) ? fmt.form(value) : ((int) value))))
 402                 .append("%");
 403       }
 404       consensus.annotations[i] = new Annotation(maxRes,
 405               mouseOver.toString(), ' ', value);
 406     }
 407      long elapsed = System.currentTimeMillis() - now;
 408      System.out.println(-elapsed);
 409   }
 410
 411   /**
 412    * Returns a Format designed to show all significant figures for profile
 413    * percentages. For less than 100 sequences, returns null (the integer
 414    * percentage value will be displayed). For 100-999 sequences, returns "%3.1f"
 415    *
 416    * @param nseq
 417    * @return
 418    */
 419   protected static Format getPercentageFormat(long nseq)
 420   {
 421     int scale = 0;
 422     while (nseq >= 10)
 423     {
 424       scale++;
 425       nseq /= 10;
 426     }
 427     return scale <= 1 ? null : new Format("%3." + (scale - 1) + "f");
 428   }
 429
 430   /**
 431    * Returns the sorted profile for the given consensus data. The returned array
 432    * contains
 433    *
 434    * <pre>
 435    *    [profileType, numberOfValues, nonGapCount, charValue1, percentage1, charValue2, percentage2, ...]
 436    * in descending order of percentage value
 437    * </pre>
 438    *
 439    * @param hconsensus
 440    *          the data table from which to extract and sort values
 441    * @param ignoreGaps
 442    *          if true, only non-gapped values are included in percentage
 443    *          calculations
 444    * @return
 445    */
 446   public static int[] extractProfile(Hashtable hconsensus,
 447           boolean ignoreGaps)
 448   {
 449     int[] rtnval = new int[64];
 450     // int[][] profile = (int[][]) hconsensus.get(AAFrequency.PROFILE);
 451     Profile profile = (Profile) hconsensus.get(AAFrequency.PROFILE);
 452     if (profile == null)
 453     {
 454       return null;
 455     }
 456     // int profileLength = profile[0].length;
 457     int profileLength = profile.profile.size();
 458     char[] ca = new char[profileLength];
 459     float[] vl = new float[profileLength];
 460     // for (int c = 0; c < ca.length; c++)
 461     // {
 462     // ca[c] = (char) c;
 463     // vl[c] = profile[0][c];
 464     // }
 465     for (int i = 0; i < profileLength; i++)
 466     {
 467       int c = profile.profile.keyAt(i);
 468       ca[i] = (char) c;
 469       vl[i] = profile.profile.valueAt(i);
 470     }
 471     QuickSort.sort(vl, ca);
 472     int nextArrayPos = 2;
 473     int totalPercentage = 0;
 474     int distinctValuesCount = 0;
 475     final int divisor = ignoreGaps ? profile.nonGapped : profile.height;
 476     // final int divisor = profile[1][ignoreGaps ? 1 : 0];
 477     int j = profile.profile.size();
 478     for (int i = 0; i < j; i++)
 479 //    for (int c = ca.length - 1; profile[0][ca[c]] > 0; c--)
 480     {
 481       int theChar = profile.profile.keyAt(i);
 482       int charCount = profile.profile.valueAt(i);
 483
 484 //      if (ca[c] != '-')
 485         if (theChar != '-')
 486       {
 487 //        rtnval[nextArrayPos++] = ca[c];
 488         rtnval[nextArrayPos++] = theChar;
 489 //        final int percentage = (int) (profile[0][ca[c]] * 100f / divisor);
 490         final int percentage = (charCount * 100) / divisor;
 491         rtnval[nextArrayPos++] = percentage;
 492         totalPercentage += percentage;
 493         distinctValuesCount++;
 494       }
 495     }
 496     rtnval[0] = distinctValuesCount;
 497     rtnval[1] = totalPercentage;
 498     int[] result = new int[rtnval.length + 1];
 499     result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
 500     System.arraycopy(rtnval, 0, result, 1, rtnval.length);
 501
 502     return result;
 503   }
 504
 505   /**
 506    * Extract a sorted extract of cDNA codon profile data. The returned array
 507    * contains
 508    *
 509    * <pre>
 510    *    [profileType, numberOfValues, totalCount, charValue1, percentage1, charValue2, percentage2, ...]
 511    * in descending order of percentage value, where the character values encode codon triplets
 512    * </pre>
 513    *
 514    * @param hashtable
 515    * @return
 516    */
 517   public static int[] extractCdnaProfile(Hashtable hashtable,
 518           boolean ignoreGaps)
 519   {
 520     // this holds #seqs, #ungapped, and then codon count, indexed by encoded
 521     // codon triplet
 522     int[] codonCounts = (int[]) hashtable.get(PROFILE);
 523     int[] sortedCounts = new int[codonCounts.length - 2];
 524     System.arraycopy(codonCounts, 2, sortedCounts, 0,
 525             codonCounts.length - 2);
 526
 527     int[] result = new int[3 + 2 * sortedCounts.length];
 528     // first value is just the type of profile data
 529     result[0] = AlignmentAnnotation.CDNA_PROFILE;
 530
 531     char[] codons = new char[sortedCounts.length];
 532     for (int i = 0; i < codons.length; i++)
 533     {
 534       codons[i] = (char) i;
 535     }
 536     QuickSort.sort(sortedCounts, codons);
 537     int totalPercentage = 0;
 538     int distinctValuesCount = 0;
 539     int j = 3;
 540     int divisor = ignoreGaps ? codonCounts[1] : codonCounts[0];
 541     for (int i = codons.length - 1; i >= 0; i--)
 542     {
 543       final int codonCount = sortedCounts[i];
 544       if (codonCount == 0)
 545       {
 546         break; // nothing else of interest here
 547       }
 548       distinctValuesCount++;
 549       result[j++] = codons[i];
 550       final int percentage = codonCount * 100 / divisor;
 551       result[j++] = percentage;
 552       totalPercentage += percentage;
 553     }
 554     result[2] = totalPercentage;
 555
 556     /*
 557      * Just return the non-zero values
 558      */
 559     // todo next value is redundant if we limit the array to non-zero counts
 560     result[1] = distinctValuesCount;
 561     return Arrays.copyOfRange(result, 0, j);
 562   }
 563
 564   /**
 565    * Compute a consensus for the cDNA coding for a protein alignment.
 566    *
 567    * @param alignment
 568    *          the protein alignment (which should hold mappings to cDNA
 569    *          sequences)
 570    * @param hconsensus
 571    *          the consensus data stores to be populated (one per column)
 572    */
 573   public static void calculateCdna(AlignmentI alignment,
 574           Hashtable[] hconsensus)
 575   {
 576     final char gapCharacter = alignment.getGapCharacter();
 577     List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
 578     if (mappings == null || mappings.isEmpty())
 579     {
 580       return;
 581     }
 582
 583     int cols = alignment.getWidth();
 584     for (int col = 0; col < cols; col++)
 585     {
 586       // todo would prefer a Java bean for consensus data
 587       Hashtable<String, int[]> columnHash = new Hashtable<String, int[]>();
 588       // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
 589       int[] codonCounts = new int[66];
 590       codonCounts[0] = alignment.getSequences().size();
 591       int ungappedCount = 0;
 592       for (SequenceI seq : alignment.getSequences())
 593       {
 594         if (seq.getCharAt(col) == gapCharacter)
 595         {
 596           continue;
 597         }
 598         List<char[]> codons = MappingUtils
 599                 .findCodonsFor(seq, col, mappings);
 600         for (char[] codon : codons)
 601         {
 602           int codonEncoded = CodingUtils.encodeCodon(codon);
 603           if (codonEncoded >= 0)
 604           {
 605             codonCounts[codonEncoded + 2]++;
 606             ungappedCount++;
 607           }
 608         }
 609       }
 610       codonCounts[1] = ungappedCount;
 611       // todo: sort values here, save counts and codons?
 612       columnHash.put(PROFILE, codonCounts);
 613       hconsensus[col] = columnHash;
 614     }
 615   }
 616
 617   /**
 618    * Derive displayable cDNA consensus annotation from computed consensus data.
 619    *
 620    * @param consensusAnnotation
 621    *          the annotation row to be populated for display
 622    * @param consensusData
 623    *          the computed consensus data
 624    * @param showProfileLogo
 625    *          if true show all symbols present at each position, else only the
 626    *          modal value
 627    * @param nseqs
 628    *          the number of sequences in the alignment
 629    */
 630   public static void completeCdnaConsensus(
 631           AlignmentAnnotation consensusAnnotation,
 632           Hashtable[] consensusData, boolean showProfileLogo, int nseqs)
 633   {
 634     if (consensusAnnotation == null
 635             || consensusAnnotation.annotations == null
 636             || consensusAnnotation.annotations.length < consensusData.length)
 637     {
 638       // called with a bad alignment annotation row - wait for it to be
 639       // initialised properly
 640       return;
 641     }
 642
 643     // ensure codon triplet scales with font size
 644     consensusAnnotation.scaleColLabel = true;
 645     for (int col = 0; col < consensusData.length; col++)
 646     {
 647       Hashtable hci = consensusData[col];
 648       if (hci == null)
 649       {
 650         // gapped protein column?
 651         continue;
 652       }
 653       // array holds #seqs, #ungapped, then codon counts indexed by codon
 654       final int[] codonCounts = (int[]) hci.get(PROFILE);
 655       int totalCount = 0;
 656
 657       /*
 658        * First pass - get total count and find the highest
 659        */
 660       final char[] codons = new char[codonCounts.length - 2];
 661       for (int j = 2; j < codonCounts.length; j++)
 662       {
 663         final int codonCount = codonCounts[j];
 664         codons[j - 2] = (char) (j - 2);
 665         totalCount += codonCount;
 666       }
 667
 668       /*
 669        * Sort array of encoded codons by count ascending - so the modal value
 670        * goes to the end; start by copying the count (dropping the first value)
 671        */
 672       int[] sortedCodonCounts = new int[codonCounts.length - 2];
 673       System.arraycopy(codonCounts, 2, sortedCodonCounts, 0,
 674               codonCounts.length - 2);
 675       QuickSort.sort(sortedCodonCounts, codons);
 676
 677       int modalCodonEncoded = codons[codons.length - 1];
 678       int modalCodonCount = sortedCodonCounts[codons.length - 1];
 679       String modalCodon = String.valueOf(CodingUtils
 680               .decodeCodon(modalCodonEncoded));
 681       if (sortedCodonCounts.length > 1
 682               && sortedCodonCounts[codons.length - 2] == sortedCodonCounts[codons.length - 1])
 683       {
 684         /*
 685          * two or more codons share the modal count
 686          */
 687         modalCodon = "+";
 688       }
 689       float pid = sortedCodonCounts[sortedCodonCounts.length - 1] * 100
 690               / (float) totalCount;
 691
 692       /*
 693        * todo ? Replace consensus hashtable with sorted arrays of codons and
 694        * counts (non-zero only). Include total count in count array [0].
 695        */
 696
 697       /*
 698        * Scan sorted array backwards for most frequent values first. Show
 699        * repeated values compactly.
 700        */
 701       StringBuilder mouseOver = new StringBuilder(32);
 702       StringBuilder samePercent = new StringBuilder();
 703       String percent = null;
 704       String lastPercent = null;
 705       Format fmt = getPercentageFormat(nseqs);
 706
 707       for (int j = codons.length - 1; j >= 0; j--)
 708       {
 709         int codonCount = sortedCodonCounts[j];
 710         if (codonCount == 0)
 711         {
 712           /*
 713            * remaining codons are 0% - ignore, but finish off the last one if
 714            * necessary
 715            */
 716           if (samePercent.length() > 0)
 717           {
 718             mouseOver.append(samePercent).append(": ").append(percent)
 719                     .append("% ");
 720           }
 721           break;
 722         }
 723         int codonEncoded = codons[j];
 724         final int pct = codonCount * 100 / totalCount;
 725         String codon = String
 726                 .valueOf(CodingUtils.decodeCodon(codonEncoded));
 727         percent = fmt == null ? Integer.toString(pct) : fmt.form(pct);
 728         if (showProfileLogo || codonCount == modalCodonCount)
 729         {
 730           if (percent.equals(lastPercent) && j > 0)
 731           {
 732             samePercent.append(samePercent.length() == 0 ? "" : ", ");
 733             samePercent.append(codon);
 734           }
 735           else
 736           {
 737             if (samePercent.length() > 0)
 738             {
 739               mouseOver.append(samePercent).append(": ")
 740                       .append(lastPercent).append("% ");
 741             }
 742             samePercent.setLength(0);
 743             samePercent.append(codon);
 744           }
 745           lastPercent = percent;
 746         }
 747       }
 748
 749       consensusAnnotation.annotations[col] = new Annotation(modalCodon,
 750               mouseOver.toString(), ' ', pid);
 751     }
 752   }
 753 }