X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FStructureFrequency.java;h=3c8274e3ba190dfcf28a687e9c9b4b8f7eaca7a7;hb=d07fe6a0891b57a9d707e356f7769395ce94b0da;hp=5095ce1e5ffe3f8fe5fe3a9fb738842c31a5bac4;hpb=d7c00f48b00e3ede57c46ae4daf6a9203b9b6059;p=jalview.git diff --git a/src/jalview/analysis/StructureFrequency.java b/src/jalview/analysis/StructureFrequency.java index 5095ce1..3c8274e 100644 --- a/src/jalview/analysis/StructureFrequency.java +++ b/src/jalview/analysis/StructureFrequency.java @@ -1,38 +1,47 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle - * + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ - package jalview.analysis; -import java.util.*; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.Annotation; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.util.Format; -import jalview.datamodel.*; +import java.util.ArrayList; +import java.util.Hashtable; /** * Takes in a vector or array of sequences and column start and column end and * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied. * This class is used extensively in calculating alignment colourschemes that * depend on the amount of conservation in each alignment column. - * + * * @author $author$ * @version $Revision$ */ public class StructureFrequency { + public static final int STRUCTURE_PROFILE_LENGTH = 74; + // No need to store 1000s of strings which are not // visible to the user. public static final String MAXCOUNT = "C"; @@ -49,7 +58,7 @@ public class StructureFrequency /** * Returns the 3' position of a base pair - * + * * @param pairs * Secondary structure annotation * @param indice @@ -58,15 +67,15 @@ public class StructureFrequency */ public static int findPair(SequenceFeature[] pairs, int indice) { - System.out.print("indice"+indice+" "); + for (int i = 0; i < pairs.length; i++) { if (pairs[i].getBegin() == indice) - + { - System.out.println(pairs[i].getEnd()); + return pairs[i].getEnd(); - + } } return -1; @@ -75,7 +84,7 @@ public class StructureFrequency /** * Method to calculate a 'base pair consensus row', very similar to nucleotide * consensus but takes into account a given structure - * + * * @param sequences * @param start * @param end @@ -87,19 +96,11 @@ public class StructureFrequency int end, Hashtable[] result, boolean profile, AlignmentAnnotation rnaStruc) { -// System.out.println("longueur="+sequences.length); -// for(int l=0;l<=(sequences.length-1);l++){ -// System.out.println("sequences "+l+":"+sequences[l].getSequenceAsString()); -// } -// System.out.println("start="+start); - System.out.println("end="+end); -// System.out.println("result="+result.length); -// -// System.out.println("profile="+profile); -// System.out.println("rnaStruc="+rnaStruc); + Hashtable residueHash; String maxResidue; char[] struc = rnaStruc.getRNAStruc().toCharArray(); + SequenceFeature[] rna = rnaStruc._rnasecstr; char c, s, cEnd; int count = 0, nonGap = 0, i, bpEnd = -1, j, jSize = sequences.length; @@ -107,7 +108,6 @@ public class StructureFrequency int[][] pairs; float percentage; boolean wooble = true; - for (i = start; i < end; i++) // foreach column { residueHash = new Hashtable(); @@ -115,11 +115,11 @@ public class StructureFrequency values = new int[255]; pairs = new int[255][255]; bpEnd = -1; - //System.out.println("s="+struc[i]); + // System.out.println("s="+struc[i]); if (i < struc.length) { s = struc[i]; - + } else { @@ -139,23 +139,21 @@ public class StructureFrequency } else { - - + bpEnd = findPair(rna, i); - - if (bpEnd>-1) - { - for (j = 0; j < jSize; j++) // foreach row + + if (bpEnd > -1) { - if (sequences[j] == null) + for (j = 0; j < jSize; j++) // foreach row { - System.err - .println("WARNING: Consensus skipping null sequence - possible race condition."); - continue; - } - c = sequences[j].getCharAt(i); - //System.out.println("c="+c); - + if (sequences[j] == null) + { + System.err + .println("WARNING: Consensus skipping null sequence - possible race condition."); + continue; + } + c = sequences[j].getCharAt(i); + // System.out.println("c="+c); // standard representation for gaps in sequence and structure if (c == '.' || c == ' ') @@ -169,49 +167,46 @@ public class StructureFrequency continue; } cEnd = sequences[j].getCharAt(bpEnd); - - - System.out.println("pairs ="+c+","+cEnd); - if (checkBpType(c, cEnd)==true) + + // System.out.println("pairs ="+c+","+cEnd); + if (checkBpType(c, cEnd) == true) { values['(']++; // H means it's a helix (structured) maxResidue = "("; - wooble=true; - System.out.println("It's a pair wc"); - + wooble = true; + // System.out.println("It's a pair wc"); + } - if (checkBpType(c, cEnd)==false) + if (checkBpType(c, cEnd) == false) { - wooble =false; + wooble = false; values['[']++; // H means it's a helix (structured) maxResidue = "["; - System.out.println("It's an pair non canonic"); - System.out.println(sequences[j].getRNA()); - System.out.println(rnaStruc.getRNAStruc().charAt(i)); + } - pairs[c][cEnd]++; - - - } + pairs[c][cEnd]++; + + } } // nonGap++; } // UPDATE this for new values if (profile) { + // TODO 1-dim array with jsize in [0], nongapped in [1]; or Pojo residueHash.put(PROFILE, new int[][] { values, new int[] { jSize, (jSize - values['-']) } }); residueHash.put(PAIRPROFILE, pairs); } - if (wooble==true) + if (wooble == true) { - count = values['(']; + count = values['(']; } - if (wooble==false) + if (wooble == false) { - count = values['[']; + count = values['[']; } residueHash.put(MAXCOUNT, new Integer(count)); residueHash.put(MAXRESIDUE, maxResidue); @@ -227,18 +222,20 @@ public class StructureFrequency } if (bpEnd > 0) { - values[')'] = values['(']; + values[')'] = values['(']; values[']'] = values['[']; values['('] = 0; values['['] = 0; residueHash = new Hashtable(); - if (wooble==true){ - System.out.println(maxResidue+","+wooble); - maxResidue = ")"; + if (wooble == true) + { + // System.out.println(maxResidue+","+wooble); + maxResidue = ")"; } - if(wooble==false){ - System.out.println(maxResidue+","+wooble); - maxResidue = "]"; + if (wooble == false) + { + // System.out.println(maxResidue+","+wooble); + maxResidue = "]"; } if (profile) { @@ -256,14 +253,14 @@ public class StructureFrequency residueHash.put(PID_GAPS, new Float(percentage)); result[bpEnd] = residueHash; - + } } } /** * Method to check if a base-pair is a canonical or a wobble bp - * + * * @param up * 5' base * @param down @@ -335,7 +332,7 @@ public class StructureFrequency /** * Compute all or part of the annotation row from the given consensus * hashtable - * + * * @param consensus * - pre-allocated annotation row * @param hconsensus @@ -347,7 +344,7 @@ public class StructureFrequency public static void completeConsensus(AlignmentAnnotation consensus, Hashtable[] hconsensus, int iStart, int width, boolean ignoreGapsInConsensusCalculation, - boolean includeAllConsSymbols) + boolean includeAllConsSymbols, long nseq) { float tval, value; if (consensus == null || consensus.annotations == null @@ -357,10 +354,23 @@ public class StructureFrequency // initialised properly return; } + String fmtstr = "%3.1f"; + int precision = 2; + while (nseq > 100) + { + precision++; + nseq /= 10; + } + if (precision > 2) + { + fmtstr = "%" + (2 + precision) + "." + precision + "f"; + } + Format fmt = new Format(fmtstr); + for (int i = iStart; i < width; i++) { Hashtable hci; - if (i >= hconsensus.length || ((hci=hconsensus[i])==null)) + if (i >= hconsensus.length || ((hci = hconsensus[i]) == null)) { // happens if sequences calculated over were shorter than alignment // width @@ -371,32 +381,28 @@ public class StructureFrequency Float fv; if (ignoreGapsInConsensusCalculation) { - fv =(Float) hci.get(StructureFrequency.PID_NOGAPS); + fv = (Float) hci.get(StructureFrequency.PID_NOGAPS); } else { fv = (Float) hci.get(StructureFrequency.PID_GAPS); } - if (fv==null) + if (fv == null) { consensus.annotations[i] = null; // data has changed below us .. give up and continue; } value = fv.floatValue(); - String maxRes = hci.get(StructureFrequency.MAXRESIDUE) - .toString(); - String mouseOver = hci.get(StructureFrequency.MAXRESIDUE) - + " "; + String maxRes = hci.get(StructureFrequency.MAXRESIDUE).toString(); + String mouseOver = hci.get(StructureFrequency.MAXRESIDUE) + " "; if (maxRes.length() > 1) { mouseOver = "[" + maxRes + "] "; maxRes = "+"; } - int[][] profile = (int[][]) hci - .get(StructureFrequency.PROFILE); - int[][] pairs = (int[][]) hci - .get(StructureFrequency.PAIRPROFILE); + int[][] profile = (int[][]) hci.get(StructureFrequency.PROFILE); + int[][] pairs = (int[][]) hci.get(StructureFrequency.PAIRPROFILE); if (pairs != null && includeAllConsSymbols) // Just responsible for the // tooltip @@ -404,15 +410,17 @@ public class StructureFrequency { mouseOver = ""; - /* TODO It's not sure what is the purpose of the alphabet and wheter it is useful for structure? - * + /* + * TODO It's not sure what is the purpose of the alphabet and wheter it + * is useful for structure? + * * if (alphabet != null) { for (int c = 0; c < alphabet.length; c++) { * tval = ((float) profile[0][alphabet[c]]) 100f / (float) * profile[1][ignoreGapsInConsensusCalculation ? 1 : 0]; mouseOver += * ((c == 0) ? "" : "; ") + alphabet[c] + " " + ((int) tval) + "%"; } } * else { */ - Object[] ca = new Object[625]; + int[][] ca = new int[625][]; float[] vl = new float[625]; int x = 0; for (int c = 65; c < 90; c++) @@ -428,14 +436,19 @@ public class StructureFrequency jalview.util.QuickSort.sort(vl, ca); int p = 0; + /* + * profile[1] is {total, ungappedTotal} + */ + final int divisor = profile[1][ignoreGapsInConsensusCalculation ? 1 + : 0]; for (int c = 624; c > 0; c--) { if (vl[c] > 0) { - tval = (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1 - : 0]); - mouseOver += ((p == 0) ? "" : "; ") + (char) ((int[]) ca[c])[0] - + (char) ((int[]) ca[c])[1] + " " + ((int) tval) + "%"; + tval = (vl[c] * 100f / divisor); + mouseOver += ((p == 0) ? "" : "; ") + (char) ca[c][0] + + (char) ca[c][1] + " " + fmt.form(tval) + + "%"; p++; } @@ -445,7 +458,7 @@ public class StructureFrequency } else { - mouseOver += ((int) value + "%"); + mouseOver += (fmt.form(value) + "%"); } consensus.annotations[i] = new Annotation(maxRes, mouseOver, ' ', value); @@ -454,23 +467,26 @@ public class StructureFrequency /** * get the sorted base-pair profile for the given position of the consensus - * + * * @param hconsensus * @return profile of the given column */ public static int[] extractProfile(Hashtable hconsensus, boolean ignoreGapsInConsensusCalculation) { - int[] rtnval = new int[74]; // 2*(5*5)+2 + int[] rtnval = new int[STRUCTURE_PROFILE_LENGTH]; // 2*(5*5)+2 int[][] profile = (int[][]) hconsensus.get(StructureFrequency.PROFILE); int[][] pairs = (int[][]) hconsensus .get(StructureFrequency.PAIRPROFILE); if (profile == null) + { return null; + } // TODO fix the object length, also do it in completeConsensus - Object[] ca = new Object[625]; + // Object[] ca = new Object[625]; + int[][] ca = new int[625][]; float[] vl = new float[625]; int x = 0; for (int c = 65; c < 90; c++) @@ -485,21 +501,28 @@ public class StructureFrequency } jalview.util.QuickSort.sort(vl, ca); - rtnval[0] = 2; + int valuesCount = 0; rtnval[1] = 0; + int offset = 2; + final int divisor = profile[1][ignoreGapsInConsensusCalculation ? 1 : 0]; for (int c = 624; c > 0; c--) { if (vl[c] > 0) { - rtnval[rtnval[0]++] = ((int[]) ca[c])[0]; - rtnval[rtnval[0]++] = ((int[]) ca[c])[1]; - rtnval[rtnval[0]] = (int) (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1 - : 0]); - rtnval[1]+=rtnval[rtnval[0]++]; + rtnval[offset++] = ca[c][0]; + rtnval[offset++] = ca[c][1]; + rtnval[offset] = (int) (vl[c] * 100f / divisor); + rtnval[1] += rtnval[offset++]; + valuesCount++; } } + rtnval[0] = valuesCount; - return rtnval; + // insert profile type code in position 0 + int[] result = new int[rtnval.length + 1]; + result[0] = AlignmentAnnotation.STRUCTURE_PROFILE; + System.arraycopy(rtnval, 0, result, 1, rtnval.length); + return result; } public static void main(String args[])