X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FStructureFrequency.java;h=fe794be565e0cc27e5b0ada15844cb18bd7f9959;hb=865a855a4ca87eadb3e5ff284ed32ed307d9c34b;hp=a5dd030d48c78f6003be5ddc6a676a8ce51c588f;hpb=0f3f69c44e4a266029ebc16171bebfe34bd5ac96;p=jalview.git diff --git a/src/jalview/analysis/StructureFrequency.java b/src/jalview/analysis/StructureFrequency.java index a5dd030..fe794be 100644 --- a/src/jalview/analysis/StructureFrequency.java +++ b/src/jalview/analysis/StructureFrequency.java @@ -1,19 +1,20 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6) - * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1) + * Copyright (C) 2014 The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.analysis; @@ -44,309 +45,244 @@ public class StructureFrequency public static final String PROFILE = "P"; - public static final Hashtable[] calculate(Vector sequences, int start, - int end) - { - return calculate(sequences, start, end, false); - } + public static final String PAIRPROFILE = "B"; - public static final Hashtable[] calculate(Vector sequences, int start, - int end, boolean profile) + /** + * Returns the 3' position of a base pair + * + * @param pairs + * Secondary structure annotation + * @param indice + * 5' position of a base pair + * @return 3' position of a base pair + */ + public static int findPair(SequenceFeature[] pairs, int indice) { - SequenceI[] seqs = new SequenceI[sequences.size()]; - int width = 0; - for (int i = 0; i < sequences.size(); i++) + for (int i = 0; i < pairs.length; i++) { - seqs[i] = (SequenceI) sequences.elementAt(i); - if (seqs[i].getLength() > width) + if (pairs[i].getBegin() == indice) { - width = seqs[i].getLength(); + return pairs[i].getEnd(); } } - - Hashtable[] reply = new Hashtable[width]; - - if (end >= width) - { - end = width; - } - - calculate(seqs, start, end, reply, profile); - - return reply; - } - - public static final void calculate(SequenceI[] sequences, int start, - int end, Hashtable[] result) - { - calculate(sequences, start, end, result, false); + return -1; } + /** + * Method to calculate a 'base pair consensus row', very similar to nucleotide + * consensus but takes into account a given structure + * + * @param sequences + * @param start + * @param end + * @param result + * @param profile + * @param rnaStruc + */ public static final void calculate(SequenceI[] sequences, int start, - int end, Hashtable[] result, boolean profile) + int end, Hashtable[] result, boolean profile, + AlignmentAnnotation rnaStruc) { Hashtable residueHash; - int maxCount, nongap, i, j, v, jSize = sequences.length; String maxResidue; - char c; + char[] seq, struc = rnaStruc.getRNAStruc().toCharArray(); + SequenceFeature[] rna = rnaStruc._rnasecstr; + char c, s, cEnd; + int count, nonGap = 0, i, bpEnd = -1, j, jSize = sequences.length; + int[] values; + int[][] pairs; float percentage; - int[] values = new int[255]; - - char[] seq; - - for (i = start; i < end; i++) + for (i = start; i < end; i++) // foreach column { residueHash = new Hashtable(); - maxCount = 0; - maxResidue = ""; - nongap = 0; + maxResidue = "-"; values = new int[255]; - - - for (j = 0; j < jSize; j++) + pairs = new int[255][255]; + bpEnd = -1; + if (i < struc.length) { - if (sequences[j]==null) - { - System.err.println("WARNING: Consensus skipping null sequence - possible race condition."); - continue; - } - seq = sequences[j].getSequence(); - if (seq.length > i) - { - c = seq[i]; - - if (c == '.' || c == ' ') - { - c = '-'; - } - - if (c == '-') - { - values['-']++; - continue; - } - else if ('a' <= c && c <= 'z') - { - c -= 32; // ('a' - 'A'); - } - - nongap++; - values[c]++; + s = struc[i]; + } + else + { + s = '-'; + } + if (s == '.' || s == ' ') + { + s = '-'; + } - } - else + if (s != '(') + { + if (s == '-') { values['-']++; } } - - for (v = 'A'; v < 'Z'; v++) + else { - if (values[v] < 2 || values[v] < maxCount) + bpEnd = findPair(rna, i); + if (bpEnd > -1) { - continue; - } + for (j = 0; j < jSize; j++) // foreach row + { + if (sequences[j] == null) + { + System.err + .println("WARNING: Consensus skipping null sequence - possible race condition."); + continue; + } + c = sequences[j].getCharAt(i); + { - if (values[v] > maxCount) - { - maxResidue = String.valueOf((char) v); - } - else if (values[v] == maxCount) - { - maxResidue += String.valueOf((char) v); + // standard representation for gaps in sequence and structure + if (c == '.' || c == ' ') + { + c = '-'; + } + + if (c == '-') + { + values['-']++; + continue; + } + cEnd = sequences[j].getCharAt(bpEnd); + if (checkBpType(c, cEnd)) + { + values['(']++; // H means it's a helix (structured) + } + pairs[c][cEnd]++; + + maxResidue = "("; + } + } } - maxCount = values[v]; - } - - if (maxResidue.length() == 0) - { - maxResidue = "-"; + // nonGap++; } + // UPDATE this for new values if (profile) { residueHash.put(PROFILE, new int[][] { values, new int[] - { jSize, nongap } }); + { jSize, (jSize - values['-']) } }); + + residueHash.put(PAIRPROFILE, pairs); } - residueHash.put(MAXCOUNT, new Integer(maxCount)); + + count = values['(']; + + residueHash.put(MAXCOUNT, new Integer(count)); residueHash.put(MAXRESIDUE, maxResidue); - percentage = ((float) maxCount * 100) / (float) jSize; + percentage = ((float) count * 100) / jSize; residueHash.put(PID_GAPS, new Float(percentage)); - percentage = ((float) maxCount * 100) / (float) nongap; - residueHash.put(PID_NOGAPS, new Float(percentage)); - result[i] = residueHash; + // percentage = ((float) count * 100) / (float) nongap; + // residueHash.put(PID_NOGAPS, new Float(percentage)); + if (result[i] == null) + { + result[i] = residueHash; + } + if (bpEnd > 0) + { + values[')'] = values['(']; + values['('] = 0; + + residueHash = new Hashtable(); + maxResidue = ")"; + + if (profile) + { + residueHash.put(PROFILE, new int[][] + { values, new int[] + { jSize, (jSize - values['-']) } }); + + residueHash.put(PAIRPROFILE, pairs); + } + + residueHash.put(MAXCOUNT, new Integer(count)); + residueHash.put(MAXRESIDUE, maxResidue); + + percentage = ((float) count * 100) / jSize; + residueHash.put(PID_GAPS, new Float(percentage)); + + result[bpEnd] = residueHash; + } } } - - public static int findPair(SequenceFeature[] pairs,int indice){ - for(int i=0; i=0; x--){ - System.out.println("BP-"+((rna.length-1)-x)+" "+rna[x].getBegin()+","+rna[x].getEnd()+" "+rna[x].getFeatureGroup()); - }*/ - - - for (i = start; i < end; i++) //foreach column - { - residueHash = new Hashtable(); - maxResidue="-"; - values = new int[255]; - bpEnd=-1; - - s = struc[i]; - if (s == '.' || s == ' ') - { - s = '-'; - } - - if(s != '('){ - values['-']++; - } - else - { - for (j = 0; j < jSize; j++) //foreach row - { - if (sequences[j]==null) - { - System.err.println("WARNING: Consensus skipping null sequence - possible race condition."); - continue; - } - seq = sequences[j].getSequence(); - - if (seq.length > i) - { - c = seq[i]; - - //standard representation for gaps in sequence and structure - if (c == '.' || c == ' ') - { - c = '-'; - } - - if (c == '-') - { - values['-']++; - continue; - } - //if(s == '('){ - //bpEnd=rna[(rna.length-1-nonGap)].getEnd(); - bpEnd=findPair(rna,i); - cEnd=seq[bpEnd]; - //System.out.println(i+","+bpEnd); - if(checkBpType(c,cEnd)){ - values['H']++; //H means it's a helix (structured) - } - maxResidue="H"; - //} - } - } - nonGap++; - } - //UPDATE this for new values - //if (profile) - //{ - // residueHash.put(PROFILE, new int[][] - // { values, new int[] - // { jSize, nongap } }); - // } - - - count=values['H']; - - residueHash.put(MAXCOUNT, new Integer(count)); - residueHash.put(MAXRESIDUE, maxResidue); - - percentage = ((float) count * 100) / (float) jSize; - residueHash.put(PID_GAPS, new Float(percentage)); - - //percentage = ((float) count * 100) / (float) nongap; - //residueHash.put(PID_NOGAPS, new Float(percentage)); - if(result[i]==null){ - result[i] = residueHash; - } - if(bpEnd>0){ - result[bpEnd]=residueHash; - } - } - } - /** - * Method to check if a base-pair is a canonical or a wobble bp - * @param up 5' base - * @param down 3' base + * Method to check if a base-pair is a canonical or a wobble bp + * + * @param up + * 5' base + * @param down + * 3' base * @return True if it is a canonical/wobble bp */ - public static boolean checkBpType(char up, char down){ - if(up>'Z'){up-=32;} - if(down>'Z'){down-=32;} - - switch (up){ - case 'A': - switch (down){ - case 'T': return true; - case 'U': return true; - } - break; - case 'C': - switch (down){ - case 'G': return true; - } - break; - case 'T': - switch (down){ - case 'A': return true; - case 'G': return true; - } - break; - case 'G': - switch (down){ - case 'C': return true; - case 'T': return true; - case 'U': return true; - } - break; - case 'U': - switch (down){ - case 'A': return true; - case 'G': return true; - } - break; - } - return false; + public static boolean checkBpType(char up, char down) + { + if (up > 'Z') + { + up -= 32; + } + if (down > 'Z') + { + down -= 32; + } + + switch (up) + { + case 'A': + switch (down) + { + case 'T': + return true; + case 'U': + return true; + } + break; + case 'C': + switch (down) + { + case 'G': + return true; + } + break; + case 'T': + switch (down) + { + case 'A': + return true; + case 'G': + return true; + } + break; + case 'G': + switch (down) + { + case 'C': + return true; + case 'T': + return true; + case 'U': + return true; + } + break; + case 'U': + switch (down) + { + case 'A': + return true; + case 'G': + return true; + } + break; + } + return false; } - + /** * Compute all or part of the annotation row from the given consensus * hashtable @@ -364,17 +300,6 @@ public class StructureFrequency boolean ignoreGapsInConsensusCalculation, boolean includeAllConsSymbols) { - completeConsensus(consensus, hconsensus, iStart, width, - ignoreGapsInConsensusCalculation, includeAllConsSymbols, null); // new - // char[] - // { 'A', 'C', 'G', 'T', 'U' }); - } - - public static void completeConsensus(AlignmentAnnotation consensus, - Hashtable[] hconsensus, int iStart, int width, - boolean ignoreGapsInConsensusCalculation, - boolean includeAllConsSymbols, char[] alphabet) - { float tval, value; if (consensus == null || consensus.annotations == null || consensus.annotations.length < width) @@ -385,7 +310,8 @@ public class StructureFrequency } for (int i = iStart; i < width; i++) { - if (i >= hconsensus.length) + Hashtable hci; + if (i >= hconsensus.length || ((hci = hconsensus[i]) == null)) { // happens if sequences calculated over were shorter than alignment // width @@ -393,68 +319,78 @@ public class StructureFrequency continue; } value = 0; + Float fv; if (ignoreGapsInConsensusCalculation) { - value = ((Float) hconsensus[i].get(StructureFrequency.PID_NOGAPS)) - .floatValue(); + fv = (Float) hci.get(StructureFrequency.PID_NOGAPS); } else { - value = ((Float) hconsensus[i].get(StructureFrequency.PID_GAPS)) - .floatValue(); + fv = (Float) hci.get(StructureFrequency.PID_GAPS); + } + if (fv == null) + { + consensus.annotations[i] = null; + // data has changed below us .. give up and + continue; } - - String maxRes = hconsensus[i].get(StructureFrequency.MAXRESIDUE).toString(); - String mouseOver = hconsensus[i].get(StructureFrequency.MAXRESIDUE) + " "; + value = fv.floatValue(); + String maxRes = hci.get(StructureFrequency.MAXRESIDUE).toString(); + String mouseOver = hci.get(StructureFrequency.MAXRESIDUE) + " "; if (maxRes.length() > 1) { mouseOver = "[" + maxRes + "] "; maxRes = "+"; } - int[][] profile = (int[][]) hconsensus[i].get(StructureFrequency.PROFILE); - if (profile != null && includeAllConsSymbols) + int[][] profile = (int[][]) hci.get(StructureFrequency.PROFILE); + int[][] pairs = (int[][]) hci.get(StructureFrequency.PAIRPROFILE); + + if (pairs != null && includeAllConsSymbols) // Just responsible for the + // tooltip + // TODO Update tooltips for Structure row { mouseOver = ""; - if (alphabet != null) + + /* + * TODO It's not sure what is the purpose of the alphabet and wheter it + * is useful for structure? + * + * if (alphabet != null) { for (int c = 0; c < alphabet.length; c++) { + * tval = ((float) profile[0][alphabet[c]]) 100f / (float) + * profile[1][ignoreGapsInConsensusCalculation ? 1 : 0]; mouseOver += + * ((c == 0) ? "" : "; ") + alphabet[c] + " " + ((int) tval) + "%"; } } + * else { + */ + Object[] ca = new Object[625]; + float[] vl = new float[625]; + int x = 0; + for (int c = 65; c < 90; c++) { - for (int c = 0; c < alphabet.length; c++) + for (int d = 65; d < 90; d++) { - tval = ((float) profile[0][alphabet[c]]) - * 100f - / (float) profile[1][ignoreGapsInConsensusCalculation ? 1 - : 0]; - mouseOver += ((c == 0) ? "" : "; ") + alphabet[c] + " " - + ((int) tval) + "%"; + ca[x] = new int[] + { c, d }; + vl[x] = pairs[c][d]; + x++; } } - else + jalview.util.QuickSort.sort(vl, ca); + int p = 0; + + for (int c = 624; c > 0; c--) { - Object[] ca = new Object[profile[0].length]; - float[] vl = new float[profile[0].length]; - for (int c = 0; c < ca.length; c++) + if (vl[c] > 0) { - ca[c] = new char[] - { (char) c }; - vl[c] = (float) profile[0][c]; - } - ; - jalview.util.QuickSort.sort(vl, ca); - for (int p = 0, c = ca.length - 1; profile[0][((char[]) ca[c])[0]] > 0; c--) - { - if (((char[]) ca[c])[0] != '-') - { - tval = ((float) profile[0][((char[]) ca[c])[0]]) - * 100f - / (float) profile[1][ignoreGapsInConsensusCalculation ? 1 - : 0]; - mouseOver += ((p == 0) ? "" : "; ") + ((char[]) ca[c])[0] - + " " + ((int) tval) + "%"; - p++; + tval = (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1 + : 0]); + mouseOver += ((p == 0) ? "" : "; ") + (char) ((int[]) ca[c])[0] + + (char) ((int[]) ca[c])[1] + " " + ((int) tval) + "%"; + p++; - } } - } + + // } } else { @@ -466,56 +402,71 @@ public class StructureFrequency } /** - * get the sorted profile for the given position of the consensus + * get the sorted base-pair profile for the given position of the consensus * * @param hconsensus - * @return + * @return profile of the given column */ public static int[] extractProfile(Hashtable hconsensus, boolean ignoreGapsInConsensusCalculation) { - int[] rtnval = new int[64]; + int[] rtnval = new int[74]; // 2*(5*5)+2 int[][] profile = (int[][]) hconsensus.get(StructureFrequency.PROFILE); + int[][] pairs = (int[][]) hconsensus + .get(StructureFrequency.PAIRPROFILE); + if (profile == null) return null; - Object[] ca = new Object[profile[0].length]; - float[] vl = new float[profile[0].length]; - for (int c = 0; c < ca.length; c++) + + // TODO fix the object length, also do it in completeConsensus + Object[] ca = new Object[625]; + float[] vl = new float[625]; + int x = 0; + for (int c = 65; c < 90; c++) { - ca[c] = new char[] - { (char) c }; - vl[c] = (float) profile[0][c]; + for (int d = 65; d < 90; d++) + { + ca[x] = new int[] + { c, d }; + vl[x] = pairs[c][d]; + x++; + } } - ; jalview.util.QuickSort.sort(vl, ca); - rtnval[0] = 1; - for (int c = ca.length - 1; profile[0][((char[]) ca[c])[0]] > 0; c--) + + rtnval[0] = 2; + rtnval[1] = 0; + for (int c = 624; c > 0; c--) { - if (((char[]) ca[c])[0] != '-') + if (vl[c] > 0) { - rtnval[rtnval[0]++] = ((char[]) ca[c])[0]; - rtnval[rtnval[0]++] = (int) (((float) profile[0][((char[]) ca[c])[0]]) * 100f / (float) profile[1][ignoreGapsInConsensusCalculation ? 1 + rtnval[rtnval[0]++] = ((int[]) ca[c])[0]; + rtnval[rtnval[0]++] = ((int[]) ca[c])[1]; + rtnval[rtnval[0]] = (int) (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1 : 0]); + rtnval[1] += rtnval[rtnval[0]++]; } } + return rtnval; } - enum base {A,T,g,C}; - - - public static void main(String args[]){ - //Short test to see if checkBpType works - ArrayList test = new ArrayList(); - test.add("A"); - test.add("c"); - test.add("g"); - test.add("T"); - test.add("U"); - for (String i : test) { - for (String j : test) { - System.out.println(i+"-"+j+": "+StructureFrequency.checkBpType(i.charAt(0),j.charAt(0))); - } - } + public static void main(String args[]) + { + // Short test to see if checkBpType works + ArrayList test = new ArrayList(); + test.add("A"); + test.add("c"); + test.add("g"); + test.add("T"); + test.add("U"); + for (String i : test) + { + for (String j : test) + { + System.out.println(i + "-" + j + ": " + + StructureFrequency.checkBpType(i.charAt(0), j.charAt(0))); + } + } } }