X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FStructureFrequency.java;h=1342bb273760f0efc2524e1d3e2840ddf30b2f9c;hb=bde972cad13060894eadcb189afc9fa3d8ec265a;hp=cae5da29d277c6be93ea1c3cc806ed3baaed32a8;hpb=60f7107d9fc77f34e96ae4b9780e2d11195aed3b;p=jalview.git diff --git a/src/jalview/analysis/StructureFrequency.java b/src/jalview/analysis/StructureFrequency.java index cae5da2..1342bb2 100644 --- a/src/jalview/analysis/StructureFrequency.java +++ b/src/jalview/analysis/StructureFrequency.java @@ -1,28 +1,34 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8) - * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ - - package jalview.analysis; -import java.util.*; - +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.Annotation; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.util.Comparison; import jalview.util.Format; -import jalview.datamodel.*; + +import java.util.ArrayList; +import java.util.Hashtable; /** * Takes in a vector or array of sequences and column start and column end and @@ -35,6 +41,8 @@ import jalview.datamodel.*; */ public class StructureFrequency { + public static final int STRUCTURE_PROFILE_LENGTH = 74; + // No need to store 1000s of strings which are not // visible to the user. public static final String MAXCOUNT = "C"; @@ -60,15 +68,15 @@ public class StructureFrequency */ public static int findPair(SequenceFeature[] pairs, int indice) { - System.out.print("indice"+indice+" "); + for (int i = 0; i < pairs.length; i++) { if (pairs[i].getBegin() == indice) - + { - System.out.println(pairs[i].getEnd()); + return pairs[i].getEnd(); - + } } return -1; @@ -89,39 +97,31 @@ public class StructureFrequency int end, Hashtable[] result, boolean profile, AlignmentAnnotation rnaStruc) { -// System.out.println("longueur="+sequences.length); -// for(int l=0;l<=(sequences.length-1);l++){ -// System.out.println("sequences "+l+":"+sequences[l].getSequenceAsString()); -// } -// System.out.println("start="+start); - System.out.println("end="+end); -// System.out.println("result="+result.length); -// -// System.out.println("profile="+profile); -// System.out.println("rnaStruc="+rnaStruc); + Hashtable residueHash; String maxResidue; char[] struc = rnaStruc.getRNAStruc().toCharArray(); + SequenceFeature[] rna = rnaStruc._rnasecstr; char c, s, cEnd; - int count = 0, nonGap = 0, i, bpEnd = -1, j, jSize = sequences.length; + int bpEnd = -1; + int jSize = sequences.length; int[] values; int[][] pairs; float percentage; - boolean wooble = true; - for (i = start; i < end; i++) // foreach column + for (int i = start; i < end; i++) // foreach column { - residueHash = new Hashtable(); + int canonicalOrWobblePairCount = 0, canonical = 0; + int otherPairCount = 0; + int nongap = 0; maxResidue = "-"; values = new int[255]; pairs = new int[255][255]; bpEnd = -1; - //System.out.println("s="+struc[i]); if (i < struc.length) { s = struc[i]; - } else { @@ -132,7 +132,7 @@ public class StructureFrequency s = '-'; } - if (s != '(' && s != '[') + if (!Rna.isOpeningParenthesis(s)) { if (s == '-') { @@ -141,193 +141,138 @@ public class StructureFrequency } else { - - bpEnd = findPair(rna, i); - - if (bpEnd>-1) - { - for (j = 0; j < jSize; j++) // foreach row + + if (bpEnd > -1) { - if (sequences[j] == null) + for (int j = 0; j < jSize; j++) // foreach row { - System.err - .println("WARNING: Consensus skipping null sequence - possible race condition."); - continue; - } - c = sequences[j].getCharAt(i); - //System.out.println("c="+c); - - - // standard representation for gaps in sequence and structure - if (c == '.' || c == ' ') + if (sequences[j] == null) { - System.err - .println("WARNING: Consensus skipping null sequence - possible race condition."); + System.err.println( + "WARNING: Consensus skipping null sequence - possible race condition."); continue; } + + c = sequences[j].getCharAt(i); cEnd = sequences[j].getCharAt(bpEnd); - - - System.out.println("pairs ="+c+","+cEnd); - if (checkBpType(c, cEnd)==true) + + if (Comparison.isGap(c) || Comparison.isGap(cEnd)) { - values['(']++; // H means it's a helix (structured) - maxResidue = "("; - wooble=true; - System.out.println("It's a pair wc"); - + values['-']++; + continue; } - if (checkBpType(c, cEnd)==false) + nongap++; + /* + * ensure upper-case for counting purposes + */ + if ('a' <= c && 'z' >= c) { - wooble =false; - values['[']++; // H means it's a helix (structured) - maxResidue = "["; - System.out.println("It's an pair non canonic"); - System.out.println(sequences[j].getRNA()); - System.out.println(rnaStruc.getRNAStruc().charAt(i)); + c += 'A' - 'a'; } - pairs[c][cEnd]++; - - - } - + if ('a' <= cEnd && 'z' >= cEnd) + { + cEnd += 'A' - 'a'; + } + if (Rna.isCanonicalOrWobblePair(c, cEnd)) + { + canonicalOrWobblePairCount++; + if (Rna.isCanonicalPair(c, cEnd)) + { + canonical++; + } + } + else + { + otherPairCount++; + } + pairs[c][cEnd]++; + } } - // nonGap++; } - // UPDATE this for new values + + residueHash = new Hashtable(); if (profile) { - residueHash.put(PROFILE, new int[][] - { values, new int[] - { jSize, (jSize - values['-']) } }); + // TODO 1-dim array with jsize in [0], nongapped in [1]; or Pojo + residueHash.put(PROFILE, + new int[][] + { values, new int[] { jSize, (jSize - values['-']) } }); residueHash.put(PAIRPROFILE, pairs); } - if (wooble==true) - { - count = values['(']; - } - if (wooble==false) + values['('] = canonicalOrWobblePairCount; + values['['] = canonical; + values['{'] = otherPairCount; + /* + * the count is the number of valid pairs (as a percentage, determines + * the relative size of the profile logo) + */ + int count = canonicalOrWobblePairCount; + + /* + * display '(' if most pairs are canonical, or as + * '[' if there are more wobble pairs. + */ + if (canonicalOrWobblePairCount > 0 || otherPairCount > 0) { - count = values['[']; + if (canonicalOrWobblePairCount >= otherPairCount) + { + maxResidue = (canonicalOrWobblePairCount - canonical) < canonical + ? "(" + : "["; + } + else + { + maxResidue = "{"; + } } - residueHash.put(MAXCOUNT, new Integer(count)); + residueHash.put(MAXCOUNT, Integer.valueOf(count)); residueHash.put(MAXRESIDUE, maxResidue); percentage = ((float) count * 100) / jSize; - residueHash.put(PID_GAPS, new Float(percentage)); + residueHash.put(PID_GAPS, Float.valueOf(percentage)); + + percentage = ((float) count * 100) / nongap; + residueHash.put(PID_NOGAPS, Float.valueOf(percentage)); - // percentage = ((float) count * 100) / (float) nongap; - // residueHash.put(PID_NOGAPS, new Float(percentage)); if (result[i] == null) { result[i] = residueHash; } if (bpEnd > 0) { - values[')'] = values['(']; + values[')'] = values['(']; values[']'] = values['[']; + values['}'] = values['{']; values['('] = 0; values['['] = 0; + values['{'] = 0; + maxResidue = maxResidue.equals("(") ? ")" + : maxResidue.equals("[") ? "]" : "}"; + residueHash = new Hashtable(); - if (wooble==true){ - System.out.println(maxResidue+","+wooble); - maxResidue = ")"; - } - if(wooble==false){ - System.out.println(maxResidue+","+wooble); - maxResidue = "]"; - } if (profile) { - residueHash.put(PROFILE, new int[][] - { values, new int[] - { jSize, (jSize - values['-']) } }); + residueHash.put(PROFILE, + new int[][] + { values, new int[] { jSize, (jSize - values['-']) } }); residueHash.put(PAIRPROFILE, pairs); } - residueHash.put(MAXCOUNT, new Integer(count)); + residueHash.put(MAXCOUNT, Integer.valueOf(count)); residueHash.put(MAXRESIDUE, maxResidue); percentage = ((float) count * 100) / jSize; - residueHash.put(PID_GAPS, new Float(percentage)); - - result[bpEnd] = residueHash; - } - } - } + residueHash.put(PID_GAPS, Float.valueOf(percentage)); - /** - * Method to check if a base-pair is a canonical or a wobble bp - * - * @param up - * 5' base - * @param down - * 3' base - * @return True if it is a canonical/wobble bp - */ - public static boolean checkBpType(char up, char down) - { - if (up > 'Z') - { - up -= 32; - } - if (down > 'Z') - { - down -= 32; - } + percentage = ((float) count * 100) / nongap; + residueHash.put(PID_NOGAPS, Float.valueOf(percentage)); - switch (up) - { - case 'A': - switch (down) - { - case 'T': - return true; - case 'U': - return true; - } - break; - case 'C': - switch (down) - { - case 'G': - return true; - } - break; - case 'T': - switch (down) - { - case 'A': - return true; - case 'G': - return true; - } - break; - case 'G': - switch (down) - { - case 'C': - return true; - case 'T': - return true; - case 'U': - return true; - } - break; - case 'U': - switch (down) - { - case 'A': - return true; - case 'G': - return true; + result[bpEnd] = residueHash; } - break; } - return false; } /** @@ -355,18 +300,19 @@ public class StructureFrequency // initialised properly return; } - String fmtstr="%3.1f"; - int precision=2; - while (nseq>100) { + String fmtstr = "%3.1f"; + int precision = 2; + while (nseq > 100) + { precision++; - nseq/=10; + nseq /= 10; } - if (precision>2) + if (precision > 2) { - fmtstr = "%"+(2+precision)+"."+precision+"f"; + fmtstr = "%" + (2 + precision) + "." + precision + "f"; } Format fmt = new Format(fmtstr); - + for (int i = iStart; i < width; i++) { Hashtable hci; @@ -420,15 +366,14 @@ public class StructureFrequency * ((c == 0) ? "" : "; ") + alphabet[c] + " " + ((int) tval) + "%"; } } * else { */ - Object[] ca = new Object[625]; + int[][] ca = new int[625][]; float[] vl = new float[625]; int x = 0; for (int c = 65; c < 90; c++) { for (int d = 65; d < 90; d++) { - ca[x] = new int[] - { c, d }; + ca[x] = new int[] { c, d }; vl[x] = pairs[c][d]; x++; } @@ -436,14 +381,18 @@ public class StructureFrequency jalview.util.QuickSort.sort(vl, ca); int p = 0; + /* + * profile[1] is {total, ungappedTotal} + */ + final int divisor = profile[1][ignoreGapsInConsensusCalculation ? 1 + : 0]; for (int c = 624; c > 0; c--) { if (vl[c] > 0) { - tval = (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1 - : 0]); - mouseOver += ((p == 0) ? "" : "; ") + (char) ((int[]) ca[c])[0] - + (char) ((int[]) ca[c])[1] + " " + fmt.form(tval) + "%"; + tval = (vl[c] * 100f / divisor); + mouseOver += ((p == 0) ? "" : "; ") + (char) ca[c][0] + + (char) ca[c][1] + " " + fmt.form(tval) + "%"; p++; } @@ -469,45 +418,55 @@ public class StructureFrequency public static int[] extractProfile(Hashtable hconsensus, boolean ignoreGapsInConsensusCalculation) { - int[] rtnval = new int[74]; // 2*(5*5)+2 + int[] rtnval = new int[STRUCTURE_PROFILE_LENGTH]; // 2*(5*5)+2 int[][] profile = (int[][]) hconsensus.get(StructureFrequency.PROFILE); int[][] pairs = (int[][]) hconsensus .get(StructureFrequency.PAIRPROFILE); if (profile == null) + { return null; + } // TODO fix the object length, also do it in completeConsensus - Object[] ca = new Object[625]; + // Object[] ca = new Object[625]; + int[][] ca = new int[625][]; float[] vl = new float[625]; int x = 0; for (int c = 65; c < 90; c++) { for (int d = 65; d < 90; d++) { - ca[x] = new int[] - { c, d }; + ca[x] = new int[] { c, d }; vl[x] = pairs[c][d]; x++; } } jalview.util.QuickSort.sort(vl, ca); - rtnval[0] = 2; + int valuesCount = 0; rtnval[1] = 0; + int offset = 2; + final int divisor = profile[1][ignoreGapsInConsensusCalculation ? 1 + : 0]; for (int c = 624; c > 0; c--) { if (vl[c] > 0) { - rtnval[rtnval[0]++] = ((int[]) ca[c])[0]; - rtnval[rtnval[0]++] = ((int[]) ca[c])[1]; - rtnval[rtnval[0]] = (int) (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1 - : 0]); - rtnval[1] += rtnval[rtnval[0]++]; + rtnval[offset++] = ca[c][0]; + rtnval[offset++] = ca[c][1]; + rtnval[offset] = (int) (vl[c] * 100f / divisor); + rtnval[1] += rtnval[offset++]; + valuesCount++; } } + rtnval[0] = valuesCount; - return rtnval; + // insert profile type code in position 0 + int[] result = new int[rtnval.length + 1]; + result[0] = AlignmentAnnotation.STRUCTURE_PROFILE; + System.arraycopy(rtnval, 0, result, 1, rtnval.length); + return result; } public static void main(String args[]) @@ -524,7 +483,7 @@ public class StructureFrequency for (String j : test) { System.out.println(i + "-" + j + ": " - + StructureFrequency.checkBpType(i.charAt(0), j.charAt(0))); + + Rna.isCanonicalOrWobblePair(i.charAt(0), j.charAt(0))); } } }