X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FRna.java;h=e5cda93b985f33520b6b5450ddeb624bb900bb24;hb=47fdef81050c615ce519a0deaa8a5f4c67b83f0b;hp=f497f0e4e909be7d49f6930d4d4c8dfe3bf9a532;hpb=267f5674a53a735d8e13a2a4c12a4c8d8b0c1f00;p=jalview.git diff --git a/src/jalview/analysis/Rna.java b/src/jalview/analysis/Rna.java index f497f0e..e5cda93 100644 --- a/src/jalview/analysis/Rna.java +++ b/src/jalview/analysis/Rna.java @@ -31,10 +31,11 @@ import jalview.datamodel.SequenceFeature; import jalview.util.MessageManager; import java.util.ArrayList; +import java.util.HashMap; import java.util.Hashtable; import java.util.List; +import java.util.Map; import java.util.Stack; -import java.util.Vector; public class Rna { @@ -48,7 +49,21 @@ public class Rna */ public static boolean isOpeningParenthesis(char c) { - return ('A' <= c && c <= 'Z' || c == '(' || c == '[' || c == '{' || c == '<'); + return ('A' <= c && c <= 'Z' || c == '(' || c == '[' || c == '{' + || c == '<'); + } + + /** + * Answers true if the string is a valid open pair rna secondary structure + * symbol. Currently accepts A-Z, ([{< + * + * @param s + * @return + */ + public static boolean isOpeningParenthesis(String s) + { + return s != null && s.length() == 1 + && isOpeningParenthesis(s.charAt(0)); } /** @@ -60,7 +75,21 @@ public class Rna */ public static boolean isClosingParenthesis(char c) { - return ('a' <= c && c <= 'z' || c == ')' || c == ']' || c == '}' || c == '>'); + return ('a' <= c && c <= 'z' || c == ')' || c == ']' || c == '}' + || c == '>'); + } + + /** + * Answers true if the string is a valid close pair rna secondary structure + * symbol. Currently accepts a-z, )]}> + * + * @param s + * @return + */ + public static boolean isClosingParenthesis(String s) + { + return s != null && s.length() == 1 + && isClosingParenthesis(s.charAt(0)); } /** @@ -106,11 +135,11 @@ public class Rna * @return * @throw {@link WUSSParseException} */ - public static Vector getSimpleBPs(CharSequence line) + protected static List getSimpleBPs(CharSequence line) throws WUSSParseException { Hashtable> stacks = new Hashtable>(); - Vector pairs = new Vector(); + List pairs = new ArrayList(); int i = 0; while (i < line.length()) { @@ -133,8 +162,8 @@ public class Rna if (!stacks.containsKey(opening)) { throw new WUSSParseException(MessageManager.formatMessage( - "exception.mismatched_unseen_closing_char", - new String[] { String.valueOf(base) }), i); + "exception.mismatched_unseen_closing_char", new String[] + { String.valueOf(base) }), i); } Stack stack = stacks.get(opening); @@ -142,8 +171,8 @@ public class Rna { // error whilst parsing i'th position. pass back throw new WUSSParseException(MessageManager.formatMessage( - "exception.mismatched_closing_char", - new String[] { String.valueOf(base) }), i); + "exception.mismatched_closing_char", new String[] + { String.valueOf(base) }), i); } int temp = stack.pop(); @@ -161,33 +190,17 @@ public class Rna * i (length of input string) */ throw new WUSSParseException(MessageManager.formatMessage( - "exception.mismatched_opening_char", - new String[] { String.valueOf(opening), - String.valueOf(stack.pop()) }), i); + "exception.mismatched_opening_char", new String[] + { String.valueOf(opening), String.valueOf(stack.pop()) }), + i); } } return pairs; } - public static SequenceFeature[] getBasePairs(List bps) - throws WUSSParseException - { - SequenceFeature[] outPairs = new SequenceFeature[bps.size()]; - for (int p = 0; p < bps.size(); p++) - { - SimpleBP bp = bps.get(p); - outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(), - bp.getBP3(), ""); - } - return outPairs; - } + - public static List getModeleBP(CharSequence line) - throws WUSSParseException - { - Vector bps = getSimpleBPs(line); - return new ArrayList(bps); - } + /** * Function to get the end position corresponding to a given start position @@ -204,97 +217,27 @@ public class Rna */ /** - * Figures out which helix each position belongs to and stores the helix - * number in the 'featureGroup' member of a SequenceFeature Based off of RALEE - * code ralee-helix-map. + * Answers true if the character is a recognised symbol for RNA secondary + * structure. Currently accepts a-z, A-Z, ()[]{}<>. * - * @param pairs - * Array of SequenceFeature (output from Rna.GetBasePairs) + * @param c + * @return */ - public static void HelixMap(SequenceFeature[] pairs) + public static boolean isRnaSecondaryStructureSymbol(char c) { - - int helix = 0; // Number of helices/current helix - int lastopen = 0; // Position of last open bracket reviewed - int lastclose = 9999999; // Position of last close bracket reviewed - int i = pairs.length; // Number of pairs - - int open; // Position of an open bracket under review - int close; // Position of a close bracket under review - int j; // Counter - - Hashtable helices = new Hashtable(); - // Keep track of helix number for each position - - // Go through each base pair and assign positions a helix - for (i = 0; i < pairs.length; i++) - { - - open = pairs[i].getBegin(); - close = pairs[i].getEnd(); - - // System.out.println("open " + open + " close " + close); - // System.out.println("lastclose " + lastclose + " lastopen " + lastopen); - - // we're moving from right to left based on closing pair - /* - * catch things like <<..>>..<<..>> | - */ - if (open > lastclose) - { - helix++; - } - - /* - * catch things like <<..<<..>>..<<..>>>> | - */ - j = pairs.length - 1; - while (j >= 0) - { - int popen = pairs[j].getBegin(); - - // System.out.println("j " + j + " popen " + popen + " lastopen " - // +lastopen + " open " + open); - if ((popen < lastopen) && (popen > open)) - { - if (helices.containsValue(popen) - && ((helices.get(popen)) == helix)) - { - continue; - } - else - { - helix++; - break; - } - } - - j -= 1; - } - - // Put positions and helix information into the hashtable - helices.put(open, helix); - helices.put(close, helix); - - // Record helix as featuregroup - pairs[i].setFeatureGroup(Integer.toString(helix)); - - lastopen = open; - lastclose = close; - - } + return isOpeningParenthesis(c) || isClosingParenthesis(c); } /** - * Answers true if the character is a recognised symbol for RNA secondary + * Answers true if the string is a recognised symbol for RNA secondary * structure. Currently accepts a-z, A-Z, ()[]{}<>. * - * @param c + * @param s * @return */ - public static boolean isRnaSecondaryStructureSymbol(char c) + public static boolean isRnaSecondaryStructureSymbol(String s) { - return isOpeningParenthesis(c) || isClosingParenthesis(c); + return isOpeningParenthesis(s) || isClosingParenthesis(s); } /** @@ -321,8 +264,8 @@ public class Rna } /** - * Answers true if the base-pair is either a canonical (A-T/U, C-G) or a - * wobble (G-T/U) pair (either way round), else false + * Answers true if the base-pair is either a Watson-Crick (A:T/U, C:G) or a + * wobble (G:T/U) pair (either way round), else false * * @param first * @param second @@ -338,7 +281,7 @@ public class Rna { second -= 32; } - + switch (first) { case 'A': @@ -379,6 +322,62 @@ public class Rna } /** + * Answers true if the base-pair is Watson-Crick - (A:T/U or C:G, either way + * round), else false + * + * @param first + * @param second + * @return + */ + public static boolean isCanonicalPair(char first, char second) + { + + if (first > 'Z') + { + first -= 32; + } + if (second > 'Z') + { + second -= 32; + } + + switch (first) + { + case 'A': + switch (second) + { + case 'T': + case 'U': + return true; + } + break; + case 'G': + switch (second) + { + case 'C': + return true; + } + break; + case 'C': + switch (second) + { + case 'G': + return true; + } + break; + case 'T': + case 'U': + switch (second) + { + case 'A': + return true; + } + break; + } + return false; + } + + /** * Returns the matching close pair symbol for the given opening symbol. * Currently returns a-z for A-Z, or )]}> for ([{<, or the input symbol if it * is not a valid opening symbol. @@ -406,4 +405,75 @@ public class Rna return c; } } + + public static SequenceFeature[] getHelixMap(CharSequence rnaAnnotation) + throws WUSSParseException + { + List result = new ArrayList(); + + int helix = 0; // Number of helices/current helix + int lastopen = 0; // Position of last open bracket reviewed + int lastclose = 9999999; // Position of last close bracket reviewed + + Map helices = new HashMap(); + // Keep track of helix number for each position + + // Go through each base pair and assign positions a helix + List bps = getSimpleBPs(rnaAnnotation); + for (SimpleBP basePair : bps) + { + final int open = basePair.getBP5(); + final int close = basePair.getBP3(); + + // System.out.println("open " + open + " close " + close); + // System.out.println("lastclose " + lastclose + " lastopen " + lastopen); + + // we're moving from right to left based on closing pair + /* + * catch things like <<..>>..<<..>> | + */ + if (open > lastclose) + { + helix++; + } + + /* + * catch things like <<..<<..>>..<<..>>>> | + */ + int j = bps.size(); + while (--j >= 0) + { + int popen = bps.get(j).getBP5(); + + // System.out.println("j " + j + " popen " + popen + " lastopen " + // +lastopen + " open " + open); + if ((popen < lastopen) && (popen > open)) + { + if (helices.containsValue(popen) + && ((helices.get(popen)) == helix)) + { + continue; + } + else + { + helix++; + break; + } + } + } + + // Put positions and helix information into the hashtable + helices.put(open, helix); + helices.put(close, helix); + + // Record helix as featuregroup + result.add(new SequenceFeature("RNA helix", "", open, close, + String.valueOf(helix))); + + lastopen = open; + lastclose = close; + } + + return result.toArray(new SequenceFeature[result.size()]); + } }