X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FRna.java;h=e5cda93b985f33520b6b5450ddeb624bb900bb24;hb=47fdef81050c615ce519a0deaa8a5f4c67b83f0b;hp=ca8578286e594a717c73c30c619930399173ed6c;hpb=838e4f91d4a53dd315640dbc9ff6ef7a815ee576;p=jalview.git diff --git a/src/jalview/analysis/Rna.java b/src/jalview/analysis/Rna.java index ca85782..e5cda93 100644 --- a/src/jalview/analysis/Rna.java +++ b/src/jalview/analysis/Rna.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b1) - * Copyright (C) 2015 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -31,86 +31,115 @@ import jalview.datamodel.SequenceFeature; import jalview.util.MessageManager; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; +import java.util.HashMap; import java.util.Hashtable; +import java.util.List; +import java.util.Map; import java.util.Stack; -import java.util.Vector; public class Rna { - static Hashtable pairHash = new Hashtable(); - - private static final Character[] openingPars = { '(', '[', '{', '<', 'A', - 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; - - private static final Character[] closingPars = { ')', ']', '}', '>', 'a', - 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' }; - - private static HashSet openingParsSet = new HashSet( - Arrays.asList(openingPars)); - - private static HashSet closingParsSet = new HashSet( - Arrays.asList(closingPars)); + /** + * Answers true if the character is a valid open pair rna secondary structure + * symbol. Currently accepts A-Z, ([{< + * + * @param c + * @return + */ + public static boolean isOpeningParenthesis(char c) + { + return ('A' <= c && c <= 'Z' || c == '(' || c == '[' || c == '{' + || c == '<'); + } - private static Hashtable closingToOpening = new Hashtable() - // Initializing final data structure + /** + * Answers true if the string is a valid open pair rna secondary structure + * symbol. Currently accepts A-Z, ([{< + * + * @param s + * @return + */ + public static boolean isOpeningParenthesis(String s) { - private static final long serialVersionUID = 1L; - { - for (int i = 0; i < openingPars.length; i++) - { - // System.out.println(closingPars[i] + "->" + openingPars[i]); - put(closingPars[i], openingPars[i]); - } - } - }; + return s != null && s.length() == 1 + && isOpeningParenthesis(s.charAt(0)); + } - private static boolean isOpeningParenthesis(char c) + /** + * Answers true if the character is a valid close pair rna secondary structure + * symbol. Currently accepts a-z, )]}> + * + * @param c + * @return + */ + public static boolean isClosingParenthesis(char c) { - return openingParsSet.contains(c); + return ('a' <= c && c <= 'z' || c == ')' || c == ']' || c == '}' + || c == '>'); } - private static boolean isClosingParenthesis(char c) + /** + * Answers true if the string is a valid close pair rna secondary structure + * symbol. Currently accepts a-z, )]}> + * + * @param s + * @return + */ + public static boolean isClosingParenthesis(String s) { - return closingParsSet.contains(c); + return s != null && s.length() == 1 + && isClosingParenthesis(s.charAt(0)); } - private static char matchingOpeningParenthesis(char closingParenthesis) - throws WUSSParseException + /** + * Returns the matching open pair symbol for the given closing symbol. + * Currently returns A-Z for a-z, or ([{< for )]}>, or the input symbol if it + * is not a valid closing symbol. + * + * @param c + * @return + */ + public static char getMatchingOpeningParenthesis(char c) { - if (!isClosingParenthesis(closingParenthesis)) + if ('a' <= c && c <= 'z') { - throw new WUSSParseException( - MessageManager.formatMessage( - "exception.querying_matching_opening_parenthesis_for_non_closing_parenthesis", - new String[] { new StringBuffer(closingParenthesis) - .toString() }), -1); + return (char) (c + 'A' - 'a'); + } + switch (c) + { + case ')': + return '('; + case ']': + return '['; + case '}': + return '{'; + case '>': + return '<'; + default: + return c; } - - return closingToOpening.get(closingParenthesis); } /** * Based off of RALEE code ralee-get-base-pairs. Keeps track of open bracket * positions in "stack" vector. When a close bracket is reached, pair this - * with the last element in the "stack" vector and store in "pairs" vector. - * Remove last element in the "stack" vector. Continue in this manner until - * the whole string is processed. + * with the last matching element in the "stack" vector and store in "pairs" + * vector. Remove last element in the "stack" vector. Continue in this manner + * until the whole string is processed. Parse errors are thrown as exceptions + * wrapping the error location - position of the first unmatched closing + * bracket, or string length if there is an unmatched opening bracket. * * @param line * Secondary structure line of an RNA Stockholm file - * @return Array of SequenceFeature; type = RNA helix, begin is open base - * pair, end is close base pair + * @return + * @throw {@link WUSSParseException} */ - public static Vector GetSimpleBPs(CharSequence line) + protected static List getSimpleBPs(CharSequence line) throws WUSSParseException { Hashtable> stacks = new Hashtable>(); - Vector pairs = new Vector(); + List pairs = new ArrayList(); int i = 0; while (i < line.length()) { @@ -128,13 +157,13 @@ public class Rna else if (isClosingParenthesis(base)) { - char opening = matchingOpeningParenthesis(base); + char opening = getMatchingOpeningParenthesis(base); if (!stacks.containsKey(opening)) { throw new WUSSParseException(MessageManager.formatMessage( - "exception.mismatched_unseen_closing_char", - new String[] { new StringBuffer(base).toString() }), i); + "exception.mismatched_unseen_closing_char", new String[] + { String.valueOf(base) }), i); } Stack stack = stacks.get(opening); @@ -142,8 +171,8 @@ public class Rna { // error whilst parsing i'th position. pass back throw new WUSSParseException(MessageManager.formatMessage( - "exception.mismatched_closing_char", - new String[] { new StringBuffer(base).toString() }), i); + "exception.mismatched_closing_char", new String[] + { String.valueOf(base) }), i); } int temp = stack.pop(); @@ -156,35 +185,22 @@ public class Rna Stack stack = stacks.get(opening); if (!stack.empty()) { + /* + * we have an unmatched opening bracket; report error as at + * i (length of input string) + */ throw new WUSSParseException(MessageManager.formatMessage( - "exception.mismatched_opening_char", - new String[] { new StringBuffer(opening).toString(), - Integer.valueOf(stack.pop()).toString() }), i); + "exception.mismatched_opening_char", new String[] + { String.valueOf(opening), String.valueOf(stack.pop()) }), + i); } } return pairs; } - public static SequenceFeature[] GetBasePairs(CharSequence line) - throws WUSSParseException - { - Vector bps = GetSimpleBPs(line); - SequenceFeature[] outPairs = new SequenceFeature[bps.size()]; - for (int p = 0; p < bps.size(); p++) - { - SimpleBP bp = bps.elementAt(p); - outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(), - bp.getBP3(), ""); - } - return outPairs; - } + - public static ArrayList GetModeleBP(CharSequence line) - throws WUSSParseException - { - Vector bps = GetSimpleBPs(line); - return new ArrayList(bps); - } + /** * Function to get the end position corresponding to a given start position @@ -201,34 +217,213 @@ public class Rna */ /** - * Figures out which helix each position belongs to and stores the helix - * number in the 'featureGroup' member of a SequenceFeature Based off of RALEE - * code ralee-helix-map. + * Answers true if the character is a recognised symbol for RNA secondary + * structure. Currently accepts a-z, A-Z, ()[]{}<>. + * + * @param c + * @return + */ + public static boolean isRnaSecondaryStructureSymbol(char c) + { + return isOpeningParenthesis(c) || isClosingParenthesis(c); + } + + /** + * Answers true if the string is a recognised symbol for RNA secondary + * structure. Currently accepts a-z, A-Z, ()[]{}<>. + * + * @param s + * @return + */ + public static boolean isRnaSecondaryStructureSymbol(String s) + { + return isOpeningParenthesis(s) || isClosingParenthesis(s); + } + + /** + * Translates a string to RNA secondary structure representation. Returns the + * string with any non-SS characters changed to spaces. Accepted characters + * are a-z, A-Z, and (){}[]<> brackets. + * + * @param ssString + * @return + */ + public static String getRNASecStrucState(String ssString) + { + if (ssString == null) + { + return null; + } + StringBuilder result = new StringBuilder(ssString.length()); + for (int i = 0; i < ssString.length(); i++) + { + char c = ssString.charAt(i); + result.append(isRnaSecondaryStructureSymbol(c) ? c : " "); + } + return result.toString(); + } + + /** + * Answers true if the base-pair is either a Watson-Crick (A:T/U, C:G) or a + * wobble (G:T/U) pair (either way round), else false + * + * @param first + * @param second + * @return + */ + public static boolean isCanonicalOrWobblePair(char first, char second) + { + if (first > 'Z') + { + first -= 32; + } + if (second > 'Z') + { + second -= 32; + } + + switch (first) + { + case 'A': + switch (second) + { + case 'T': + case 'U': + return true; + } + break; + case 'C': + switch (second) + { + case 'G': + return true; + } + break; + case 'T': + case 'U': + switch (second) + { + case 'A': + case 'G': + return true; + } + break; + case 'G': + switch (second) + { + case 'C': + case 'T': + case 'U': + return true; + } + break; + } + return false; + } + + /** + * Answers true if the base-pair is Watson-Crick - (A:T/U or C:G, either way + * round), else false + * + * @param first + * @param second + * @return + */ + public static boolean isCanonicalPair(char first, char second) + { + + if (first > 'Z') + { + first -= 32; + } + if (second > 'Z') + { + second -= 32; + } + + switch (first) + { + case 'A': + switch (second) + { + case 'T': + case 'U': + return true; + } + break; + case 'G': + switch (second) + { + case 'C': + return true; + } + break; + case 'C': + switch (second) + { + case 'G': + return true; + } + break; + case 'T': + case 'U': + switch (second) + { + case 'A': + return true; + } + break; + } + return false; + } + + /** + * Returns the matching close pair symbol for the given opening symbol. + * Currently returns a-z for A-Z, or )]}> for ([{<, or the input symbol if it + * is not a valid opening symbol. * - * @param pairs - * Array of SequenceFeature (output from Rna.GetBasePairs) + * @param c + * @return */ - public static void HelixMap(SequenceFeature[] pairs) + public static char getMatchingClosingParenthesis(char c) + { + if ('A' <= c && c <= 'Z') + { + return (char) (c + 'a' - 'A'); + } + switch (c) + { + case '(': + return ')'; + case '[': + return ']'; + case '{': + return '}'; + case '<': + return '>'; + default: + return c; + } + } + + public static SequenceFeature[] getHelixMap(CharSequence rnaAnnotation) + throws WUSSParseException { + List result = new ArrayList(); int helix = 0; // Number of helices/current helix int lastopen = 0; // Position of last open bracket reviewed int lastclose = 9999999; // Position of last close bracket reviewed - int i = pairs.length; // Number of pairs - int open; // Position of an open bracket under review - int close; // Position of a close bracket under review - int j; // Counter - - Hashtable helices = new Hashtable(); // Keep track of helix number for each - // position + Map helices = new HashMap(); + // Keep track of helix number for each position // Go through each base pair and assign positions a helix - for (i = 0; i < pairs.length; i++) + List bps = getSimpleBPs(rnaAnnotation); + for (SimpleBP basePair : bps) { - - open = pairs[i].getBegin(); - close = pairs[i].getEnd(); + final int open = basePair.getBP5(); + final int close = basePair.getBP3(); // System.out.println("open " + open + " close " + close); // System.out.println("lastclose " + lastclose + " lastopen " + lastopen); @@ -245,17 +440,17 @@ public class Rna /* * catch things like <<..<<..>>..<<..>>>> | */ - j = pairs.length - 1; - while (j >= 0) + int j = bps.size(); + while (--j >= 0) { - int popen = pairs[j].getBegin(); + int popen = bps.get(j).getBP5(); // System.out.println("j " + j + " popen " + popen + " lastopen " // +lastopen + " open " + open); if ((popen < lastopen) && (popen > open)) { if (helices.containsValue(popen) - && (((Integer) helices.get(popen)) == helix)) + && ((helices.get(popen)) == helix)) { continue; } @@ -265,8 +460,6 @@ public class Rna break; } } - - j -= 1; } // Put positions and helix information into the hashtable @@ -274,11 +467,13 @@ public class Rna helices.put(close, helix); // Record helix as featuregroup - pairs[i].setFeatureGroup(Integer.toString(helix)); + result.add(new SequenceFeature("RNA helix", "", open, close, + String.valueOf(helix))); lastopen = open; lastclose = close; - } + + return result.toArray(new SequenceFeature[result.size()]); } }