X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FRna.java;h=7c077088af2c9ce76661c8e13dcf09c22069f9d9;hb=9c1dd670f293d8f33de55133397b4cd2d4baf38e;hp=54d26973f0852cd39a449a653e5868bd78b98478;hpb=8d2724b83aca38ef75d68787cc5939d950467e63;p=jalview.git diff --git a/src/jalview/analysis/Rna.java b/src/jalview/analysis/Rna.java index 54d2697..7c07708 100644 --- a/src/jalview/analysis/Rna.java +++ b/src/jalview/analysis/Rna.java @@ -1,19 +1,21 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. * The Jalview Authors are detailed in the 'AUTHORS' file. 
*/ /* Author: Lauren Michelle Lui @@ -24,155 +26,208 @@ package jalview.analysis; +import jalview.analysis.SecStrConsensus.SimpleBP; +import jalview.datamodel.SequenceFeature; +import jalview.util.MessageManager; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; import java.util.Hashtable; +import java.util.List; import java.util.Stack; import java.util.Vector; - -import jalview.analysis.SecStrConsensus.SimpleBP; -import jalview.datamodel.SequenceFeature; - public class Rna { - - static Hashtable pairHash = new Hashtable(); - - private static final Character[] openingPars = {'(','[','{','<','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'}; - private static final Character[] closingPars = {')',']','}','>','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'}; - - private static HashSet openingParsSet = new HashSet(Arrays.asList(openingPars)); - private static HashSet closingParsSet = new HashSet(Arrays.asList(closingPars)); - private static Hashtable closingToOpening = new Hashtable() - // Initializing final data structure + + /** + * Answers true if the character is a valid open pair rna secondary structure + * symbol. Currently accepts A-Z, ([{< + * + * @param c + * @return + */ + public static boolean isOpeningParenthesis(char c) { - private static final long serialVersionUID = 1L; - { - for(int i=0;i"+openingPars[i]); - put(closingPars[i],openingPars[i]); - } - }}; - - private static boolean isOpeningParenthesis(char c) + return ('A' <= c && c <= 'Z' || c == '(' || c == '[' || c == '{' || c == '<'); + } + + /** + * Answers true if the string is a valid open pair rna secondary structure + * symbol. 
Currently accepts A-Z, ([{< + * + * @param s + * @return + */ + public static boolean isOpeningParenthesis(String s) { - return openingParsSet.contains(c); + return s != null && s.length() == 1 + && isOpeningParenthesis(s.charAt(0)); } - - private static boolean isClosingParenthesis(char c) + + /** + * Answers true if the character is a valid close pair rna secondary structure + * symbol. Currently accepts a-z, )]}> + * + * @param c + * @return + */ + public static boolean isClosingParenthesis(char c) { - return closingParsSet.contains(c); + return ('a' <= c && c <= 'z' || c == ')' || c == ']' || c == '}' || c == '>'); } - private static char matchingOpeningParenthesis(char closingParenthesis) throws WUSSParseException + /** + * Answers true if the string is a valid close pair rna secondary structure + * symbol. Currently accepts a-z, )]}> + * + * @param s + * @return + */ + public static boolean isClosingParenthesis(String s) { - if (!isClosingParenthesis(closingParenthesis)) - { - throw new WUSSParseException("Querying matching opening parenthesis for non-closing parenthesis character "+closingParenthesis, -1); - } - - return closingToOpening.get(closingParenthesis); + return s != null && s.length() == 1 + && isClosingParenthesis(s.charAt(0)); } - + + /** + * Returns the matching open pair symbol for the given closing symbol. + * Currently returns A-Z for a-z, or ([{< for )]}>, or the input symbol if it + * is not a valid closing symbol. + * + * @param c + * @return + */ + public static char getMatchingOpeningParenthesis(char c) + { + if ('a' <= c && c <= 'z') + { + return (char) (c + 'A' - 'a'); + } + switch (c) + { + case ')': + return '('; + case ']': + return '['; + case '}': + return '{'; + case '>': + return '<'; + default: + return c; + } + } + /** * Based off of RALEE code ralee-get-base-pairs. Keeps track of open bracket * positions in "stack" vector. 
When a close bracket is reached, pair this - * with the last element in the "stack" vector and store in "pairs" vector. - * Remove last element in the "stack" vector. Continue in this manner until - * the whole string is processed. + * with the last matching element in the "stack" vector and store in "pairs" + * vector. Remove last element in the "stack" vector. Continue in this manner + * until the whole string is processed. Parse errors are thrown as exceptions + * wrapping the error location - position of the first unmatched closing + * bracket, or string length if there is an unmatched opening bracket. * * @param line * Secondary structure line of an RNA Stockholm file - * @return Array of SequenceFeature; type = RNA helix, begin is open base - * pair, end is close base pair + * @return + * @throw {@link WUSSParseException} */ - public static Vector<SimpleBP> GetSimpleBPs(CharSequence line) throws WUSSParseException + public static Vector<SimpleBP> getSimpleBPs(CharSequence line) + throws WUSSParseException { - System.out.println(line); - Hashtable<Character, Stack<Integer>> stacks = new Hashtable<Character, Stack<Integer>>(); + Hashtable<Character, Stack<Integer>> stacks = new Hashtable<Character, Stack<Integer>>(); Vector<SimpleBP> pairs = new Vector<SimpleBP>(); int i = 0; while (i < line.length()) { char base = line.charAt(i); - + if (isOpeningParenthesis(base)) { - if (!stacks.containsKey(base)){ - stacks.put(base, new Stack<Integer>()); - } + if (!stacks.containsKey(base)) + { + stacks.put(base, new Stack<Integer>()); + } stacks.get(base).push(i); - + } else if (isClosingParenthesis(base)) { - - char opening = matchingOpeningParenthesis(base); - - if (!stacks.containsKey(opening)){ - throw new WUSSParseException("Mismatched (unseen) closing character "+base, i); - } - - Stack<Integer> stack = stacks.get(opening); + + char opening = getMatchingOpeningParenthesis(base); + + if (!stacks.containsKey(opening)) + { + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_unseen_closing_char", + new String[] { String.valueOf(base) }), i); + } + + Stack<Integer> stack = stacks.get(opening); if (stack.isEmpty()) 
{ // error whilst parsing i'th position. pass back - throw new WUSSParseException("Mismatched closing character "+base, i); + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_closing_char", + new String[] { String.valueOf(base) }), i); } int temp = stack.pop(); - - pairs.add(new SimpleBP(temp,i)); + + pairs.add(new SimpleBP(temp, i)); } i++; } - for(char opening: stacks.keySet()) + for (char opening : stacks.keySet()) { - Stack<Integer> stack = stacks.get(opening); - if (!stack.empty()) - { - throw new WUSSParseException("Mismatched opening character "+opening+" at "+stack.pop(), i); - } + Stack<Integer> stack = stacks.get(opening); + if (!stack.empty()) + { + /* + * we have an unmatched opening bracket; report error as at + * i (length of input string) + */ + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_opening_char", + new String[] { String.valueOf(opening), + String.valueOf(stack.pop()) }), i); + } } return pairs; } - - public static SequenceFeature[] GetBasePairs(CharSequence line) throws WUSSParseException + + public static SequenceFeature[] getBasePairs(List<SimpleBP> bps) + throws WUSSParseException { - Vector<SimpleBP> bps = GetSimpleBPs(line); - SequenceFeature[] outPairs = new SequenceFeature[bps.size()]; + SequenceFeature[] outPairs = new SequenceFeature[bps.size()]; for (int p = 0; p < bps.size(); p++) { - SimpleBP bp = bps.elementAt(p); - outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(),bp.getBP3(), ""); + SimpleBP bp = bps.get(p); + outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(), + bp.getBP3(), ""); } return outPairs; } - - - public static ArrayList<SimpleBP> GetModeleBP(CharSequence line) throws WUSSParseException + + public static List<SimpleBP> getModeleBP(CharSequence line) + throws WUSSParseException { - Vector<SimpleBP> bps = GetSimpleBPs(line); - return new ArrayList<SimpleBP>(bps); + Vector<SimpleBP> bps = getSimpleBPs(line); + return new ArrayList<SimpleBP>(bps); } - - + /** * Function to get the end position corresponding to a 
given start position - * @param indice - start position of a base pair + * + * @param indice + * - start position of a base pair * @return - end position of a base pair */ - /*makes no sense at the moment :( - public int findEnd(int indice){ - //TODO: Probably extend this to find the start to a given end? - //could be done by putting everything twice to the hash - ArrayList pair = new ArrayList(); - return pairHash.get(indice); - }*/ - + /* + * makes no sense at the moment :( public int findEnd(int indice){ //TODO: + * Probably extend this to find the start to a given end? //could be done by + * putting everything twice to the hash ArrayList pair = new + * ArrayList(); return pairHash.get(indice); } + */ /** * Figures out which helix each position belongs to and stores the helix @@ -194,8 +249,8 @@ public class Rna int close; // Position of a close bracket under review int j; // Counter - Hashtable helices = new Hashtable(); // Keep track of helix number for each - // position + Hashtable helices = new Hashtable(); + // Keep track of helix number for each position // Go through each base pair and assign positions a helix for (i = 0; i < pairs.length; i++) @@ -229,7 +284,7 @@ public class Rna if ((popen < lastopen) && (popen > open)) { if (helices.containsValue(popen) - && (((Integer) helices.get(popen)) == helix)) + && ((helices.get(popen)) == helix)) { continue; } @@ -255,5 +310,186 @@ public class Rna } } -} + /** + * Answers true if the character is a recognised symbol for RNA secondary + * structure. Currently accepts a-z, A-Z, ()[]{}<>. + * + * @param c + * @return + */ + public static boolean isRnaSecondaryStructureSymbol(char c) + { + return isOpeningParenthesis(c) || isClosingParenthesis(c); + } + + /** + * Answers true if the string is a recognised symbol for RNA secondary + * structure. Currently accepts a-z, A-Z, ()[]{}<>. 
+ * + * @param s + * @return + */ + public static boolean isRnaSecondaryStructureSymbol(String s) + { + return isOpeningParenthesis(s) || isClosingParenthesis(s); + } + + /** + * Translates a string to RNA secondary structure representation. Returns the + * string with any non-SS characters changed to spaces. Accepted characters + * are a-z, A-Z, and (){}[]<> brackets. + * + * @param ssString + * @return + */ + public static String getRNASecStrucState(String ssString) + { + if (ssString == null) + { + return null; + } + StringBuilder result = new StringBuilder(ssString.length()); + for (int i = 0; i < ssString.length(); i++) + { + char c = ssString.charAt(i); + result.append(isRnaSecondaryStructureSymbol(c) ? c : " "); + } + return result.toString(); + } + + /** + * Answers true if the base-pair is either a canonical (A-T/U, C-G) or a + * wobble (G-T/U) pair (either way round), else false + * + * @param first + * @param second + * @return + */ + public static boolean isCanonicalOrWobblePair(char first, char second) + { + if (first > 'Z') + { + first -= 32; + } + if (second > 'Z') + { + second -= 32; + } + + switch (first) + { + case 'A': + switch (second) + { + case 'T': + case 'U': + return true; + } + break; + case 'C': + switch (second) + { + case 'G': + return true; + } + break; + case 'T': + case 'U': + switch (second) + { + case 'A': + case 'G': + return true; + } + break; + case 'G': + switch (second) + { + case 'C': + case 'T': + case 'U': + return true; + } + break; + } + return false; + } + + public static boolean isCanonicalPair(char first, char second) + { + + if (first > 'Z') + { + first -= 32; + } + if (second > 'Z') + { + second -= 32; + } + + switch (first) + { + case 'A': + switch (second) + { + case 'T': + case 'U': + return true; + } + break; + case 'G': + switch (second) + { + case 'C': + return true; + } + break; + case 'C': + switch (second) + { + case 'G': + return true; + } + break; + case 'T': + case 'U': + switch (second) + { + case 'A': + 
return true; + } + break; + } + return false; + } + + /** + * Returns the matching close pair symbol for the given opening symbol. + * Currently returns a-z for A-Z, or )]}> for ([{<, or the input symbol if it + * is not a valid opening symbol. + * + * @param c + * @return + */ + public static char getMatchingClosingParenthesis(char c) + { + if ('A' <= c && c <= 'Z') + { + return (char) (c + 'a' - 'A'); + } + switch (c) + { + case '(': + return ')'; + case '[': + return ']'; + case '{': + return '}'; + case '<': + return '>'; + default: + return c; + } + } +}