X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FRna.java;h=41dcd28745a4c254c8746dc2c7067895c3eb3111;hb=0715e493687eeb93d3b3f39cd1ff25e2671ce0e9;hp=4c05ece72126c1acfcc91b6ca477858d16a77e1e;hpb=7ab5d6b0ba5fec1ea4a4239e79c476d841622485;p=jalview.git diff --git a/src/jalview/analysis/Rna.java b/src/jalview/analysis/Rna.java index 4c05ece..41dcd28 100644 --- a/src/jalview/analysis/Rna.java +++ b/src/jalview/analysis/Rna.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -26,30 +26,30 @@ package jalview.analysis; +import jalview.analysis.SecStrConsensus.SimpleBP; +import jalview.datamodel.SequenceFeature; +import jalview.util.MessageManager; + import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.Hashtable; +import java.util.List; import java.util.Stack; import java.util.Vector; -import jalview.analysis.SecStrConsensus.SimpleBP; -import jalview.datamodel.SequenceFeature; - public class Rna { - static Hashtable pairHash = new Hashtable(); + static Hashtable pairHash = new Hashtable(); - private static final Character[] openingPars = - { '(', '[', '{', '<', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', - 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z' }; + private static final Character[] openingPars = { '(', '[', '{', '<', 'A', + 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; - private static final Character[] closingPars = - { ')', ']', '}', '>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', - 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', - 'y', 'z' }; + private static final Character[] closingPars = { ')', ']', '}', '>', 'a', + 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' }; private static HashSet openingParsSet = new HashSet( Arrays.asList(openingPars)); @@ -64,18 +64,18 @@ public class Rna { for (int i = 0; i < openingPars.length; i++) { - System.out.println(closingPars[i] + "->" + openingPars[i]); + // System.out.println(closingPars[i] + "->" + openingPars[i]); put(closingPars[i], openingPars[i]); } } }; - private static boolean isOpeningParenthesis(char c) + public static boolean isOpeningParenthesis(char c) { return openingParsSet.contains(c); } - private static boolean isClosingParenthesis(char c) + public static boolean isClosingParenthesis(char c) { return closingParsSet.contains(c); } @@ -86,8 +86,10 @@ public class Rna if (!isClosingParenthesis(closingParenthesis)) { throw new WUSSParseException( - "Querying matching opening parenthesis for non-closing parenthesis character " - + closingParenthesis, -1); + MessageManager.formatMessage( + "exception.querying_matching_opening_parenthesis_for_non_closing_parenthesis", + new String[] { String.valueOf(closingParenthesis) }), + -1); } return closingToOpening.get(closingParenthesis); @@ -96,19 +98,20 @@ public class Rna /** * Based off of RALEE code ralee-get-base-pairs. Keeps track of open bracket * positions in "stack" vector. When a close bracket is reached, pair this - * with the last element in the "stack" vector and store in "pairs" vector. - * Remove last element in the "stack" vector. Continue in this manner until - * the whole string is processed. + * with the last matching element in the "stack" vector and store in "pairs" + * vector. Remove last element in the "stack" vector. Continue in this manner + * until the whole string is processed. Parse errors are thrown as exceptions + * wrapping the error location - position of the first unmatched closing + * bracket, or string length if there is an unmatched opening bracket. * * @param line * Secondary structure line of an RNA Stockholm file - * @return Array of SequenceFeature; type = RNA helix, begin is open base - * pair, end is close base pair + * @return + * @throw {@link WUSSParseException} */ - public static Vector GetSimpleBPs(CharSequence line) + public static Vector getSimpleBPs(CharSequence line) throws WUSSParseException { - System.out.println(line); Hashtable> stacks = new Hashtable>(); Vector pairs = new Vector(); int i = 0; @@ -132,16 +135,18 @@ public class Rna if (!stacks.containsKey(opening)) { - throw new WUSSParseException( - "Mismatched (unseen) closing character " + base, i); + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_unseen_closing_char", + new String[] { String.valueOf(base) }), i); } Stack stack = stacks.get(opening); if (stack.isEmpty()) { // error whilst parsing i'th position. pass back - throw new WUSSParseException("Mismatched closing character " - + base, i); + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_closing_char", + new String[] { String.valueOf(base) }), i); } int temp = stack.pop(); @@ -154,31 +159,36 @@ public class Rna Stack stack = stacks.get(opening); if (!stack.empty()) { - throw new WUSSParseException("Mismatched opening character " - + opening + " at " + stack.pop(), i); + /* + * we have an unmatched opening bracket; report error as at + * i (length of input string) + */ + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_opening_char", + new String[] { String.valueOf(opening), + String.valueOf(stack.pop()) }), i); } } return pairs; } - public static SequenceFeature[] GetBasePairs(CharSequence line) + public static SequenceFeature[] getBasePairs(List bps) throws WUSSParseException { - Vector bps = GetSimpleBPs(line); SequenceFeature[] outPairs = new SequenceFeature[bps.size()]; for (int p = 0; p < bps.size(); p++) { - SimpleBP bp = bps.elementAt(p); + SimpleBP bp = bps.get(p); outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(), bp.getBP3(), ""); } return outPairs; } - public static ArrayList GetModeleBP(CharSequence line) + public static List getModeleBP(CharSequence line) throws WUSSParseException { - Vector bps = GetSimpleBPs(line); + Vector bps = getSimpleBPs(line); return new ArrayList(bps); } @@ -216,8 +226,8 @@ public class Rna int close; // Position of a close bracket under review int j; // Counter - Hashtable helices = new Hashtable(); // Keep track of helix number for each - // position + Hashtable helices = new Hashtable(); + // Keep track of helix number for each position // Go through each base pair and assign positions a helix for (i = 0; i < pairs.length; i++) @@ -251,7 +261,7 @@ public class Rna if ((popen < lastopen) && (popen > open)) { if (helices.containsValue(popen) - && (((Integer) helices.get(popen)) == helix)) + && ((helices.get(popen)) == helix)) { continue; } @@ -277,4 +287,97 @@ public class Rna } } + + /** + * Answers true if the character is a recognised symbol for RNA secondary + * structure. Currently accepts a-z, A-Z, ()[]{}<>. + * + * @param c + * @return + */ + public static boolean isRnaSecondaryStructureSymbol(char c) + { + return isOpeningParenthesis(c) || isClosingParenthesis(c); + } + + /** + * Translates a string to RNA secondary structure representation. Returns the + * string with any non-SS characters changed to spaces. Accepted characters + * are a-z, A-Z, and (){}[]<> brackets. + * + * @param ssString + * @return + */ + public static String getRNASecStrucState(String ssString) + { + if (ssString == null) + { + return null; + } + StringBuilder result = new StringBuilder(ssString.length()); + for (int i = 0; i < ssString.length(); i++) + { + char c = ssString.charAt(i); + result.append(isRnaSecondaryStructureSymbol(c) ? c : " "); + } + return result.toString(); + } + + /** + * Answers true if the base-pair is either a canonical (A-T/U, C-G) or a + * wobble (G-T/U) pair (either way round), else false + * + * @param first + * @param second + * @return + */ + public static boolean isCanonicalOrWobblePair(char first, char second) + { + if (first > 'Z') + { + first -= 32; + } + if (second > 'Z') + { + second -= 32; + } + + switch (first) + { + case 'A': + switch (second) + { + case 'T': + case 'U': + return true; + } + break; + case 'C': + switch (second) + { + case 'G': + return true; + } + break; + case 'T': + case 'U': + switch (second) + { + case 'A': + case 'G': + return true; + } + break; + case 'G': + switch (second) + { + case 'C': + case 'T': + case 'U': + return true; + } + break; + } + return false; + } }