import java.util.Arrays;
import java.util.HashSet;
import java.util.Hashtable;
+import java.util.List;
import java.util.Stack;
import java.util.Vector;
public class Rna
{
- static Hashtable<Integer, Integer> pairHash = new Hashtable();
+ static Hashtable<Integer, Integer> pairHash = new Hashtable<Integer, Integer>();
private static final Character[] openingPars = { '(', '[', '{', '<', 'A',
'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
}
};
- private static boolean isOpeningParenthesis(char c)
+ public static boolean isOpeningParenthesis(char c) // answers true if c is in openingParsSet; public so that code outside this class can call it
{
return openingParsSet.contains(c);
}
- private static boolean isClosingParenthesis(char c)
+ public static boolean isClosingParenthesis(char c) // answers true if c is in closingParsSet; public so that code outside this class can call it
{
return closingParsSet.contains(c);
}
throw new WUSSParseException(
MessageManager.formatMessage(
"exception.querying_matching_opening_parenthesis_for_non_closing_parenthesis",
- new String[] { new StringBuffer(closingParenthesis)
- .toString() }), -1);
+ new String[] { String.valueOf(closingParenthesis) }),
+ -1);
}
return closingToOpening.get(closingParenthesis);
/**
* Based off of RALEE code ralee-get-base-pairs. Keeps track of open bracket
* positions in "stack" vector. When a close bracket is reached, pair this
- * with the last element in the "stack" vector and store in "pairs" vector.
- * Remove last element in the "stack" vector. Continue in this manner until
- * the whole string is processed.
+ * with the last matching element in the "stack" vector and store in "pairs"
+ * vector. Remove last element in the "stack" vector. Continue in this manner
+ * until the whole string is processed. Parse errors are thrown as exceptions
+ * wrapping the error location - position of the first unmatched closing
+ * bracket, or string length if there is an unmatched opening bracket.
*
* @param line
* Secondary structure line of an RNA Stockholm file
- * @return Array of SequenceFeature; type = RNA helix, begin is open base
- * pair, end is close base pair
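+ * @return base pairs, in the order in which each closing bracket is matched
+ * @throws WUSSParseException if a closing or opening bracket is unmatched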
*/
- public static Vector<SimpleBP> GetSimpleBPs(CharSequence line)
+ public static Vector<SimpleBP> getSimpleBPs(CharSequence line)
throws WUSSParseException
{
Hashtable<Character, Stack<Integer>> stacks = new Hashtable<Character, Stack<Integer>>();
{
throw new WUSSParseException(MessageManager.formatMessage(
"exception.mismatched_unseen_closing_char",
- new String[] { new StringBuffer(base).toString() }), i);
+ new String[] { String.valueOf(base) }), i);
}
Stack<Integer> stack = stacks.get(opening);
// error whilst parsing i'th position. pass back
throw new WUSSParseException(MessageManager.formatMessage(
"exception.mismatched_closing_char",
- new String[] { new StringBuffer(base).toString() }), i);
+ new String[] { String.valueOf(base) }), i);
}
int temp = stack.pop();
Stack<Integer> stack = stacks.get(opening);
if (!stack.empty())
{
+ /*
+ * we have an unmatched opening bracket; report error as at
+ * i (length of input string)
+ */
throw new WUSSParseException(MessageManager.formatMessage(
"exception.mismatched_opening_char",
- new String[] { new StringBuffer(opening).toString(),
- Integer.valueOf(stack.pop()).toString() }), i);
+ new String[] { String.valueOf(opening),
+ String.valueOf(stack.pop()) }), i);
}
}
return pairs;
}
- public static SequenceFeature[] GetBasePairs(CharSequence line)
+ public static SequenceFeature[] getBasePairs(List<SimpleBP> bps) // takes already-parsed base pairs; NOTE(review): this body no longer calls getSimpleBPs, so the throws clause below may be vestigial - confirm
throws WUSSParseException
{
- Vector<SimpleBP> bps = GetSimpleBPs(line);
SequenceFeature[] outPairs = new SequenceFeature[bps.size()];
for (int p = 0; p < bps.size(); p++)
{
- SimpleBP bp = bps.elementAt(p);
+ SimpleBP bp = bps.get(p); // one "RNA helix" feature is built per pair, from bp.getBP5() and bp.getBP3()
outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(),
bp.getBP3(), "");
}
return outPairs;
}
- public static ArrayList<SimpleBP> GetModeleBP(CharSequence line)
+ public static List<SimpleBP> getModeleBP(CharSequence line) // parses line with getSimpleBPs and returns the pairs copied into a new ArrayList
throws WUSSParseException
{
- Vector<SimpleBP> bps = GetSimpleBPs(line);
+ Vector<SimpleBP> bps = getSimpleBPs(line); // a parse failure propagates to the caller as WUSSParseException
return new ArrayList<SimpleBP>(bps);
}
int close; // Position of a close bracket under review
int j; // Counter
- Hashtable helices = new Hashtable(); // Keep track of helix number for each
- // position
+ Hashtable<Integer, Integer> helices = new Hashtable<Integer, Integer>();
+ // Keep track of helix number for each position
// Go through each base pair and assign positions a helix
for (i = 0; i < pairs.length; i++)
if ((popen < lastopen) && (popen > open))
{
if (helices.containsValue(popen)
- && (((Integer) helices.get(popen)) == helix))
+ && ((helices.get(popen)) == helix))
{
continue;
}
}
}
+
+ /**
+ * Answers true if the character is a recognised symbol for RNA secondary
+ * structure, that is, if either isOpeningParenthesis or isClosingParenthesis
+ * accepts it. Currently accepts a-z, A-Z, ()[]{}<>.
+ *
+ * @param c
+ *          the character to test
+ * @return true if c is an opening or a closing bracket symbol, else false
+ */
+ public static boolean isRnaSecondaryStructureSymbol(char c)
+ {
+ return isOpeningParenthesis(c) || isClosingParenthesis(c);
+ }
+
+ /**
+ * Translates a string to RNA secondary structure representation. Returns the
+ * string with any non-SS characters changed to spaces. Accepted characters
+ * are a-z, A-Z, and (){}[]<> brackets.
+ *
+ * @param ssString the string to translate (may be null)
+ * @return the translated string, same length as the input, or null if ssString is null
+ */
+ public static String getRNASecStrucState(String ssString)
+ {
+ if (ssString == null)
+ {
+ return null;
+ }
+ StringBuilder result = new StringBuilder(ssString.length());
+ for (int i = 0; i < ssString.length(); i++)
+ {
+ char c = ssString.charAt(i);
+ result.append(isRnaSecondaryStructureSymbol(c) ? c : ' '); // both operands are char, so append(char) is used and nothing is boxed
+ }
+ return result.toString();
+ }
+
+ /**
+ * Answers true if the base-pair is either a canonical (A-T/U, C-G) or a
+ * wobble (G-T/U) pair (either way round), else false. Lower-case a-z are
+ * treated the same as their upper-case equivalents.
+ *
+ * @param first
+ *          one base of the pair
+ * @param second
+ *          the other base of the pair
+ * @return true for AT, AU, CG, GT, GU or any of these reversed, else false
+ */
+ public static boolean isCanonicalOrWobblePair(char first, char second)
+ {
+ if (first > 'Z')
+ {
+ first -= 32; // any character above 'Z' is shifted down by 32, which maps a-z onto A-Z
+ }
+ if (second > 'Z')
+ {
+ second -= 32; // same shift for the second base
+ }
+
+ switch (first)
+ {
+ case 'A': // A pairs with T or U
+ switch (second)
+ {
+ case 'T':
+ case 'U':
+ return true;
+ }
+ break;
+ case 'C': // C pairs with G only
+ switch (second)
+ {
+ case 'G':
+ return true;
+ }
+ break;
+ case 'T':
+ case 'U': // T and U share one branch: each pairs with A, or with G (wobble)
+ switch (second)
+ {
+ case 'A':
+ case 'G':
+ return true;
+ }
+ break;
+ case 'G': // G pairs with C, or with T/U (wobble)
+ switch (second)
+ {
+ case 'C':
+ case 'T':
+ case 'U':
+ return true;
+ }
+ break;
+ }
+ return false; // no inner switch matched, or first is not one of A, C, G, T, U
+ }
}
SequenceFeature[] rna = rnaStruc._rnasecstr;
char c, s, cEnd;
- int count = 0, nonGap = 0, i, bpEnd = -1, j, jSize = sequences.length;
+ int bpEnd = -1;
+ int jSize = sequences.length;
int[] values;
int[][] pairs;
float percentage;
- boolean wooble = true;
- for (i = start; i < end; i++) // foreach column
+
+ for (int i = start; i < end; i++) // foreach column
{
- residueHash = new Hashtable();
+ int canonicalOrWobblePairCount = 0;
+ int otherPairCount = 0;
maxResidue = "-";
values = new int[255];
pairs = new int[255][255];
bpEnd = -1;
- // System.out.println("s="+struc[i]);
if (i < struc.length)
{
s = struc[i];
-
}
else
{
s = '-';
}
- if (s != '(' && s != '[')
+ if (!Rna.isOpeningParenthesis(s))
{
if (s == '-')
{
}
else
{
-
bpEnd = findPair(rna, i);
if (bpEnd > -1)
{
- for (j = 0; j < jSize; j++) // foreach row
+ for (int j = 0; j < jSize; j++) // foreach row
{
if (sequences[j] == null)
{
continue;
}
c = sequences[j].getCharAt(i);
- // System.out.println("c="+c);
// standard representation for gaps in sequence and structure
if (c == '.' || c == ' ')
}
cEnd = sequences[j].getCharAt(bpEnd);
- // System.out.println("pairs ="+c+","+cEnd);
- if (checkBpType(c, cEnd) == true)
+ /*
+ * ensure upper-case for counting purposes
+ */
+ if ('a' <= c && 'z' >= c)
+ {
+ c += 'A' - 'a';
+ }
+ if ('a' <= cEnd && 'z' >= cEnd)
{
- values['(']++; // H means it's a helix (structured)
+ cEnd += 'A' - 'a';
+ }
+ if (Rna.isCanonicalOrWobblePair(c, cEnd))
+ {
+ values['(']++;
maxResidue = "(";
- wooble = true;
- // System.out.println("It's a pair wc");
-
+ canonicalOrWobblePairCount++;
}
- if (checkBpType(c, cEnd) == false)
+ else
{
- wooble = false;
- values['[']++; // H means it's a helix (structured)
+ values['[']++;
maxResidue = "[";
-
+ otherPairCount++;
}
pairs[c][cEnd]++;
-
}
}
// nonGap++;
}
- // UPDATE this for new values
+
+ residueHash = new Hashtable();
if (profile)
{
// TODO 1-dim array with jsize in [0], nongapped in [1]; or Pojo
residueHash.put(PAIRPROFILE, pairs);
}
- if (wooble == true)
- {
- count = values['('];
- }
- if (wooble == false)
+ int count = Math.max(canonicalOrWobblePairCount, otherPairCount);
+ if (!maxResidue.equals("-"))
{
- count = values['['];
+ maxResidue = canonicalOrWobblePairCount >= otherPairCount ? "("
+ : "[";
}
residueHash.put(MAXCOUNT, new Integer(count));
residueHash.put(MAXRESIDUE, maxResidue);
values[']'] = values['['];
values['('] = 0;
values['['] = 0;
+ maxResidue = maxResidue.equals("(") ? ")" : "]";
+
residueHash = new Hashtable();
- if (wooble == true)
- {
- // System.out.println(maxResidue+","+wooble);
- maxResidue = ")";
- }
- if (wooble == false)
- {
- // System.out.println(maxResidue+","+wooble);
- maxResidue = "]";
- }
if (profile)
{
residueHash.put(PROFILE, new int[][] { values,
residueHash.put(PID_GAPS, new Float(percentage));
result[bpEnd] = residueHash;
-
- }
- }
- }
-
- /**
- * Method to check if a base-pair is a canonical or a wobble bp
- *
- * @param up
- * 5' base
- * @param down
- * 3' base
- * @return True if it is a canonical/wobble bp
- */
- public static boolean checkBpType(char up, char down)
- {
- if (up > 'Z')
- {
- up -= 32;
- }
- if (down > 'Z')
- {
- down -= 32;
- }
-
- switch (up)
- {
- case 'A':
- switch (down)
- {
- case 'T':
- return true;
- case 'U':
- return true;
- }
- break;
- case 'C':
- switch (down)
- {
- case 'G':
- return true;
- }
- break;
- case 'T':
- switch (down)
- {
- case 'A':
- return true;
- case 'G':
- return true;
- }
- break;
- case 'G':
- switch (down)
- {
- case 'C':
- return true;
- case 'T':
- return true;
- case 'U':
- return true;
- }
- break;
- case 'U':
- switch (down)
- {
- case 'A':
- return true;
- case 'G':
- return true;
}
- break;
}
- return false;
}
/**
for (String j : test)
{
System.out.println(i + "-" + j + ": "
- + StructureFrequency.checkBpType(i.charAt(0), j.charAt(0)));
+ + Rna.isCanonicalOrWobblePair(i.charAt(0), j.charAt(0)));
}
}
}
package jalview.analysis;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertTrue;
import static org.testng.AssertJUnit.fail;
import jalview.analysis.SecStrConsensus.SimpleBP;
public void testGetSimpleBPs() throws WUSSParseException
{
String rna = "([{})]"; // JAL-1081 example
- Vector<SimpleBP> bps = Rna.GetSimpleBPs(rna);
+ Vector<SimpleBP> bps = Rna.getSimpleBPs(rna);
assertEquals(3, bps.size());
/*
* the base pairs are added in the order in which the matching base is found
+ * (popping the stack of unmatched opening brackets)
*/
assertEquals(2, bps.get(0).bp5); // {
assertEquals(3, bps.get(0).bp3); // }
String rna = "(([{})]";
try
{
- Rna.GetSimpleBPs(rna);
+ Rna.getSimpleBPs(rna);
fail("expected exception");
} catch (WUSSParseException e)
{
- // expected
+ // error reported as after end of input string
+ assertEquals(rna.length(), e.getProblemPos());
}
}
@Test(groups = { "Functional" })
public void testGetSimpleBPs_unmatchedCloser()
{
- String rna = "([{})]]";
+ String rna = "([{})]]]"; // two surplus closing square brackets; the first of them is at index 6
try
{
- Rna.GetSimpleBPs(rna);
+ Rna.getSimpleBPs(rna);
fail("expected exception");
} catch (WUSSParseException e)
{
- // expected
+ // error is reported at the first unmatched closing bracket, not a later one
+ assertEquals(6, e.getProblemPos());
+ }
+
+ /*
+ * a variant where we have no opening bracket of the same type
+ * as the unmatched closing bracket (no stack rather than empty stack)
+ */
+ rna = "((()])";
+ try
+ {
+ Rna.getSimpleBPs(rna);
+ fail("expected exception");
+ } catch (WUSSParseException e)
+ {
+ assertEquals(4, e.getProblemPos()); // index 4 is the closing square bracket
+ }
+ }
+
+ @Test(groups = { "Functional" })
+ public void testGetRNASecStrucState()
+ {
+ assertNull(Rna.getRNASecStrucState(null)); // null input is expected back as null
+ for (int i = 0; i <= 255; i++) // each character code 0-255 as a one-character string
+ {
+ String s = String.valueOf((char) i);
+ String ss = Rna.getRNASecStrucState(s);
+
+ /*
+ * valid SS chars are a-z, A-Z, and various brackets;
+ * anything else is returned as a space
+ */
+ if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')
+ || "()[]{}<>".indexOf(s) > -1)
+ {
+ assertEquals("" + i, s, ss); // the character code is used as the failure message
+ }
+ else
+ {
+ assertEquals(" ", ss);
+ }
+ }
+
+ /*
+ * a string is processed character by character: here the '.', '-' and '?'
+ * are expected to become spaces and everything else to be kept
+ */
+ assertEquals("a [K ]z} {Q b(w)p><i",
+ Rna.getRNASecStrucState("a.[K-]z}?{Q b(w)p><i"));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testIsClosingParenthesis()
+ {
+ /*
+ * only a-z, )]}> are closing bracket symbols;
+ * every character code from 0 to 255 is checked
+ */
+ for (int i = 0; i <= 255; i++)
+ {
+ boolean isClosing = Rna.isClosingParenthesis((char) i);
+ if ((i >= 'a' && i <= 'z') || i == ')' || i == '}' || i == ']'
+ || i == '>')
+ {
+ assertTrue(String.format("close base pair %c", i), isClosing);
+ }
+ else
+ {
+ assertFalse(String.format("close base pair %c", i), isClosing);
+ }
+ }
+ }
+
+ @Test(groups = { "Functional" })
+ public void testIsCanonicalOrWobblePair()
+ {
+ String bases = "acgtuACGTU"; // both cases of each base, so mixed-case pairs are covered
+ for (int i = 0; i < bases.length(); i++)
+ {
+ for (int j = 0; j < bases.length(); j++) // every ordered pair, including a base with itself
+ {
+ char first = bases.charAt(i);
+ char second = bases.charAt(j);
+ boolean result = Rna.isCanonicalOrWobblePair(first, second);
+ String pair = new String(new char[] { first, second })
+ .toUpperCase(); // the expectation is decided on the upper-cased pair
+ if (pair.equals("AT") || pair.equals("TA") || pair.equals("AU")
+ || pair.equals("UA") || pair.equals("GC")
+ || pair.equals("CG") || pair.equals("GT")
+ || pair.equals("TG") || pair.equals("GU")
+ || pair.equals("UG"))
+ {
+ assertTrue(pair + " should be valid", result);
+ }
+ else
+ {
+ assertFalse(pair + " should be invalid", result);
+ }
+ }
+ }
+ }
+
+ @Test(groups = { "Functional" })
+ public void testIsOpeningParenthesis()
+ {
+ /*
+ * only A-Z, ([{< are opening bracket symbols;
+ * every character code from 0 to 255 is checked
+ */
+ for (int i = 0; i <= 255; i++)
+ {
+ boolean isOpening = Rna.isOpeningParenthesis((char) i);
+ if ((i >= 'A' && i <= 'Z') || i == '(' || i == '{' || i == '['
+ || i == '<')
+ {
+ assertTrue(String.format("Open base pair %c", i), isOpening);
+ }
+ else
+ {
+ assertFalse(String.format("Open base pair %c", i), isOpening);
+ }
}
}
}