X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FRna.java;h=466e6c6d8e3e97c73ec96fb1986579dac772e684;hb=c19d2a91ca05e052e3408bf5852d88eb5d0608f1;hp=ca3c6d5159308b1ee589d4b7553ec1be6584f40d;hpb=857a40730ad5db87bfc7a381924717a2162c2b81;p=jalview.git diff --git a/src/jalview/analysis/Rna.java b/src/jalview/analysis/Rna.java index ca3c6d5..466e6c6 100644 --- a/src/jalview/analysis/Rna.java +++ b/src/jalview/analysis/Rna.java @@ -1,36 +1,99 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b2) + * Copyright (C) 2015 The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ - /* Author: Lauren Michelle Lui * Methods are based on RALEE methods http://personalpages.manchester.ac.uk/staff/sam.griffiths-jones/software/ralee/ + * Additional Author: Jan Engelhart (2011) - Structure consensus and bug fixing + * Additional Author: Anne Menard (2012) - Pseudoknot support and secondary structure consensus * */ package jalview.analysis; +import jalview.analysis.SecStrConsensus.SimpleBP; +import jalview.datamodel.SequenceFeature; +import jalview.util.MessageManager; + import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.Hashtable; +import java.util.Stack; import java.util.Vector; -import jalview.datamodel.SequenceFeature; - public class Rna { - static Hashtable pairHash = new Hashtable(); + + static Hashtable pairHash = new Hashtable(); + + private static final Character[] openingPars = { '(', '[', '{', '<', 'A', + 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; + + private static final Character[] closingPars = { ')', ']', '}', '>', 'a', + 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' }; + + private static HashSet openingParsSet = new HashSet( + Arrays.asList(openingPars)); + + private static HashSet closingParsSet = new HashSet( + Arrays.asList(closingPars)); + + private static Hashtable closingToOpening = new Hashtable() + // Initializing final data structure + { + private static final long serialVersionUID = 1L; + { + for (int i = 0; i < openingPars.length; i++) + { + // System.out.println(closingPars[i] + "->" + openingPars[i]); + put(closingPars[i], openingPars[i]); + } + } + }; + + private static boolean isOpeningParenthesis(char c) + { + return openingParsSet.contains(c); + } + + private static boolean isClosingParenthesis(char c) + { + return closingParsSet.contains(c); + } + + private static char matchingOpeningParenthesis(char closingParenthesis) + throws WUSSParseException + { + if (!isClosingParenthesis(closingParenthesis)) + { + throw new WUSSParseException( + MessageManager.formatMessage( + "exception.querying_matching_opening_parenthesis_for_non_closing_parenthesis", + new String[] { new StringBuffer(closingParenthesis) + .toString() }), -1); + } + + return closingToOpening.get(closingParenthesis); + } + /** * Based off of RALEE code ralee-get-base-pairs. Keeps track of open bracket * positions in "stack" vector. When a close bracket is reached, pair this @@ -43,67 +106,99 @@ public class Rna * @return Array of SequenceFeature; type = RNA helix, begin is open base * pair, end is close base pair */ - public static SequenceFeature[] GetBasePairs(String line) - public static SequenceFeature[] GetBasePairs(CharSequence line) throws WUSSParseException + public static Vector GetSimpleBPs(CharSequence line) + throws WUSSParseException { - - Vector stack = new Vector(); - Vector pairs = new Vector(); - + Hashtable> stacks = new Hashtable>(); + Vector pairs = new Vector(); int i = 0; while (i < line.length()) { char base = line.charAt(i); - if ((base == '<') || (base == '(') || (base == '{') || (base == '[')) + if (isOpeningParenthesis(base)) { - stack.addElement(i); + if (!stacks.containsKey(base)) + { + stacks.put(base, new Stack()); + } + stacks.get(base).push(i); + } - else if ((base == '>') || (base == ')') || (base == '}') - || (base == ']')) + else if (isClosingParenthesis(base)) { - Object temp = stack.lastElement(); - stack.remove(stack.size() - 1); - pairs.addElement(temp); - pairs.addElement(i); - } + char opening = matchingOpeningParenthesis(base); - i++; - } + if (!stacks.containsKey(opening)) + { + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_unseen_closing_char", + new String[] { new StringBuffer(base).toString() }), i); + } - int numpairs = pairs.size() / 2; - SequenceFeature[] outPairs = new SequenceFeature[numpairs]; + Stack stack = stacks.get(opening); + if (stack.isEmpty()) + { + // error whilst parsing i'th position. pass back + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_closing_char", + new String[] { new StringBuffer(base).toString() }), i); + } + int temp = stack.pop(); - // Convert pairs to array - for (int p = 0; p < pairs.size(); p += 2) + pairs.add(new SimpleBP(temp, i)); + } + i++; + } + for (char opening : stacks.keySet()) { - int begin = Integer.parseInt(pairs.elementAt(p).toString()); - int end = Integer.parseInt(pairs.elementAt(p + 1).toString()); - - outPairs[p / 2] = new SequenceFeature("RNA helix", "", "", begin, - end, ""); - //pairHash.put(begin, end); - + Stack stack = stacks.get(opening); + if (!stack.empty()) + { + throw new WUSSParseException(MessageManager.formatMessage( + "exception.mismatched_opening_char", + new String[] { new StringBuffer(opening).toString(), + Integer.valueOf(stack.pop()).toString() }), i); + } } + return pairs; + } + public static SequenceFeature[] GetBasePairs(CharSequence line) + throws WUSSParseException + { + Vector bps = GetSimpleBPs(line); + SequenceFeature[] outPairs = new SequenceFeature[bps.size()]; + for (int p = 0; p < bps.size(); p++) + { + SimpleBP bp = bps.elementAt(p); + outPairs[p] = new SequenceFeature("RNA helix", "", "", bp.getBP5(), + bp.getBP3(), ""); + } return outPairs; } - - + + public static ArrayList GetModeleBP(CharSequence line) + throws WUSSParseException + { + Vector bps = GetSimpleBPs(line); + return new ArrayList(bps); + } + /** * Function to get the end position corresponding to a given start position - * @param indice - start position of a base pair + * + * @param indice + * - start position of a base pair * @return - end position of a base pair */ - /*makes no sense at the moment :( - public int findEnd(int indice){ - //TODO: Probably extend this to find the start to a given end? - //could be done by putting everything twice to the hash - ArrayList pair = new ArrayList(); - return pairHash.get(indice); - }*/ - + /* + * makes no sense at the moment :( public int findEnd(int indice){ //TODO: + * Probably extend this to find the start to a given end? //could be done by + * putting everything twice to the hash ArrayList pair = new + * ArrayList(); return pairHash.get(indice); } + */ /** * Figures out which helix each position belongs to and stores the helix