2 VARNA is a tool for the automated drawing, visualization and annotation of the secondary structure of RNA, designed as a companion software for web servers and databases.
3 Copyright (C) 2008 Kevin Darty, Alain Denise and Yann Ponty.
4 electronic mail : Yann.Ponty@lri.fr
5 paper mail : LRI, bat 490 Universit� Paris-Sud 91405 Orsay Cedex France
7 This file is part of VARNA version 3.1.
8 VARNA version 3.1 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License
9 as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 VARNA version 3.1 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
12 without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along with VARNA version 3.1.
16 If not, see http://www.gnu.org/licenses.
18 package fr.orsay.lri.varna.utils;
20 import java.awt.Point;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.InputStreamReader;
24 import java.io.StringReader;
26 import java.net.URLConnection;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Hashtable;
30 import java.util.List;
31 import java.util.Stack;
32 import java.util.Vector;
34 import org.xml.sax.Attributes;
35 import org.xml.sax.InputSource;
36 import org.xml.sax.SAXException;
37 import org.xml.sax.helpers.DefaultHandler;
39 import fr.orsay.lri.varna.models.rna.ModeleBP;
40 import fr.orsay.lri.varna.models.rna.ModeleBase;
41 import fr.orsay.lri.varna.models.rna.ModeleBP.Edge;
42 import fr.orsay.lri.varna.models.rna.ModeleBP.Stericity;
44 public class RNAMLParser extends DefaultHandler {
45 public class HelixTemp {
46 public int pos5, pos3, length;
49 public HelixTemp(int pos5, int pos3, int length, String name) {
56 public String toString() {
57 return ("[" + name + "," + pos5 + "," + pos3 + "," + length + "]");
63 public int pos5, pos3;
64 public String edge5, edge3, orientation;
66 public BPTemp(int pos5, int pos3, String edge5, String edge3,
74 if (orientation == null) {
81 this.orientation = orientation;
84 public ModeleBP createBPStyle(ModeleBase mb5, ModeleBase mb3) {
86 @SuppressWarnings("unused")
87 boolean isCanonical = false;
88 if (edge5.equals("W")) {
89 e5 = ModeleBP.Edge.WC;
90 } else if (edge5.equals("H")) {
91 e5 = ModeleBP.Edge.HOOGSTEEN;
92 } else if (edge5.equals("S")) {
93 e5 = ModeleBP.Edge.SUGAR;
95 e5 = ModeleBP.Edge.WC;
98 if (edge3.equals("W")) {
99 e3 = ModeleBP.Edge.WC;
100 } else if (edge3.equals("H")) {
101 e3 = ModeleBP.Edge.HOOGSTEEN;
102 } else if (edge3.equals("S")) {
103 e3 = ModeleBP.Edge.SUGAR;
105 e3 = ModeleBP.Edge.WC;
108 if ((edge5.equals("+") && edge3.equals("+"))
109 || (edge5.equals("-") && edge3.equals("-"))) {
110 e3 = ModeleBP.Edge.WC;
111 e5 = ModeleBP.Edge.WC;
114 ModeleBP.Stericity ster;
116 if (orientation.equals("c")) {
117 ster = ModeleBP.Stericity.CIS;
118 } else if (orientation.equals("t")) {
119 ster = ModeleBP.Stericity.TRANS;
121 ster = ModeleBP.Stericity.CIS;
124 return (new ModeleBP(mb5, mb3, e5, e3, ster));
127 public String toString() {
128 return ("[" + pos5 + "," + pos3 + "," + edge5 + "," + edge3 + ","
129 + orientation + "]");
133 public class RNATmp {
134 public ArrayList<String> _sequence = new ArrayList<String>();
135 public Vector<Integer> _sequenceIDs = new Vector<Integer>();
136 public Vector<BPTemp> _structure = new Vector<BPTemp>();
137 public Vector<HelixTemp> _helices = new Vector<HelixTemp>();
139 public ArrayList<String> getSequence() {
143 public Vector<BPTemp> getStructure() {
148 private Hashtable<String, RNATmp> _molecules = new Hashtable<String, RNATmp>();
150 private boolean _inSequenceIDs, _inLength, _inSequence, _inHelix,
151 _inStrAnnotation, _inBP, _inBP5, _inBP3, _inEdge5, _inEdge3,
152 _inPosition, _inBondOrientation, _inMolecule;
153 private StringBuffer _buffer;
154 private String _currentModel = "";
155 private int _id5, _id3, _length;
156 String _edge5, _edge3, _orientation, _helixID;
158 public RNAMLParser() {
160 _inSequenceIDs = false;
162 _inStrAnnotation = false;
169 _inBondOrientation = false;
174 public InputSource createSourceFromURL(String path) {
178 URLConnection connexion = url.openConnection();
179 connexion.setUseCaches(false);
180 InputStream r = connexion.getInputStream();
181 InputStreamReader inr = new InputStreamReader(r);
182 return new InputSource(inr);
183 } catch (Exception e) {
186 return new InputSource(new StringReader(""));
189 public InputSource resolveEntity(String publicId, String systemId) {
190 // System.out.println("[crade]");
191 if (systemId.endsWith("rnaml.dtd"))
193 String resourceName = "/rnaml.dtd";
194 URL url = ClassLoader.getSystemResource(resourceName);
198 InputStream stream = url.openStream();
201 return new InputSource(stream );
203 } catch (IOException e) {
208 return new InputSource(new StringReader(""));
211 public void startElement(String uri, String localName, String qName,
212 Attributes attributes) throws SAXException {
213 if (qName.equals("numbering-table")) {
214 _inSequenceIDs = true;
215 _buffer = new StringBuffer();
216 } else if (qName.equals("helix")) {
218 _buffer = new StringBuffer();
219 _helixID = attributes.getValue("id");
220 } else if (qName.equals("seq-data")) {
222 _buffer = new StringBuffer();
223 } else if (qName.equals("length")) {
225 _buffer = new StringBuffer();
226 } else if (qName.equals("str-annotation")) {
227 _inStrAnnotation = true;
228 } else if (qName.equals("base-pair")) {
230 } else if (qName.equals("base-id-5p")) {
231 if (_inBP || _inHelix) {
234 } else if (qName.equals("base-id-3p")) {
235 if (_inBP || _inHelix) {
238 } else if (qName.equals("edge-5p")) {
240 _buffer = new StringBuffer();
241 } else if (qName.equals("edge-3p")) {
243 _buffer = new StringBuffer();
244 } else if (qName.equals("position")) {
246 _buffer = new StringBuffer();
247 } else if (qName.equals("bond-orientation")) {
248 _inBondOrientation = true;
249 _buffer = new StringBuffer();
250 } else if (qName.equals("molecule")) {
252 String id = (attributes.getValue("id"));
253 // System.err.println("Molecule#"+id);
254 _molecules.put(id, new RNATmp());
257 // We don't care too much about the rest ...
261 public void endElement(String uri, String localName, String qName)
262 throws SAXException {
263 if (qName.equals("numbering-table")) {
264 _inSequenceIDs = false;
265 String content = _buffer.toString();
266 content = content.trim();
267 String[] tokens = content.split("\\s+");
268 Vector<Integer> results = new Vector<Integer>();
269 for (int i = 0; i < tokens.length; i++) {
271 results.add(new Integer(Integer.parseInt(tokens[i])));
272 } catch (NumberFormatException e) {
276 _molecules.get(_currentModel)._sequenceIDs = results;
278 } else if (qName.equals("seq-data")) {
280 String content = _buffer.toString();
281 content = content.trim();
282 String[] tokens = content.split("\\s+");
283 ArrayList<String> results = new ArrayList<String>();
284 for (int i = 0; i < tokens.length; i++) {
285 for (int j = 0; j < tokens[i].length(); j++)
286 results.add("" + tokens[i].charAt(j));
288 // System.err.println(" Seq: "+results);
289 _molecules.get(_currentModel)._sequence = results;
291 } else if (qName.equals("bond-orientation")) {
292 _inBondOrientation = false;
293 String content = _buffer.toString();
294 content = content.trim();
295 _orientation = content;
297 } else if (qName.equals("str-annotation")) {
298 _inStrAnnotation = false;
299 } else if (qName.equals("base-pair")) {
302 BPTemp bp = new BPTemp(_id5, _id3, _edge5, _edge3, _orientation);
303 _molecules.get(_currentModel)._structure.add(bp);
304 // System.err.println(" "+bp);
306 } else if (qName.equals("helix")) {
309 HelixTemp h = new HelixTemp(_id5, _id3, _length, _helixID);
310 _molecules.get(_currentModel)._helices.add(h);
312 } else if (qName.equals("base-id-5p")) {
314 } else if (qName.equals("base-id-3p")) {
316 } else if (qName.equals("length")) {
318 String content = _buffer.toString();
319 content = content.trim();
320 _length = Integer.parseInt(content);
322 } else if (qName.equals("position")) {
323 String content = _buffer.toString();
324 content = content.trim();
325 int pos = Integer.parseInt(content);
333 } else if (qName.equals("edge-5p")) {
335 String content = _buffer.toString();
336 content = content.trim();
339 } else if (qName.equals("edge-3p")) {
341 String content = _buffer.toString();
342 content = content.trim();
345 } else if (qName.equals("molecule")) {
348 // We don't care too much about the rest ...
352 public void characters(char[] ch, int start, int length)
353 throws SAXException {
354 String lecture = new String(ch, start, length);
356 _buffer.append(lecture);
359 public void startDocument() throws SAXException {
362 public void endDocument() throws SAXException {
366 // Discarding stacking interactions...
367 private void discardStacking() {
368 Vector<BPTemp> result = new Vector<BPTemp>();
369 for (int i = 0; i < _molecules.get(_currentModel)._structure.size(); i++) {
370 BPTemp bp = _molecules.get(_currentModel)._structure.get(i);
371 if (bp.orientation.equals("c") || bp.orientation.equals("t")) {
375 _molecules.get(_currentModel)._structure = result;
378 public static boolean isSelfCrossing(int[] str) {
379 Stack<Point> intervals = new Stack<Point>();
380 intervals.add(new Point(0, str.length - 1));
381 while (!intervals.empty()) {
382 Point p = intervals.pop();
384 if (str[p.x] == -1) {
385 intervals.push(new Point(p.x + 1, p.y));
390 if ((k <= i) || (k > j)) {
393 intervals.push(new Point(i + 1, k - 1));
394 intervals.push(new Point(k + 1, j));
402 @SuppressWarnings("unused")
403 private void debugPrintArray(Object[] str) {
404 StringBuffer s = new StringBuffer("[");
405 for (int i = 0; i < str.length; i++) {
413 System.out.println(s.toString());
417 * Computes and returns a maximal planar subset of the current structure.
420 * A sequence of base-pairing positions
421 * @return A sequence of non-crossing base-pairing positions
424 public static int[] planarize(int[] str) {
425 if (!isSelfCrossing(str)) {
429 int length = str.length;
431 int[] result = new int[length];
432 for (int i = 0; i < result.length; i++) {
436 short[][] tab = new short[length][length];
437 short[][] backtrack = new short[length][length];
440 for (int i = 0; i < result.length; i++) {
441 for (int j = i; j < Math.min(i + theta, result.length); j++) {
443 backtrack[i][j] = -1;
446 for (int n = theta; n < length; n++) {
447 for (int i = 0; i < length - n; i++) {
449 tab[i][j] = tab[i + 1][j];
450 backtrack[i][j] = -1;
452 if ((k != -1) && (k <= j) && (i < k)) {
454 if (i + 1 <= k - 1) {
455 tmp += tab[i + 1][k - 1];
458 tmp += tab[k + 1][j];
460 if (tmp > tab[i][j]) {
461 tab[i][j] = (short) tmp;
462 backtrack[i][j] = (short) k;
467 Stack<Point> intervals = new Stack<Point>();
468 intervals.add(new Point(0, length - 1));
469 while (!intervals.empty()) {
470 Point p = intervals.pop();
472 if (backtrack[p.x][p.y] == -1) {
474 intervals.push(new Point(p.x + 1, p.y));
478 int k = backtrack[p.x][p.y];
481 intervals.push(new Point(i + 1, k - 1));
482 intervals.push(new Point(k + 1, j));
489 public static void planarize(ArrayList<ModeleBP> input,
490 ArrayList<ModeleBP> planar, ArrayList<ModeleBP> others, int length) {
491 // System.err.println("Planarize: Length:"+length);
492 Hashtable<Integer, ArrayList<ModeleBP>> index2BPs = new Hashtable<Integer, ArrayList<ModeleBP>>();
493 for (ModeleBP msbp : input) {
494 int i = msbp.getPartner5().getIndex();
495 if (!index2BPs.containsKey(i)) {
496 index2BPs.put(i, new ArrayList<ModeleBP>());
498 index2BPs.get(i).add(msbp);
500 // System.err.println(index2BPs);
502 short[][] tab = new short[length][length];
503 short[][] backtrack = new short[length][length];
506 for (int i = 0; i < length; i++) {
507 for (int j = i; j < Math.min(i + theta, length); j++) {
509 backtrack[i][j] = -1;
512 for (int n = theta; n < length; n++) {
513 for (int i = 0; i < length - n; i++) {
515 tab[i][j] = tab[i + 1][j];
516 backtrack[i][j] = -1;
517 if (index2BPs.containsKey(i)) {
518 ArrayList<ModeleBP> vi = index2BPs.get(i);
519 // System.err.print(".");
520 for (int numBP = 0; numBP < vi.size(); numBP++) {
521 ModeleBP mb = vi.get(numBP);
522 int k = mb.getPartner3().getIndex();
523 if ((k != -1) && (k <= j) && (i < k)) {
525 if (i + 1 <= k - 1) {
526 tmp += tab[i + 1][k - 1];
529 tmp += tab[k + 1][j];
531 if (tmp > tab[i][j]) {
532 tab[i][j] = (short) tmp;
533 backtrack[i][j] = (short) numBP;
540 // System.err.println("DP table: "+tab[0][length-1]);
543 Stack<Point> intervals = new Stack<Point>();
544 intervals.add(new Point(0, length - 1));
545 while (!intervals.empty()) {
546 Point p = intervals.pop();
548 if (backtrack[p.x][p.y] == -1) {
549 intervals.push(new Point(p.x + 1, p.y));
553 int nb = backtrack[p.x][p.y];
554 ModeleBP mb = index2BPs.get(i).get(nb);
555 int k = mb.getPartner3().getIndex();
557 intervals.push(new Point(i + 1, k - 1));
558 intervals.push(new Point(k + 1, j));
563 // Remaining base pairs
564 for (int i : index2BPs.keySet()) {
565 ArrayList<ModeleBP> vi = index2BPs.get(i);
566 for (ModeleBP mb : vi) {
567 if (!planar.contains(mb)) {
574 private void postProcess() {
575 for (RNATmp r : _molecules.values()) {
576 // First, check if base numbers were specified
577 if (r._sequenceIDs.size() == 0) {
578 Vector<Integer> results = new Vector<Integer>();
579 for (int i = 0; i < r._sequence.size(); i++) {
580 results.add(new Integer(i + 1));
582 r._sequenceIDs = results;
584 // System.err.println("IDs: "+_sequenceIDs);
585 // System.err.println("Before remapping: "+_structure);
587 // Then, build inverse mapping ID => index
588 Hashtable<Integer, Integer> ID2Index = new Hashtable<Integer, Integer>();
589 for (int i = 0; i < r._sequenceIDs.size(); i++) {
590 ID2Index.put(r._sequenceIDs.get(i), i);
593 // Translate BP coordinates into indices
594 for (BPTemp bp : r._structure) {
595 bp.pos3 = bp.pos3 - 1;
596 bp.pos5 = bp.pos5 - 1;
598 // System.err.println("After remapping: "+_structure);
601 // System.err.println(" Discard stacking (length="+r._sequence.size()+") => "+r._structure);
603 // Eliminate redundancy
604 Hashtable<Integer, Hashtable<Integer,BPTemp>> index2BPs = new Hashtable<Integer, Hashtable<Integer,BPTemp>>();
605 for (BPTemp msbp : r._structure) {
607 if (!index2BPs.containsKey(i)) {
608 index2BPs.put(i, new Hashtable<Integer,BPTemp>());
610 if (!index2BPs.get(i).contains(msbp.pos3)) {
611 index2BPs.get(i).put(msbp.pos3,msbp);
616 for (int i = 0; i < r._helices.size(); i++) {
617 HelixTemp h = r._helices.get(i);
618 for (int j = 0; j < h.length; j++) {
619 // System.err.println("Looking for residues: "+(h.pos5+j-1)+" and "+(h.pos3-j-1));
620 int a = (h.pos5 + j - 1);
621 int b = (h.pos3 - j - 1);
622 BPTemp bp = new BPTemp(a, (b), "+", "+", "c");
623 if (!index2BPs.containsKey(a)) {
624 index2BPs.put(a, new Hashtable<Integer,BPTemp>());
626 if (!index2BPs.get(a).contains(b)) {
627 index2BPs.get(a).put(b,bp);
632 Vector<BPTemp> newStructure = new Vector<BPTemp>();
633 for (int i : index2BPs.keySet()) {
634 for (int j : index2BPs.get(i).keySet()) {
635 BPTemp bp = index2BPs.get(i).get(j);
636 newStructure.add(bp);
639 r._structure = newStructure;
641 // System.err.println("After Helices => "+_structure);
644 // System.err.println("After Postprocess => "+_structure);
648 public ArrayList<RNATmp> getMolecules() {
649 return new ArrayList<RNATmp>(_molecules.values());