X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=blobdiff_plain;f=src2%2Ffr%2Forsay%2Flri%2Fvarna%2Futils%2FRNAMLParser.java;fp=src2%2Ffr%2Forsay%2Flri%2Fvarna%2Futils%2FRNAMLParser.java;h=0000000000000000000000000000000000000000;hp=d5f67466746ae31ae3e9d61033ed49ac91148351;hb=a1225b9392dc7657d5cef12907385b07527d6122;hpb=b513684c725997c77341f30ce4e584cf9f7cdfed diff --git a/src2/fr/orsay/lri/varna/utils/RNAMLParser.java b/src2/fr/orsay/lri/varna/utils/RNAMLParser.java deleted file mode 100644 index d5f6746..0000000 --- a/src2/fr/orsay/lri/varna/utils/RNAMLParser.java +++ /dev/null @@ -1,651 +0,0 @@ -/* - VARNA is a tool for the automated drawing, visualization and annotation of the secondary structure of RNA, designed as a companion software for web servers and databases. - Copyright (C) 2008 Kevin Darty, Alain Denise and Yann Ponty. - electronic mail : Yann.Ponty@lri.fr - paper mail : LRI, bat 490 Universit� Paris-Sud 91405 Orsay Cedex France - - This file is part of VARNA version 3.1. - VARNA version 3.1 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License - as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - VARNA version 3.1 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along with VARNA version 3.1. - If not, see http://www.gnu.org/licenses. - */ -package fr.orsay.lri.varna.utils; - -import java.awt.Point; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.StringReader; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Hashtable; -import java.util.List; -import java.util.Stack; -import java.util.Vector; - -import org.xml.sax.Attributes; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -import fr.orsay.lri.varna.models.rna.ModeleBP; -import fr.orsay.lri.varna.models.rna.ModeleBase; -import fr.orsay.lri.varna.models.rna.ModeleBP.Edge; -import fr.orsay.lri.varna.models.rna.ModeleBP.Stericity; - -public class RNAMLParser extends DefaultHandler { - public class HelixTemp { - public int pos5, pos3, length; - public String name; - - public HelixTemp(int pos5, int pos3, int length, String name) { - this.pos3 = pos3; - this.pos5 = pos5; - this.length = length; - this.name = name; - } - - public String toString() { - return ("[" + name + "," + pos5 + "," + pos3 + "," + length + "]"); - } - - } - - public class BPTemp { - public int pos5, pos3; - public String edge5, edge3, orientation; - - public BPTemp(int pos5, int pos3, String edge5, String edge3, - String orientation) { - if (edge3 == null) { - edge3 = "+"; - } - if (edge5 == null) { - edge5 = "+"; - } - if (orientation == null) { - orientation = "c"; - } - this.pos5 = pos5; - this.pos3 = pos3; - this.edge5 = edge5; - this.edge3 = edge3; - this.orientation = orientation; - } - - public ModeleBP createBPStyle(ModeleBase mb5, ModeleBase mb3) { - ModeleBP.Edge e5, e3; - @SuppressWarnings("unused") - boolean isCanonical = false; - if (edge5.equals("W")) { - e5 = ModeleBP.Edge.WC; - } else if (edge5.equals("H")) { - e5 = ModeleBP.Edge.HOOGSTEEN; - } else if (edge5.equals("S")) { - e5 = ModeleBP.Edge.SUGAR; - } else { - e5 = ModeleBP.Edge.WC; - } - - if (edge3.equals("W")) { - e3 = ModeleBP.Edge.WC; - } else if (edge3.equals("H")) { - e3 = ModeleBP.Edge.HOOGSTEEN; - } else if (edge3.equals("S")) { - e3 = ModeleBP.Edge.SUGAR; - } else { - e3 = ModeleBP.Edge.WC; - } - - if ((edge5.equals("+") && edge3.equals("+")) - || (edge5.equals("-") && edge3.equals("-"))) { - e3 = ModeleBP.Edge.WC; - e5 = ModeleBP.Edge.WC; - } - - ModeleBP.Stericity ster; - - if (orientation.equals("c")) { - ster = ModeleBP.Stericity.CIS; - } else if (orientation.equals("t")) { - ster = ModeleBP.Stericity.TRANS; - } else { - ster = ModeleBP.Stericity.CIS; - } - - return (new ModeleBP(mb5, mb3, e5, e3, ster)); - } - - public String toString() { - return ("[" + pos5 + "," + pos3 + "," + edge5 + "," + edge3 + "," - + orientation + "]"); - } - } - - public class RNATmp { - public ArrayList _sequence = new ArrayList(); - public Vector _sequenceIDs = new Vector(); - public Vector _structure = new Vector(); - public Vector _helices = new Vector(); - - public ArrayList getSequence() { - return _sequence; - } - - public Vector getStructure() { - return _structure; - } - }; - - private Hashtable _molecules = new Hashtable(); - - private boolean _inSequenceIDs, _inLength, _inSequence, _inHelix, - _inStrAnnotation, _inBP, _inBP5, _inBP3, _inEdge5, _inEdge3, - _inPosition, _inBondOrientation, _inMolecule; - private StringBuffer _buffer; - private String _currentModel = ""; - private int _id5, _id3, _length; - String _edge5, _edge3, _orientation, _helixID; - - public RNAMLParser() { - super(); - _inSequenceIDs = false; - _inSequence = false; - _inStrAnnotation = false; - _inBP = false; - _inBP5 = false; - _inBP3 = false; - _inPosition = false; - _inEdge5 = false; - _inEdge3 = false; - _inBondOrientation = false; - _inHelix = false; - _inMolecule = false; - } - - public InputSource createSourceFromURL(String path) { - URL url = null; - try { - url = new URL(path); - URLConnection connexion = url.openConnection(); - connexion.setUseCaches(false); - InputStream r = connexion.getInputStream(); - InputStreamReader inr = new InputStreamReader(r); - return new InputSource(inr); - } catch (Exception e) { - e.printStackTrace(); - } - return new InputSource(new StringReader("")); - } - - public InputSource resolveEntity(String publicId, String systemId) { - // System.out.println("[crade]"); - if (systemId.endsWith("rnaml.dtd")) - { - String resourceName = "/rnaml.dtd"; - URL url = ClassLoader.getSystemResource(resourceName); - if (url!=null) - { - try { - InputStream stream = url.openStream(); - if (stream != null) - { - return new InputSource(stream ); - } - } catch (IOException e) { - e.printStackTrace(); - } - } - } - return new InputSource(new StringReader("")); - } - - public void startElement(String uri, String localName, String qName, - Attributes attributes) throws SAXException { - if (qName.equals("numbering-table")) { - _inSequenceIDs = true; - _buffer = new StringBuffer(); - } else if (qName.equals("helix")) { - _inHelix = true; - _buffer = new StringBuffer(); - _helixID = attributes.getValue("id"); - } else if (qName.equals("seq-data")) { - _inSequence = true; - _buffer = new StringBuffer(); - } else if (qName.equals("length")) { - _inLength = true; - _buffer = new StringBuffer(); - } else if (qName.equals("str-annotation")) { - _inStrAnnotation = true; - } else if (qName.equals("base-pair")) { - _inBP = true; - } else if (qName.equals("base-id-5p")) { - if (_inBP || _inHelix) { - _inBP5 = true; - } - } else if (qName.equals("base-id-3p")) { - if (_inBP || _inHelix) { - _inBP3 = true; - } - } else if (qName.equals("edge-5p")) { - _inEdge5 = true; - _buffer = new StringBuffer(); - } else if (qName.equals("edge-3p")) { - _inEdge3 = true; - _buffer = new StringBuffer(); - } else if (qName.equals("position")) { - _inPosition = true; - _buffer = new StringBuffer(); - } else if (qName.equals("bond-orientation")) { - _inBondOrientation = true; - _buffer = new StringBuffer(); - } else if (qName.equals("molecule")) { - _inMolecule = true; - String id = (attributes.getValue("id")); - // System.err.println("Molecule#"+id); - _molecules.put(id, new RNATmp()); - _currentModel = id; - } else { - // We don't care too much about the rest ... - } - } - - public void endElement(String uri, String localName, String qName) - throws SAXException { - if (qName.equals("numbering-table")) { - _inSequenceIDs = false; - String content = _buffer.toString(); - content = content.trim(); - String[] tokens = content.split("\\s+"); - Vector results = new Vector(); - for (int i = 0; i < tokens.length; i++) { - try { - results.add(new Integer(Integer.parseInt(tokens[i]))); - } catch (NumberFormatException e) { - e.printStackTrace(); - } - } - _molecules.get(_currentModel)._sequenceIDs = results; - _buffer = null; - } else if (qName.equals("seq-data")) { - _inSequence = false; - String content = _buffer.toString(); - content = content.trim(); - String[] tokens = content.split("\\s+"); - ArrayList results = new ArrayList(); - for (int i = 0; i < tokens.length; i++) { - for (int j = 0; j < tokens[i].length(); j++) - results.add("" + tokens[i].charAt(j)); - } - // System.err.println(" Seq: "+results); - _molecules.get(_currentModel)._sequence = results; - _buffer = null; - } else if (qName.equals("bond-orientation")) { - _inBondOrientation = false; - String content = _buffer.toString(); - content = content.trim(); - _orientation = content; - _buffer = null; - } else if (qName.equals("str-annotation")) { - _inStrAnnotation = false; - } else if (qName.equals("base-pair")) { - if (_inMolecule) { - _inBP = false; - BPTemp bp = new BPTemp(_id5, _id3, _edge5, _edge3, _orientation); - _molecules.get(_currentModel)._structure.add(bp); - // System.err.println(" "+bp); - } - } else if (qName.equals("helix")) { - _inHelix = false; - if (_inMolecule) { - HelixTemp h = new HelixTemp(_id5, _id3, _length, _helixID); - _molecules.get(_currentModel)._helices.add(h); - } - } else if (qName.equals("base-id-5p")) { - _inBP5 = false; - } else if (qName.equals("base-id-3p")) { - _inBP3 = false; - } else if (qName.equals("length")) { - _inLength = false; - String content = _buffer.toString(); - content = content.trim(); - _length = Integer.parseInt(content); - _buffer = null; - } else if (qName.equals("position")) { - String content = _buffer.toString(); - content = content.trim(); - int pos = Integer.parseInt(content); - if (_inBP5) { - _id5 = pos; - } - if (_inBP3) { - _id3 = pos; - } - _buffer = null; - } else if (qName.equals("edge-5p")) { - _inEdge5 = false; - String content = _buffer.toString(); - content = content.trim(); - _edge5 = content; - _buffer = null; - } else if (qName.equals("edge-3p")) { - _inEdge3 = false; - String content = _buffer.toString(); - content = content.trim(); - _edge3 = content; - _buffer = null; - } else if (qName.equals("molecule")) { - _inMolecule = false; - } else { - // We don't care too much about the rest ... - } - } - - public void characters(char[] ch, int start, int length) - throws SAXException { - String lecture = new String(ch, start, length); - if (_buffer != null) - _buffer.append(lecture); - } - - public void startDocument() throws SAXException { - } - - public void endDocument() throws SAXException { - postProcess(); - } - - // Discarding stacking interactions... - private void discardStacking() { - Vector result = new Vector(); - for (int i = 0; i < _molecules.get(_currentModel)._structure.size(); i++) { - BPTemp bp = _molecules.get(_currentModel)._structure.get(i); - if (bp.orientation.equals("c") || bp.orientation.equals("t")) { - result.add(bp); - } - } - _molecules.get(_currentModel)._structure = result; - } - - public static boolean isSelfCrossing(int[] str) { - Stack intervals = new Stack(); - intervals.add(new Point(0, str.length - 1)); - while (!intervals.empty()) { - Point p = intervals.pop(); - if (p.x <= p.y) { - if (str[p.x] == -1) { - intervals.push(new Point(p.x + 1, p.y)); - } else { - int i = p.x; - int j = p.y; - int k = str[i]; - if ((k <= i) || (k > j)) { - return true; - } else { - intervals.push(new Point(i + 1, k - 1)); - intervals.push(new Point(k + 1, j)); - } - } - } - } - return false; - } - - @SuppressWarnings("unused") - private void debugPrintArray(Object[] str) { - StringBuffer s = new StringBuffer("["); - for (int i = 0; i < str.length; i++) { - if (i != 0) { - s.append(","); - } - s.append(str[i]); - - } - s.append("]"); - System.out.println(s.toString()); - } - - /** - * Computes and returns a maximal planar subset of the current structure. - * - * @param str - * A sequence of base-pairing positions - * @return A sequence of non-crossing base-pairing positions - */ - - public static int[] planarize(int[] str) { - if (!isSelfCrossing(str)) { - return str; - } - - int length = str.length; - - int[] result = new int[length]; - for (int i = 0; i < result.length; i++) { - result[i] = -1; - } - - short[][] tab = new short[length][length]; - short[][] backtrack = new short[length][length]; - int theta = 3; - - for (int i = 0; i < result.length; i++) { - for (int j = i; j < Math.min(i + theta, result.length); j++) { - tab[i][j] = 0; - backtrack[i][j] = -1; - } - } - for (int n = theta; n < length; n++) { - for (int i = 0; i < length - n; i++) { - int j = i + n; - tab[i][j] = tab[i + 1][j]; - backtrack[i][j] = -1; - int k = str[i]; - if ((k != -1) && (k <= j) && (i < k)) { - int tmp = 1; - if (i + 1 <= k - 1) { - tmp += tab[i + 1][k - 1]; - } - if (k + 1 <= j) { - tmp += tab[k + 1][j]; - } - if (tmp > tab[i][j]) { - tab[i][j] = (short) tmp; - backtrack[i][j] = (short) k; - } - } - } - } - Stack intervals = new Stack(); - intervals.add(new Point(0, length - 1)); - while (!intervals.empty()) { - Point p = intervals.pop(); - if (p.x <= p.y) { - if (backtrack[p.x][p.y] == -1) { - result[p.x] = -1; - intervals.push(new Point(p.x + 1, p.y)); - } else { - int i = p.x; - int j = p.y; - int k = backtrack[p.x][p.y]; - result[i] = k; - result[k] = i; - intervals.push(new Point(i + 1, k - 1)); - intervals.push(new Point(k + 1, j)); - } - } - } - return result; - } - - public static void planarize(ArrayList input, - ArrayList planar, ArrayList others, int length) { - // System.err.println("Planarize: Length:"+length); - Hashtable> index2BPs = new Hashtable>(); - for (ModeleBP msbp : input) { - int i = msbp.getPartner5().getIndex(); - if (!index2BPs.containsKey(i)) { - index2BPs.put(i, new ArrayList()); - } - index2BPs.get(i).add(msbp); - } - // System.err.println(index2BPs); - - short[][] tab = new short[length][length]; - short[][] backtrack = new short[length][length]; - int theta = 3; - - for (int i = 0; i < length; i++) { - for (int j = i; j < Math.min(i + theta, length); j++) { - tab[i][j] = 0; - backtrack[i][j] = -1; - } - } - for (int n = theta; n < length; n++) { - for (int i = 0; i < length - n; i++) { - int j = i + n; - tab[i][j] = tab[i + 1][j]; - backtrack[i][j] = -1; - if (index2BPs.containsKey(i)) { - ArrayList vi = index2BPs.get(i); - // System.err.print("."); - for (int numBP = 0; numBP < vi.size(); numBP++) { - ModeleBP mb = vi.get(numBP); - int k = mb.getPartner3().getIndex(); - if ((k != -1) && (k <= j) && (i < k)) { - int tmp = 1; - if (i + 1 <= k - 1) { - tmp += tab[i + 1][k - 1]; - } - if (k + 1 <= j) { - tmp += tab[k + 1][j]; - } - if (tmp > tab[i][j]) { - tab[i][j] = (short) tmp; - backtrack[i][j] = (short) numBP; - } - } - } - } - } - } - // System.err.println("DP table: "+tab[0][length-1]); - - // Backtracking - Stack intervals = new Stack(); - intervals.add(new Point(0, length - 1)); - while (!intervals.empty()) { - Point p = intervals.pop(); - if (p.x <= p.y) { - if (backtrack[p.x][p.y] == -1) { - intervals.push(new Point(p.x + 1, p.y)); - } else { - int i = p.x; - int j = p.y; - int nb = backtrack[p.x][p.y]; - ModeleBP mb = index2BPs.get(i).get(nb); - int k = mb.getPartner3().getIndex(); - planar.add(mb); - intervals.push(new Point(i + 1, k - 1)); - intervals.push(new Point(k + 1, j)); - } - } - } - - // Remaining base pairs - for (int i : index2BPs.keySet()) { - ArrayList vi = index2BPs.get(i); - for (ModeleBP mb : vi) { - if (!planar.contains(mb)) { - others.add(mb); - } - } - } - } - - private void postProcess() { - for (RNATmp r : _molecules.values()) { - // First, check if base numbers were specified - if (r._sequenceIDs.size() == 0) { - Vector results = new Vector(); - for (int i = 0; i < r._sequence.size(); i++) { - results.add(new Integer(i + 1)); - } - r._sequenceIDs = results; - } - // System.err.println("IDs: "+_sequenceIDs); - // System.err.println("Before remapping: "+_structure); - - // Then, build inverse mapping ID => index - Hashtable ID2Index = new Hashtable(); - for (int i = 0; i < r._sequenceIDs.size(); i++) { - ID2Index.put(r._sequenceIDs.get(i), i); - } - - // Translate BP coordinates into indices - for (BPTemp bp : r._structure) { - bp.pos3 = bp.pos3 - 1; - bp.pos5 = bp.pos5 - 1; - } - // System.err.println("After remapping: "+_structure); - - discardStacking(); - // System.err.println(" Discard stacking (length="+r._sequence.size()+") => "+r._structure); - - // Eliminate redundancy - Hashtable> index2BPs = new Hashtable>(); - for (BPTemp msbp : r._structure) { - int i = msbp.pos5; - if (!index2BPs.containsKey(i)) { - index2BPs.put(i, new Hashtable()); - } - if (!index2BPs.get(i).contains(msbp.pos3)) { - index2BPs.get(i).put(msbp.pos3,msbp); - } - } - - // Adding helices... - for (int i = 0; i < r._helices.size(); i++) { - HelixTemp h = r._helices.get(i); - for (int j = 0; j < h.length; j++) { - // System.err.println("Looking for residues: "+(h.pos5+j-1)+" and "+(h.pos3-j-1)); - int a = (h.pos5 + j - 1); - int b = (h.pos3 - j - 1); - BPTemp bp = new BPTemp(a, (b), "+", "+", "c"); - if (!index2BPs.containsKey(a)) { - index2BPs.put(a, new Hashtable()); - } - if (!index2BPs.get(a).contains(b)) { - index2BPs.get(a).put(b,bp); - } - } - } - - Vector newStructure = new Vector(); - for (int i : index2BPs.keySet()) { - for (int j : index2BPs.get(i).keySet()) { - BPTemp bp = index2BPs.get(i).get(j); - newStructure.add(bp); - } - } - r._structure = newStructure; - - // System.err.println("After Helices => "+_structure); - - - // System.err.println("After Postprocess => "+_structure); - } - } - - public ArrayList getMolecules() { - return new ArrayList(_molecules.values()); - } -} \ No newline at end of file