2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
\r
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
\r
5 * This file is part of Jalview.
\r
7 * Jalview is free software: you can redistribute it and/or
\r
8 * modify it under the terms of the GNU General Public License
\r
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
\r
11 * Jalview is distributed in the hope that it will be useful, but
\r
12 * WITHOUT ANY WARRANTY; without even the implied warranty
\r
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
\r
14 * PURPOSE. See the GNU General Public License for more details.
\r
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
\r
19 * This extension was written by Benjamin Schuster-Boeckler at sanger.ac.uk
\r
23 import jalview.datamodel.AlignmentAnnotation;
\r
24 import jalview.datamodel.Annotation;
\r
25 import jalview.datamodel.Sequence;
\r
26 import jalview.datamodel.SequenceI;
\r
28 import java.io.BufferedReader;
\r
29 import java.io.FileReader;
\r
30 import java.io.IOException;
\r
31 import java.util.ArrayList;
\r
32 import java.util.Enumeration;
\r
33 import java.util.Hashtable;
\r
34 import java.util.Vector;
\r
36 import com.stevesoft.pat.Regex;
\r
38 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
\r
39 import fr.orsay.lri.varna.factories.RNAFactory;
\r
40 import fr.orsay.lri.varna.models.rna.RNA;
\r
42 // import org.apache.log4j.*;
\r
/**
 * This class is supposed to parse a Stockholm format file into Jalview. There
 * are TODOs in this class: we do not know what the database source and version
 * is for the file when parsing the #GS= AC tag which associates accessions with
 * sequences. Database references are also not parsed correctly: a separate
 * reference string parser must be added to parse the database reference form
 * into Jalview's local representation.
 *
 * @author bsb at sanger.ac.uk
 * @version 0.3 + jalview mods
 */
\r
56 public class StockholmFile extends AlignFile
\r
58 // static Logger logger = Logger.getLogger("jalview.io.StockholmFile");
\r
59 protected ArrayList<RNA> result;
\r
/**
 * Creates a StockholmFile without supplying any input source. Nothing is done
 * here beyond running the superclass default constructor.
 */
public StockholmFile()
{
  super();
}
\r
/**
 * Creates a StockholmFile by handing both arguments, unchanged, to the
 * superclass constructor.
 *
 * @param inFile
 *          the input to read, forwarded to the superclass
 * @param type
 *          the kind of source that inFile denotes, forwarded to the superclass
 * @throws IOException
 *           if the superclass constructor reports an input error
 */
public StockholmFile(String inFile, String type) throws IOException
{
  super(inFile, type);
}
\r
72 public StockholmFile(FileParse source) throws IOException
\r
77 public void initData()
\r
83 * Parse a file in Stockholm format into Jalview's data model. The file has to
\r
84 * be passed at construction time
\r
86 * @throws IOException
\r
87 * If there is an error with the input file
\r
89 public void parse() throws IOException
\r
91 FileReader fr = null;
\r
92 fr = new FileReader(inFile);
\r
94 BufferedReader r = new BufferedReader(fr);
\r
98 result = RNAFactory.loadSecStrStockholm(r);
\r
99 } catch (ExceptionUnmatchedClosingParentheses umcp)
\r
101 errormessage = "Unmatched parentheses in annotation. Aborting ("
\r
102 + umcp.getMessage() + ")";
\r
103 throw new IOException(umcp);
\r
105 // DEBUG System.out.println("this is the secondary scructure:"
\r
106 // +result.size());
\r
107 SequenceI[] seqs = new SequenceI[result.size()];
\r
108 for (int i = 0; i < result.size(); i++)
\r
110 // DEBUG System.err.println("Processing i'th sequence in Stockholm file")
\r
111 RNA current = result.get(i);
\r
113 String seq = current.getSeq();
\r
114 String rna = current.getStructDBN(true);
\r
115 // DEBUG System.out.println(seq);
\r
116 // DEBUG System.err.println(rna);
\r
118 int end = seq.length() - 1;
\r
119 id = safeName(getDataName());
\r
120 seqs[i] = new Sequence(id, seq, begin, end);
\r
121 String[] annot = new String[rna.length()];
\r
122 Annotation[] ann = new Annotation[rna.length()];
\r
123 for (int j = 0; j < rna.length(); j++)
\r
125 annot[j] = rna.substring(j, j + 1);
\r
129 for (int k = 0; k < rna.length(); k++)
\r
131 ann[k] = new Annotation(annot[k], "",
\r
132 jalview.schemes.ResidueProperties.getRNASecStrucState(
\r
133 annot[k]).charAt(0), 0f);
\r
136 AlignmentAnnotation align = new AlignmentAnnotation("Sec. str.",
\r
137 current.getID(), ann);
\r
139 seqs[i].addAlignmentAnnotation(align);
\r
140 seqs[i].setRNA(result.get(i));
\r
141 this.annotations.addElement(align);
\r
143 this.setSeqs(seqs);
\r
147 protected static AlignmentAnnotation parseAnnotationRow(
\r
148 Vector annotation, String label, String annots)
\r
150 String convert1, convert2 = null;
\r
152 // Convert all bracket types to parentheses
\r
153 Regex openparen = new Regex("(<|\\[)", "(");
\r
154 Regex closeparen = new Regex("(>|\\])", ")");
\r
156 // Detect if file is RNA by looking for bracket types
\r
157 Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
\r
159 convert1 = openparen.replaceAll(annots);
\r
160 convert2 = closeparen.replaceAll(convert1);
\r
163 String type = (label.indexOf("_cons") == label.length() - 5) ? label
\r
164 .substring(0, label.length() - 5) : label;
\r
165 boolean ss = false;
\r
166 type = id2type(type);
\r
167 if (type.equals("secondary structure"))
\r
171 // decide on secondary structure or not.
\r
172 Annotation[] els = new Annotation[annots.length()];
\r
173 for (int i = 0; i < annots.length(); i++)
\r
175 String pos = annots.substring(i, i + 1);
\r
177 ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
\r
181 if (detectbrackets.search(pos))
\r
183 ann.secondaryStructure = jalview.schemes.ResidueProperties
\r
184 .getRNASecStrucState(pos).charAt(0);
\r
188 ann.secondaryStructure = jalview.schemes.ResidueProperties
\r
189 .getDssp3state(pos).charAt(0);
\r
192 if (ann.secondaryStructure == pos.charAt(0) || pos.charAt(0) == 'C')
\r
194 ann.displayCharacter = ""; // null; // " ";
\r
198 ann.displayCharacter = " " + ann.displayCharacter;
\r
204 AlignmentAnnotation annot = null;
\r
205 Enumeration e = annotation.elements();
\r
206 while (e.hasMoreElements())
\r
208 annot = (AlignmentAnnotation) e.nextElement();
\r
209 if (annot.label.equals(type))
\r
215 annot = new AlignmentAnnotation(type, type, els);
\r
216 annotation.addElement(annot);
\r
220 Annotation[] anns = new Annotation[annot.annotations.length
\r
222 System.arraycopy(annot.annotations, 0, anns, 0,
\r
223 annot.annotations.length);
\r
224 System.arraycopy(els, 0, anns, annot.annotations.length, els.length);
\r
225 annot.annotations = anns;
\r
226 // System.out.println("else: ");
\r
231 public static String print(SequenceI[] s)
\r
233 return "not yet implemented";
\r
236 public String print()
\r
238 return print(getSeqsAsArray());
\r
241 private static Hashtable typeIds = null;
\r
244 if (typeIds == null)
\r
246 typeIds = new Hashtable();
\r
247 typeIds.put("SS", "secondary structure");
\r
248 typeIds.put("SA", "surface accessibility");
\r
249 typeIds.put("TM", "transmembrane");
\r
250 typeIds.put("PP", "posterior probability");
\r
251 typeIds.put("LI", "ligand binding");
\r
252 typeIds.put("AS", "active site");
\r
253 typeIds.put("IN", "intron");
\r
254 typeIds.put("IR", "interacting residue");
\r
255 typeIds.put("AC", "accession");
\r
256 typeIds.put("OS", "organism");
\r
257 typeIds.put("CL", "class");
\r
258 typeIds.put("DE", "description");
\r
259 typeIds.put("DR", "reference");
\r
260 typeIds.put("LO", "look");
\r
261 typeIds.put("RF", "reference positions");
\r
266 protected static String id2type(String id)
\r
268 if (typeIds.containsKey(id))
\r
270 return (String) typeIds.get(id);
\r
272 System.err.println("Warning : Unknown Stockholm annotation type code "
\r
277 * make a friendly ID string.
\r
280 * @return truncated dataName to after last '/'
\r
282 private String safeName(String dataName)
\r
285 while ((b = dataName.indexOf("/")) > -1 && b < dataName.length())
\r
287 dataName = dataName.substring(b + 1).trim();
\r
290 int e = (dataName.length() - dataName.indexOf(".")) + 1;
\r
291 dataName = dataName.substring(1, e).trim();
\r