1 package fr.orsay.lri.varna.factories;
3 import java.io.BufferedReader;
5 import java.io.FileNotFoundException;
6 import java.io.FileReader;
7 import java.io.IOException;
9 import java.util.ArrayList;
10 import java.util.Collection;
11 import java.util.EmptyStackException;
12 import java.util.Hashtable;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.Vector;
17 import javax.xml.parsers.SAXParser;
18 import javax.xml.parsers.SAXParserFactory;
20 import org.xml.sax.InputSource;
22 import fr.orsay.lri.varna.exceptions.ExceptionExportFailed;
23 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
24 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
25 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
26 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
27 import fr.orsay.lri.varna.models.rna.ModeleBP;
28 import fr.orsay.lri.varna.models.rna.ModeleBackboneElement;
29 import fr.orsay.lri.varna.models.rna.ModeleBackboneElement.BackboneType;
30 import fr.orsay.lri.varna.models.rna.ModeleBase;
31 import fr.orsay.lri.varna.models.rna.RNA;
32 import fr.orsay.lri.varna.utils.RNAMLParser;
35 * BH JAVA FIX: mostly here we are just removing a lot of unnecessary stack traces when doing a drag-drop of a file
36 * BH JAVA FIX: making sure the file reader is closed properly
39 public class RNAFactory
/**
 * File formats handled by this factory. FILE_TYPE_UNKNOWN is the value the
 * File/Reader overloads of loadSecStr pass on, and it is skipped when
 * loadSecStr(BufferedReader, RNAFileType) walks over values().
 */
42 public enum RNAFileType
44 FILE_TYPE_STOCKHOLM, FILE_TYPE_TCOFFEE, FILE_TYPE_BPSEQ, FILE_TYPE_CT, FILE_TYPE_DBN, FILE_TYPE_RNAML, FILE_TYPE_UNKNOWN
// Static flag shared by all calls: loadSecStr(BufferedReader, RNAFileType) sets
// it to true in its FILE_TYPE_UNKNOWN branch and back to false afterwards; the
// BPSEQ, TCoffee and CT loaders test it inside their exception handlers.
// NOTE(review): what is suppressed when the flag is set is not visible here —
// presumably error reporting; confirm.
47 private static boolean isQuiet;
/**
 * Parses RNAML (XML) content from a reader with a SAX parser and builds one
 * RNA per molecule reported by the RNAMLParser handler.
 *
 * @param r source of the XML document; wrapped in an InputSource
 * @throws ExceptionLoadingFailed when parsing raises an IOException
 * @throws ExceptionPermissionDenied declared; no throw site is visible in this listing
 * @throws ExceptionFileFormatOrSyntax declared; no throw site is visible in this listing
 */
49 public static ArrayList<RNA> loadSecStrRNAML(Reader r)
50 throws ExceptionPermissionDenied, ExceptionLoadingFailed,
51 ExceptionFileFormatOrSyntax
54 ArrayList<RNA> result = new ArrayList<RNA>();
57 // System.setProperty("javax.xml.parsers.SAXParserFactory",
58 // "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl");
59 SAXParserFactory saxFact = javax.xml.parsers.SAXParserFactory
// Validation, XInclude processing and namespace awareness are all turned off.
// NOTE(review): no setFeature call disabling DTDs/external entities is visible
// here; if r may carry untrusted XML, check whether XXE hardening is needed.
61 saxFact.setValidating(false);
62 saxFact.setXIncludeAware(false);
63 saxFact.setNamespaceAware(false);
64 SAXParser sp = saxFact.newSAXParser();
// RNAMLData is the SAX handler; after parse() it is queried through getMolecules().
65 RNAMLParser RNAMLData = new RNAMLParser();
66 sp.parse(new InputSource(r), RNAMLData);
69 * XMLReader xr = XMLReaderFactory.createXMLReader(); RNAMLParser
70 * RNAMLData = new RNAMLParser(); xr.setContentHandler(RNAMLData);
71 * xr.setErrorHandler(RNAMLData); xr.setEntityResolver(RNAMLData);
72 * xr.parse(new InputSource(r));
74 for (RNAMLParser.RNATmp rnaTmp : RNAMLData.getMolecules())
76 RNA current = new RNA();
77 // Retrieving parsed data
78 List<String> seq = rnaTmp.getSequence();
79 // Creating empty structure of suitable size
80 int[] str = new int[seq.size()];
81 for (int i = 0; i < str.length; i++)
// The RNA is first set up from the sequence and the placeholder structure array;
// the parsed base pairs are collected in allbps and applied afterwards.
85 current.setRNA(seq, str);
86 Vector<RNAMLParser.BPTemp> allbpsTmp = rnaTmp.getStructure();
87 ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
88 for (int i = 0; i < allbpsTmp.size(); i++)
90 RNAMLParser.BPTemp bp = allbpsTmp.get(i);
91 // System.err.println(bp);
// bp5 / bp3 are base indices into current; their declarations are not visible
// in this listing (presumably derived from bp — confirm).
94 ModeleBase mb = current.getBaseAt(bp5);
95 ModeleBase part = current.getBaseAt(bp3);
96 ModeleBP newStyle = bp.createBPStyle(mb, part);
100 current.applyBPs(allbps);
// The IOException itself is not passed on: only a fixed message is forwarded.
105 catch (IOException ioe)
107 throw new ExceptionLoadingFailed(
108 "Couldn't load file due to I/O or security policy issues.", "");
// NOTE(review): the catch clause declaring ge is not visible; this handler only
// prints the stack trace as far as this listing shows.
113 ge.printStackTrace();
/**
 * Scans a bracket-style secondary-structure string and matches symbols
 * through one stack of positions per character.
 *
 * NOTE(review): the case labels, the writes into result and the return
 * statement are not visible in this listing; the comments below describe
 * only the statements that are shown.
 *
 * @param _secStr structure string; the result array has one slot per character
 * @return presumably the pairing partner of every position — confirm
 * @throws ExceptionUnmatchedClosingParentheses built from the loop index i
 *         when a pop hits an empty stack
 */
118 public static int[] parseSecStr(String _secStr)
119 throws ExceptionUnmatchedClosingParentheses
121 Hashtable<Character, Stack<Integer>> stacks = new Hashtable<Character, Stack<Integer>>();
122 int[] result = new int[_secStr.length()];
126 for (i = 0; i < _secStr.length(); i++)
129 char c = _secStr.charAt(i);
130 char c2 = Character.toUpperCase(c);
// Lazily create the stack for the upper-case form of the current character.
131 if (!stacks.containsKey(c2))
133 stacks.put(c2, new Stack<Integer>());
// Remember the current position on the stack registered for c.
141 stacks.get(c).push(i);
// NOTE(review): a stack is only registered for characters already encountered
// (see the containsKey/put above). If this closing symbol appears before any
// '<' was read, get('<') returns null and the pop() raises a
// NullPointerException, which the EmptyStackException handler below does not
// catch. The same applies to '{', '(' and '[' — confirm against the full source.
145 int j = stacks.get('<').pop();
152 int j = stacks.get('{').pop();
159 int j = stacks.get('(').pop();
166 int j = stacks.get('[').pop();
// Letters form additional bracket families: an upper-case letter pushes its
// position, the matching lower-case letter pops from the upper-case stack.
175 if (Character.isLetter(c) && Character.isUpperCase(c))
177 stacks.get(c).push(i);
179 else if (Character.isLetter(c) && Character.isLowerCase(c))
181 int j = stacks.get(Character.toUpperCase(c)).pop();
// An empty stack at pop time is reported with the current index i.
189 catch (EmptyStackException e)
191 throw new ExceptionUnmatchedClosingParentheses(i);
/**
 * Reads a dot-bracket style record from a reader: an optional title line
 * starting with ">", then sequence and structure text.
 *
 * NOTE(review): the declarations of seqTmp, strTmp and title and the code
 * that fills them are not visible in this listing.
 *
 * @param r character source; wrapped in a BufferedReader and read line by line
 * @throws ExceptionLoadingFailed when reading raises an IOException (only the
 *         message of the IOException is forwarded)
 */
196 public static ArrayList<RNA> loadSecStrDBN(Reader r)
197 throws ExceptionLoadingFailed, ExceptionPermissionDenied,
198 ExceptionUnmatchedClosingParentheses, ExceptionFileFormatOrSyntax
200 boolean loadOk = false;
201 ArrayList<RNA> result = new ArrayList<RNA>();
202 RNA current = new RNA();
205 BufferedReader fr = new BufferedReader(r);
206 String line = fr.readLine();
// Keep reading until the input ends or a structure string has been collected.
210 while ((line != null) && (strTmp.equals("")))
// Lines not starting with ">" carry data; while seqTmp is still empty the
// sequence has not been read yet.
213 if (!line.startsWith(">"))
215 if (seqTmp.equals(""))
// Title = text after the first character of the line, trimmed.
226 title = line.substring(1).trim();
228 line = fr.readLine();
// Only populate the RNA when a structure string was actually found.
230 if (strTmp.length() != 0)
232 current.setRNA(seqTmp, strTmp);
233 current.setName(title);
237 catch (IOException e)
239 throw new ExceptionLoadingFailed(e.getMessage(), "");
/**
 * Loads structures from a file without a format hint: the file is opened
 * with a FileReader and handed to loadSecStr(BufferedReader, RNAFileType)
 * as FILE_TYPE_UNKNOWN.
 *
 * @param f file to read
 * @throws ExceptionFileFormatOrSyntax from the delegate, and also when the
 *         file cannot be found (the message is then f.toString())
 */
248 public static ArrayList<RNA> loadSecStr(File f)
249 throws ExceptionFileFormatOrSyntax
// NOTE(review): the reader created here is not closed in this method;
// presumably the delegate closes it — confirm.
252 return loadSecStr(new BufferedReader(new FileReader(f)), RNAFileType.FILE_TYPE_UNKNOWN);
253 } catch (FileNotFoundException e) {
254 throw new ExceptionFileFormatOrSyntax(f.toString());
/**
 * Loads structures from a reader without a format hint: wraps r in a
 * BufferedReader and delegates with FILE_TYPE_UNKNOWN.
 *
 * @param r character source
 * @throws ExceptionFileFormatOrSyntax propagated from the delegate
 */
258 public static ArrayList<RNA> loadSecStr(Reader r)
259 throws ExceptionFileFormatOrSyntax
261 return loadSecStr(new BufferedReader(r), RNAFileType.FILE_TYPE_UNKNOWN);
/**
 * Dispatches to the loader matching fileType. For FILE_TYPE_UNKNOWN every
 * other RNAFileType value is tried through a recursive call.
 *
 * NOTE(review): the switch header, the try lines, the return statements and
 * any code rewinding r between attempts are not visible in this listing.
 *
 * @param r        buffered source handed to the format-specific loader
 * @param fileType format to parse, or FILE_TYPE_UNKNOWN to probe
 * @throws ExceptionFileFormatOrSyntax with the message
 *         "Couldn't parse this file as <fileType>."
 */
264 public static ArrayList<RNA> loadSecStr(BufferedReader r, RNAFileType fileType) throws ExceptionFileFormatOrSyntax {
// Each typed branch calls its loader, checks for a non-empty result and
// catches any Exception raised by the loader.
267 case FILE_TYPE_DBN: {
269 ArrayList<RNA> result = loadSecStrDBN(r);
270 if (result.size() != 0)
272 } catch (Exception e) {
278 ArrayList<RNA> result = loadSecStrCT(r);
279 if (result.size() != 0)
281 } catch (Exception e) {
287 case FILE_TYPE_BPSEQ: {
289 ArrayList<RNA> result = loadSecStrBPSEQ(r);
290 if (result.size() != 0)
292 } catch (Exception e) {
298 case FILE_TYPE_TCOFFEE: {
300 ArrayList<RNA> result = loadSecStrTCoffee(r);
301 if (result.size() != 0)
303 } catch (Exception e) {
309 case FILE_TYPE_STOCKHOLM: {
311 ArrayList<RNA> result = loadSecStrStockholm(r);
312 if (result.size() != 0)
314 } catch (Exception e) {
320 case FILE_TYPE_RNAML: {
322 ArrayList<RNA> result = loadSecStrRNAML(r);
323 if (result.size() != 0)
325 } catch (Exception e) {
// Probing branch: isQuiet is a static field, so it is raised for the duration
// of the probing and lowered again below.
332 case FILE_TYPE_UNKNOWN: {
335 RNAFactory.RNAFileType[] types = RNAFactory.RNAFileType.values();
336 isQuiet = true; // BH to not report errors when
338 ArrayList<RNA> result = null;
339 RNAFactory.RNAFileType t = null;
340 for (int i = 0; i < types.length; i++) {
// FILE_TYPE_UNKNOWN itself is skipped, which keeps the recursion from looping.
343 if (t != RNAFactory.RNAFileType.FILE_TYPE_UNKNOWN) {
// NOTE(review): every attempt receives the same reader r; how r is repositioned
// after a failed attempt is not visible here — confirm.
345 result = loadSecStr(r, t);
346 if (result.size() != 0) {
349 } catch (Exception e) {
351 System.err.println(e.toString());
// The value of t is printed to standard output before the flag is lowered.
355 System.out.println(t); // BH
356 isQuiet = false; // BH
358 } catch (IOException e2) {
359 e2.printStackTrace();
363 throw new ExceptionFileFormatOrSyntax("Couldn't parse this file as " + fileType + ".");
368 } catch (IOException e) {
/**
 * Maps a path to an RNAFileType by looking at how the lower-cased path ends.
 * The checks are plain suffix tests, evaluated in the order written; no "."
 * separator is required, so e.g. any path ending in "ml" (".xml", ".rnaml",
 * ".html") is reported as RNAML.
 *
 * @param path file name or path; dereferenced directly, so it must not be null
 * @return the first matching type, or FILE_TYPE_UNKNOWN when no suffix matches
 */
374 public static RNAFileType guessFileTypeFromExtension(String path)
376 if (path.toLowerCase().endsWith("ml"))
378 return RNAFileType.FILE_TYPE_RNAML;
380 else if (path.toLowerCase().endsWith("dbn")
381 || path.toLowerCase().endsWith("faa"))
383 return RNAFileType.FILE_TYPE_DBN;
385 else if (path.toLowerCase().endsWith("ct"))
387 return RNAFileType.FILE_TYPE_CT;
389 else if (path.toLowerCase().endsWith("bpseq"))
391 return RNAFileType.FILE_TYPE_BPSEQ;
// "rfold" is the suffix associated with the TCoffee loader.
393 else if (path.toLowerCase().endsWith("rfold"))
395 return RNAFileType.FILE_TYPE_TCOFFEE;
397 else if (path.toLowerCase().endsWith("stockholm")
398 || path.toLowerCase().endsWith("stk"))
400 return RNAFileType.FILE_TYPE_STOCKHOLM;
403 return RNAFileType.FILE_TYPE_UNKNOWN;
/**
 * Loads structures from the file at path, choosing the parser from the
 * file name via guessFileTypeFromExtension.
 *
 * NOTE(review): the bodies of both catch clauses, and any finally block
 * closing fr, are not visible in this listing.
 *
 * @param path location of the file; opened with a FileReader
 */
407 public static ArrayList<RNA> loadSecStr(String path)
408 throws ExceptionExportFailed, ExceptionPermissionDenied,
409 ExceptionLoadingFailed, ExceptionFileFormatOrSyntax,
410 ExceptionUnmatchedClosingParentheses, FileNotFoundException
// fr is declared ahead of its assignment, presumably so that later handlers
// can reach it — confirm.
412 FileReader fr = null;
415 fr = new FileReader(path);
416 RNAFileType type = guessFileTypeFromExtension(path);
417 return loadSecStr(new BufferedReader(fr), type);
419 catch (ExceptionFileFormatOrSyntax e)
426 catch (IOException e2)
/**
 * Reads a Stockholm alignment from r through StockholmIO.readAlignement.
 *
 * NOTE(review): how the alignment a is converted into the returned list is
 * not visible in this listing.
 *
 * @param r buffered source passed straight to StockholmIO
 * @throws IOException declared; presumably raised while reading r — confirm
 */
434 public static ArrayList<RNA> loadSecStrStockholm(BufferedReader r)
435 throws IOException, ExceptionUnmatchedClosingParentheses
437 RNAAlignment a = StockholmIO.readAlignement(r);
/**
 * Parses BPSEQ-style input: header lines ("#", "Filename:", "Organism:",
 * "Accession Number:") feed the title, data lines give a position followed
 * by one or more partner positions.
 *
 * NOTE(review): the declarations of title, minIndex, bpFrom and bpTo, the
 * main read loop header and several statements are not visible in this
 * listing.
 *
 * @param r character source; wrapped in a BufferedReader and read line by line
 * @throws ExceptionLoadingFailed built from the message of the caught exception
 */
441 public static ArrayList<RNA> loadSecStrBPSEQ(Reader r)
442 throws ExceptionPermissionDenied, ExceptionLoadingFailed,
443 ExceptionFileFormatOrSyntax
445 boolean loadOk = false;
446 ArrayList<RNA> result = new ArrayList<RNA>();
447 RNA current = new RNA();
450 BufferedReader fr = new BufferedReader(r);
451 String line = fr.readLine();
// seqTmp collects one entry per nucleotide; strTmp maps a position to the list
// of its partner positions (several partners per position are allowed).
452 ArrayList<String> seqTmp = new ArrayList<String>();
453 Hashtable<Integer, Vector<Integer>> strTmp = new Hashtable<Integer, Vector<Integer>>();
459 boolean noWarningYet = true;
462 String filenameStr = "Filename:";
463 String organismStr = "Organism:";
464 String ANStr = "Accession Number:";
468 String[] tokens = line.split("\\s+");
469 ArrayList<Integer> numbers = new ArrayList<Integer>();
470 Hashtable<Integer,Integer> numberToIndex = new Hashtable<Integer,Integer>();
// A data line has at least three tokens and is none of the known header lines
// (headers can also split into three or more tokens, hence the explicit checks).
471 if ((tokens.length >= 3) && !tokens[0].contains("#")
472 && !line.startsWith("Organism:") && !line.startsWith("Citation")
473 && !line.startsWith("Filename:")
474 && !line.startsWith("Accession Number:"))
478 bpFrom = (Integer.parseInt(tokens[0]));
// A gap between the positions read so far and bpFrom is detected here; the
// loop below then runs until seqTmp reaches position bpFrom.
483 if (seqTmp.size() < (bpFrom - minIndex + 1))
487 noWarningYet = false;
489 * warningEmition( "Discontinuity detected between nucleotides " +
490 * (seqTmp.size()) + " and " + (bpFrom + 1) +
491 * "!\nFilling in missing portions with unpaired unknown 'X' nucleotides ..."
495 while (seqTmp.size() < (bpFrom - minIndex + 1))
497 // System.err.println(".");
// Tokens from index 2 onwards are partner positions; a 0 in the last token is
// not recorded as a pair.
501 for (int i = 2; i < tokens.length; i++)
503 bpTo = (Integer.parseInt(tokens[i]));
504 if ((bpTo != 0) || (i != tokens.length - 1))
506 if (!strTmp.containsKey(bpFrom))
507 strTmp.put(bpFrom, new Vector<Integer>());
508 strTmp.get(bpFrom).add(bpTo);
// "#" lines: everything after the first "#" is appended to the title.
512 else if (tokens[0].startsWith("#"))
514 int occur = line.indexOf("#");
515 String tmp = line.substring(occur + 1);
516 title += tmp.trim() + " ";
518 else if (tokens[0].startsWith(filenameStr))
520 int occur = line.indexOf(filenameStr);
521 String tmp = line.substring(occur + filenameStr.length());
// The organism text is placed in front of whatever title exists already.
524 else if (tokens[0].startsWith(organismStr))
526 int occur = line.indexOf(organismStr);
527 String tmp = line.substring(occur + organismStr.length());
528 if (title.length() != 0)
532 title = tmp.trim() + title;
534 else if (line.contains(ANStr))
536 int occur = line.indexOf(ANStr);
537 String tmp = line.substring(occur + ANStr.length());
540 line = fr.readLine();
// Build the RNA only when at least one pairing entry was read.
542 if (strTmp.size() != 0)
544 ArrayList<String> seq = seqTmp;
545 int[] str = new int[seq.size()];
546 for (int i = 0; i < seq.size(); i++)
550 current.setRNA(seq, str, minIndex);
551 ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
// File positions are shifted by minIndex to obtain base indices in current.
552 for (int i : strTmp.keySet())
554 for (int j : strTmp.get(i))
558 ModeleBase mb = current.getBaseAt(i - minIndex);
559 ModeleBase part = current.getBaseAt(j - minIndex);
560 ModeleBP newStyle = new ModeleBP(mb, part);
561 allbps.add(newStyle);
565 current.applyBPs(allbps);
566 current.setName(title);
571 catch (NumberFormatException e)
573 if (!isQuiet) // BH SwingJS
576 catch (IOException e)
578 // TODO Auto-generated catch block
583 throw new ExceptionLoadingFailed(e.getMessage(), "");
/**
 * Parses T-Coffee style input: a count line, sequence definitions, "#"
 * lines selecting a sequence pair, and three-token base-pair lines. Lines
 * starting with "!" are not parsed.
 *
 * NOTE(review): the declarations of current, currSeq and numSeqs, the read
 * loop header and the code adding entries to result are not visible in
 * this listing.
 *
 * @param r character source; wrapped in a BufferedReader and read line by line
 * @throws ExceptionLoadingFailed with the fixed message "Parse Error"
 */
590 public static ArrayList<RNA> loadSecStrTCoffee(Reader r)
591 throws ExceptionPermissionDenied, ExceptionLoadingFailed,
592 ExceptionFileFormatOrSyntax
594 boolean loadOk = false;
595 ArrayList<RNA> result = new ArrayList<RNA>();
598 BufferedReader fr = new BufferedReader(r);
599 String line = fr.readLine();
// seqs and ids are parallel lists, addressed below with a 1-based sequence number.
600 ArrayList<String> seqs = new ArrayList<String>();
601 ArrayList<String> ids = new ArrayList<String>();
607 if (!line.startsWith("!"))
609 String[] tokens = line.split("\\s+");
610 // This may indicate new secondary structure
611 if (line.startsWith("#"))
// "#<n> <m>": the first number follows the "#" directly, the second is token 1.
613 currSeq = Integer.parseInt(tokens[0].substring(1));
614 int currSeq2 = Integer.parseInt(tokens[1]);
615 // For TCoffee, a sec str is a matching between a seq and itself
616 // => Disregard any alignment by filtering on the equality of sequence indices.
617 if (currSeq == currSeq2)
620 current.setName(ids.get(currSeq - 1));
621 current.setSequence(seqs.get(currSeq - 1));
629 // Beginning of the file...
630 else if (current == null)
632 //... either this is the number of sequences...
635 numSeqs = Integer.parseInt(tokens[0]);
637 //... or this is a sequence definition...
// For a sequence definition the id is token 0 and the sequence is token 2.
640 String id = tokens[0];
641 String seq = tokens[2];
646 //Otherwise, this is a base-pair definition, related to the currently selected sequence
647 else if (tokens.length == 3)
// Positions in the file are 1-based; addBP receives them shifted down by one.
649 int from = Integer.parseInt(tokens[0]) - 1;
650 int to = Integer.parseInt(tokens[1]) - 1;
651 current.addBP(from, to);
654 line = fr.readLine();
658 catch (NumberFormatException e)
660 if (!isQuiet) // BH SwingJS
663 catch (IOException e)
665 if (!isQuiet) // BH SwingJS
670 throw new ExceptionLoadingFailed("Parse Error", "");
/**
 * Parses CT (connect table) style input: data lines with at least six
 * tokens describe one nucleotide each, and a line containing "ENERGY = " or
 * "dG = " contributes to the title.
 *
 * NOTE(review): the declarations of title, minIndex, bpFrom and bpTo, the
 * read loop header, the code filling seq/lbls/strTmp and the end of the
 * method are not visible in this listing.
 *
 * @param r character source; wrapped in a BufferedReader and read line by line
 * @throws ExceptionLoadingFailed built from the message of an IOException
 * @throws ExceptionFileFormatOrSyntax built from the message of a NumberFormatException
 */
675 public static ArrayList<RNA> loadSecStrCT(Reader r)
676 throws ExceptionPermissionDenied, ExceptionLoadingFailed,
677 ExceptionFileFormatOrSyntax
679 boolean loadOk = false;
680 ArrayList<RNA> result = new ArrayList<RNA>();
681 RNA current = new RNA();
684 BufferedReader fr = new BufferedReader(r);
685 String line = fr.readLine();
// seq, lbls and strTmp are built up per nucleotide line; newStrands records
// positions after which a backbone discontinuity is added at the end.
686 ArrayList<String> seq = new ArrayList<String>();
687 ArrayList<String> lbls = new ArrayList<String>();
688 Vector<Integer> strTmp = new Vector<Integer>();
689 Vector<Integer> newStrands = new Vector<Integer>();
694 boolean noWarningYet = true;
700 String[] tokens = line.split("\\s+");
701 if (tokens.length >= 6)
// Token 0 is the position of the line, token 4 its partner.
705 bpFrom = (Integer.parseInt(tokens[0]));
706 bpTo = (Integer.parseInt(tokens[4]));
// Tokens 2 and 3 are read as "before" and "after" values.
716 int before = Integer.parseInt(tokens[2]);
717 int after = Integer.parseInt(tokens[3]);
// before == 0 on a line that is not the first one marks a strand start: the
// index of the last entry read so far is remembered.
719 if (before==0 && !seq.isEmpty())
721 newStrands.add(strTmp.size()-1);
723 if (bpFrom != seq.size())
727 noWarningYet = false;
729 * warningEmition( "Discontinuity detected between nucleotides "
730 * + (seq.size()) + " and " + (bpFrom + 1) +
731 * "!\nFilling in missing portions with unpaired unknown 'X' nucleotides ..."
735 while (bpFrom > seq.size())
746 catch (NumberFormatException e)
748 if (strTmp.size()!=0)
// Energy line: token 3 is taken as the energy, tokens from index 4 on are
// appended to the title, followed by the "(E=... kcal/mol)" suffix.
752 if ((line.contains("ENERGY = ")) || line.contains("dG = "))
754 String[] ntokens = line.split("\\s+");
755 if (ntokens.length >= 4)
757 String energy = ntokens[3];
758 for (int i = 4; i < ntokens.length; i++)
760 title += ntokens[i] + " ";
762 title += "(E=" + energy + " kcal/mol)";
765 line = fr.readLine();
// Build the RNA only when at least one structure entry was read.
767 if (strTmp.size() != 0)
769 int[] str = new int[strTmp.size()];
770 for (int i = 0; i < strTmp.size(); i++)
772 str[i] = strTmp.elementAt(i).intValue();
774 current.setRNA(seq, str, minIndex);
775 current.setName(title);
// NOTE(review): assumes lbls holds at least current.getSize() entries — confirm.
776 for (int i = 0; i < current.getSize(); i++)
778 current.getBaseAt(i).setLabel(lbls.get(i));
780 for (int i : newStrands)
782 current.getBackbone().addElement(new ModeleBackboneElement(i,BackboneType.DISCONTINUOUS_TYPE));
789 catch (IOException e)
792 throw new ExceptionLoadingFailed(e.getMessage(), "");
794 catch (NumberFormatException e)
796 if (!isQuiet) // BH SwingJS
798 throw new ExceptionFileFormatOrSyntax(e.getMessage(), "");