package fr.orsay.lri.varna.factories;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EmptyStackException;
import java.util.Hashtable;
import java.util.List;
import java.util.Stack;
import java.util.Vector;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;

import fr.orsay.lri.varna.exceptions.ExceptionExportFailed;
import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
import fr.orsay.lri.varna.models.rna.ModeleBP;
import fr.orsay.lri.varna.models.rna.ModeleBackboneElement;
import fr.orsay.lri.varna.models.rna.ModeleBackboneElement.BackboneType;
import fr.orsay.lri.varna.models.rna.ModeleBase;
import fr.orsay.lri.varna.models.rna.RNA;
import fr.orsay.lri.varna.utils.RNAMLParser;

/**
 * Static factory methods that build {@link RNA} objects from secondary
 * structure files (RNAML, dot-bracket, CT, BPSEQ, T-Coffee, Stockholm).
 *
 * BH JAVA FIX: mostly here we are just removing a lot of unnecessary stack
 * traces when doing a drag-drop of a file
 *
 * BH JAVA FIX: making sure the file reader is closed properly
 */
public class RNAFactory {

    /** File formats understood by the loaders of this class. */
    public enum RNAFileType {
        FILE_TYPE_STOCKHOLM, FILE_TYPE_TCOFFEE, FILE_TYPE_BPSEQ, FILE_TYPE_CT, FILE_TYPE_DBN, FILE_TYPE_RNAML, FILE_TYPE_UNKNOWN
    }

    /**
     * When {@code true}, parsers do not print stack traces and
     * {@link #loadSecStr(BufferedReader, RNAFileType)} leaves the reader open.
     * It is only raised while the file type is being guessed. NOTE(review):
     * this is shared static state, so concurrent loads may interfere.
     */
    private static boolean isQuiet;

    /**
     * Parses an RNAML document and builds one {@link RNA} per molecule
     * reported by the {@link RNAMLParser}.
     *
     * @param r source of the RNAML document
     * @return the RNAs built before any parsing problem occurred (possibly
     *         empty)
     * @throws ExceptionLoadingFailed if an {@link IOException} occurs while
     *         parsing
     */
    public static ArrayList<RNA> loadSecStrRNAML(Reader r)
            throws ExceptionPermissionDenied, ExceptionLoadingFailed,
            ExceptionFileFormatOrSyntax {
        ArrayList<RNA> result = new ArrayList<RNA>();
        try {
            SAXParserFactory saxFact = javax.xml.parsers.SAXParserFactory
                    .newInstance();
            saxFact.setValidating(false);
            saxFact.setXIncludeAware(false);
            saxFact.setNamespaceAware(false);
            SAXParser sp = saxFact.newSAXParser();
            RNAMLParser RNAMLData = new RNAMLParser();
            sp.parse(new InputSource(r), RNAMLData);

            for (RNAMLParser.RNATmp rnaTmp : RNAMLData.getMolecules()) {
                RNA current = new RNA();
                // Retrieving parsed data
                List<String> seq = rnaTmp.getSequence();
                // Creating empty structure of suitable size
                int[] str = new int[seq.size()];
                for (int i = 0; i < str.length; i++) {
                    str[i] = -1;
                }
                current.setRNA(seq, str);
                Vector<RNAMLParser.BPTemp> allbpsTmp = rnaTmp.getStructure();
                ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
                for (int i = 0; i < allbpsTmp.size(); i++) {
                    RNAMLParser.BPTemp bp = allbpsTmp.get(i);
                    int bp5 = bp.pos5;
                    int bp3 = bp.pos3;
                    ModeleBase mb = current.getBaseAt(bp5);
                    ModeleBase part = current.getBaseAt(bp3);
                    ModeleBP newStyle = bp.createBPStyle(mb, part);
                    allbps.add(newStyle);
                }
                current.applyBPs(allbps);
                result.add(current);
            }
        } catch (IOException ioe) {
            throw new ExceptionLoadingFailed(
                    "Couldn't load file due to I/O or security policy issues.",
                    "");
        } catch (Exception ge) {
            // Any other problem is only reported; the molecules built so far
            // are still returned.
            if (!isQuiet) { // BH
                ge.printStackTrace();
            }
        }
        return result;
    }

    /**
     * Converts a bracket notation into a partner table.
     *
     * Opening symbols are {@code ( [ { <} and upper-case letters; they are
     * closed respectively by {@code ) ] } >} and by the matching lower-case
     * letter. Every other character (including {@code '.'}) is unpaired.
     *
     * @param _secStr the structure in bracket notation
     * @return an array where entry {@code i} holds the index of the partner
     *         of position {@code i}, or {@code -1} if unpaired
     * @throws ExceptionUnmatchedClosingParentheses if a closing symbol has no
     *         pending opening symbol; the exception carries the offending
     *         index
     */
    public static int[] parseSecStr(String _secStr)
            throws ExceptionUnmatchedClosingParentheses {
        Hashtable<Character, Stack<Integer>> stacks = new Hashtable<Character, Stack<Integer>>();
        int[] result = new int[_secStr.length()];
        int i = 0;
        try {
            for (i = 0; i < _secStr.length(); i++) {
                result[i] = -1;
                char c = _secStr.charAt(i);
                if (isOpeningSymbol(c)) {
                    stackFor(stacks, c).push(i);
                } else {
                    char opening = openingSymbolFor(c);
                    if (opening != NO_SYMBOL) {
                        int j = popOpening(stacks, opening);
                        result[i] = j;
                        result[j] = i;
                    }
                }
            }
        } catch (EmptyStackException e) {
            throw new ExceptionUnmatchedClosingParentheses(i);
        }
        return result;
    }

    /** Marker returned by {@link #openingSymbolFor(char)} for non-closing characters. */
    private static final char NO_SYMBOL = '\0';

    /** Tells whether {@code c} opens a base pair in bracket notation. */
    private static boolean isOpeningSymbol(char c) {
        return c == '(' || c == '[' || c == '{' || c == '<'
                || (Character.isLetter(c) && Character.isUpperCase(c));
    }

    /**
     * Returns the opening symbol closed by {@code c}, or {@link #NO_SYMBOL}
     * if {@code c} is not a closing symbol.
     */
    private static char openingSymbolFor(char c) {
        switch (c) {
        case ')':
            return '(';
        case ']':
            return '[';
        case '}':
            return '{';
        case '>':
            return '<';
        default:
            if (Character.isLetter(c) && Character.isLowerCase(c)) {
                return Character.toUpperCase(c);
            }
            return NO_SYMBOL;
        }
    }

    /** Returns the stack of pending positions for {@code opening}, creating it on first use. */
    private static Stack<Integer> stackFor(
            Hashtable<Character, Stack<Integer>> stacks, char opening) {
        Stack<Integer> stack = stacks.get(opening);
        if (stack == null) {
            stack = new Stack<Integer>();
            stacks.put(opening, stack);
        }
        return stack;
    }

    /**
     * Pops the most recent pending position of {@code opening}.
     *
     * @throws EmptyStackException if no such position is pending, including
     *         the case where {@code opening} was never encountered
     */
    private static int popOpening(Hashtable<Character, Stack<Integer>> stacks,
            char opening) {
        Stack<Integer> stack = stacks.get(opening);
        if (stack == null) {
            // The opening symbol never appeared: report it as an unmatched
            // closing symbol rather than failing with a NullPointerException.
            throw new EmptyStackException();
        }
        return stack.pop();
    }

    /**
     * Reads a single record in dot-bracket notation: optional title lines
     * starting with {@code '>'}, then the sequence line, then the structure
     * line. Reading stops as soon as a non-empty structure line was found.
     *
     * @param r source of the record
     * @return a list holding the parsed RNA, or an empty list if no structure
     *         line was found
     * @throws ExceptionLoadingFailed if reading fails
     */
    public static ArrayList<RNA> loadSecStrDBN(Reader r)
            throws ExceptionLoadingFailed, ExceptionPermissionDenied,
            ExceptionUnmatchedClosingParentheses, ExceptionFileFormatOrSyntax {
        boolean loadOk = false;
        ArrayList<RNA> result = new ArrayList<RNA>();
        RNA current = new RNA();
        try {
            BufferedReader fr = new BufferedReader(r);
            String line = fr.readLine();
            String title = "";
            String seqTmp = "";
            String strTmp = "";
            while ((line != null) && (strTmp.equals(""))) {
                line = line.trim();
                if (!line.startsWith(">")) {
                    if (seqTmp.equals("")) {
                        seqTmp = line;
                    } else {
                        strTmp = line;
                    }
                } else {
                    title = line.substring(1).trim();
                }
                line = fr.readLine();
            }
            if (strTmp.length() != 0) {
                current.setRNA(seqTmp, strTmp);
                current.setName(title);
                loadOk = true;
            }
        } catch (IOException e) {
            throw new ExceptionLoadingFailed(e.getMessage(), "");
        }
        if (loadOk) {
            result.add(current);
        }
        return result;
    }

    /**
     * Loads the RNAs of a file, trying every supported format.
     *
     * @param f the file to read
     * @return the parsed RNAs
     * @throws ExceptionFileFormatOrSyntax if the file cannot be found or
     *         cannot be parsed in any supported format
     */
    public static ArrayList<RNA> loadSecStr(File f)
            throws ExceptionFileFormatOrSyntax {
        try {
            return loadSecStr(new BufferedReader(new FileReader(f)),
                    RNAFileType.FILE_TYPE_UNKNOWN);
        } catch (FileNotFoundException e) {
            throw new ExceptionFileFormatOrSyntax(f.toString());
        }
    }

    /**
     * Loads RNAs from a reader, trying every supported format.
     *
     * @param r the source to read
     * @return the parsed RNAs
     * @throws ExceptionFileFormatOrSyntax if no supported format matches
     */
    public static ArrayList<RNA> loadSecStr(Reader r)
            throws ExceptionFileFormatOrSyntax {
        return loadSecStr(new BufferedReader(r), RNAFileType.FILE_TYPE_UNKNOWN);
    }

    /**
     * Loads RNAs from a reader using the given format. With
     * {@link RNAFileType#FILE_TYPE_UNKNOWN}, every known format is tried in
     * turn on the same content.
     *
     * The reader is closed before this method returns or throws, except
     * during the nested calls performed while guessing the format.
     *
     * @param r the source to read
     * @param fileType the expected format
     * @return a non-empty list of parsed RNAs
     * @throws ExceptionFileFormatOrSyntax if parsing fails or yields no RNA
     */
    public static ArrayList<RNA> loadSecStr(BufferedReader r,
            RNAFileType fileType) throws ExceptionFileFormatOrSyntax {
        try {
            if (fileType == RNAFileType.FILE_TYPE_UNKNOWN) {
                ArrayList<RNA> guessed = loadSecStrGuessingType(r);
                if (guessed != null) {
                    return guessed;
                }
            } else {
                try {
                    ArrayList<RNA> result = loadSecStrOfKnownType(r, fileType);
                    if (result.size() != 0) {
                        return result;
                    }
                } catch (Exception e) {
                    if (!isQuiet) { // BH
                        e.printStackTrace();
                    }
                }
            }
            throw new ExceptionFileFormatOrSyntax(
                    "Couldn't parse this file as " + fileType + ".");
        } finally { // BH !!
            try {
                // While guessing the type the reader must stay open so that
                // it can be reset for the next candidate format.
                if (!isQuiet) {
                    r.close();
                }
            } catch (IOException e) {
                // ignore: nothing useful can be done if closing fails
            }
        }
    }

    /** Dispatches to the loader dedicated to a known (non-UNKNOWN) file type. */
    private static ArrayList<RNA> loadSecStrOfKnownType(BufferedReader r,
            RNAFileType fileType) throws Exception {
        switch (fileType) {
        case FILE_TYPE_DBN:
            return loadSecStrDBN(r);
        case FILE_TYPE_CT:
            return loadSecStrCT(r);
        case FILE_TYPE_BPSEQ:
            return loadSecStrBPSEQ(r);
        case FILE_TYPE_TCOFFEE:
            return loadSecStrTCoffee(r);
        case FILE_TYPE_STOCKHOLM:
            return loadSecStrStockholm(r);
        case FILE_TYPE_RNAML:
            return loadSecStrRNAML(r);
        default:
            return new ArrayList<RNA>();
        }
    }

    /**
     * Tries every known format on the same content, rewinding the reader
     * between attempts.
     *
     * @return the first non-empty result, or {@code null} if no format
     *         matched or the reader could not be marked/reset
     */
    private static ArrayList<RNA> loadSecStrGuessingType(BufferedReader r) {
        ArrayList<RNA> result = null;
        isQuiet = true; // BH to not report errors when drag-dropping
        try {
            r.mark(1000000);
            RNAFileType[] types = RNAFileType.values();
            for (int i = 0; i < types.length; i++) {
                r.reset();
                RNAFileType t = types[i];
                if (t == RNAFileType.FILE_TYPE_UNKNOWN) {
                    continue;
                }
                try {
                    ArrayList<RNA> candidate = loadSecStr(r, t);
                    if (candidate.size() != 0) {
                        result = candidate;
                        System.out.println(t); // BH
                        break;
                    }
                } catch (Exception e) {
                    // Expected whenever the content is not of type t: just
                    // move on to the next candidate format.
                }
            }
        } catch (IOException e2) {
            e2.printStackTrace();
        } finally {
            // Always leave quiet mode, otherwise later loads would neither
            // report errors nor close their reader.
            isQuiet = false; // BH
        }
        return result;
    }

    /**
     * Guesses a file type from the end of a path. Only the suffix is
     * compared, without requiring a preceding dot.
     *
     * @param path the path or file name to inspect
     * @return the matching type, or {@link RNAFileType#FILE_TYPE_UNKNOWN}
     */
    public static RNAFileType guessFileTypeFromExtension(String path) {
        String lower = path.toLowerCase();
        if (lower.endsWith("ml")) {
            return RNAFileType.FILE_TYPE_RNAML;
        } else if (lower.endsWith("dbn") || lower.endsWith("faa")) {
            return RNAFileType.FILE_TYPE_DBN;
        } else if (lower.endsWith("ct")) {
            return RNAFileType.FILE_TYPE_CT;
        } else if (lower.endsWith("bpseq")) {
            return RNAFileType.FILE_TYPE_BPSEQ;
        } else if (lower.endsWith("rfold")) {
            return RNAFileType.FILE_TYPE_TCOFFEE;
        } else if (lower.endsWith("stockholm") || lower.endsWith("stk")) {
            return RNAFileType.FILE_TYPE_STOCKHOLM;
        }
        return RNAFileType.FILE_TYPE_UNKNOWN;
    }

    /**
     * Loads the RNAs of the file at {@code path}, using the format suggested
     * by {@link #guessFileTypeFromExtension(String)}.
     *
     * @param path location of the file
     * @return the parsed RNAs
     * @throws FileNotFoundException if the file cannot be opened
     * @throws ExceptionFileFormatOrSyntax if parsing fails; the exception
     *         carries {@code path}
     */
    public static ArrayList<RNA> loadSecStr(String path)
            throws ExceptionExportFailed, ExceptionPermissionDenied,
            ExceptionLoadingFailed, ExceptionFileFormatOrSyntax,
            ExceptionUnmatchedClosingParentheses, FileNotFoundException {
        FileReader fr = null;
        try {
            fr = new FileReader(path);
            RNAFileType type = guessFileTypeFromExtension(path);
            return loadSecStr(new BufferedReader(fr), type);
        } catch (ExceptionFileFormatOrSyntax e) {
            if (fr != null) {
                try {
                    fr.close();
                } catch (IOException e2) {
                    // ignore: the parsing failure below is what matters
                }
            }
            e.setPath(path);
            throw e;
        }
    }

    /**
     * Reads a Stockholm alignment through {@link StockholmIO} and returns the
     * RNAs of the resulting alignment.
     *
     * @param r source of the alignment
     * @return the RNAs reported by the alignment
     */
    public static ArrayList<RNA> loadSecStrStockholm(BufferedReader r)
            throws IOException, ExceptionUnmatchedClosingParentheses {
        RNAAlignment a = StockholmIO.readAlignement(r);
        return a.getRNAs();
    }

    /**
     * Reads a BPSEQ file: data lines hold an index, a base and one or more
     * partner indices. Lines starting with {@code #}, {@code Filename:},
     * {@code Organism:} and {@code Accession Number:} feed the name and ID of
     * the RNA. Gaps in the numbering are filled with unpaired {@code "X"}
     * bases.
     *
     * @param r source of the file
     * @return a list holding the parsed RNA, or an empty list if no base pair
     *         was found or a number could not be parsed
     * @throws ExceptionLoadingFailed if reading fails or the RNA cannot be
     *         built from the parsed data
     */
    public static ArrayList<RNA> loadSecStrBPSEQ(Reader r)
            throws ExceptionPermissionDenied, ExceptionLoadingFailed,
            ExceptionFileFormatOrSyntax {
        boolean loadOk = false;
        ArrayList<RNA> result = new ArrayList<RNA>();
        RNA current = new RNA();
        try {
            BufferedReader fr = new BufferedReader(r);
            String line = fr.readLine();
            ArrayList<String> seqTmp = new ArrayList<String>();
            Hashtable<Integer, Vector<Integer>> strTmp = new Hashtable<Integer, Vector<Integer>>();

            int bpFrom;
            String base;
            int bpTo;
            int minIndex = -1;
            String title = "";
            String id = "";
            String filenameStr = "Filename:";
            String organismStr = "Organism:";
            String ANStr = "Accession Number:";
            while (line != null) {
                line = line.trim();
                String[] tokens = line.split("\\s+");
                if ((tokens.length >= 3) && !tokens[0].contains("#")
                        && !line.startsWith("Organism:")
                        && !line.startsWith("Citation")
                        && !line.startsWith("Filename:")
                        && !line.startsWith("Accession Number:")) {
                    base = tokens[1];
                    bpFrom = (Integer.parseInt(tokens[0]));
                    if (minIndex < 0) {
                        minIndex = bpFrom;
                    }
                    // Fill any numbering gap BEFORE adding the base, so that
                    // the base ends up at position (bpFrom - minIndex).
                    while (seqTmp.size() < (bpFrom - minIndex)) {
                        seqTmp.add("X");
                    }
                    seqTmp.add(base);
                    for (int i = 2; i < tokens.length; i++) {
                        bpTo = (Integer.parseInt(tokens[i]));
                        // A trailing 0 means "unpaired" and is not recorded
                        if ((bpTo != 0) || (i != tokens.length - 1)) {
                            if (!strTmp.containsKey(bpFrom)) {
                                strTmp.put(bpFrom, new Vector<Integer>());
                            }
                            strTmp.get(bpFrom).add(bpTo);
                        }
                    }
                } else if (tokens[0].startsWith("#")) {
                    int occur = line.indexOf("#");
                    String tmp = line.substring(occur + 1);
                    title += tmp.trim() + " ";
                } else if (tokens[0].startsWith(filenameStr)) {
                    int occur = line.indexOf(filenameStr);
                    String tmp = line.substring(occur + filenameStr.length());
                    title += tmp.trim();
                } else if (tokens[0].startsWith(organismStr)) {
                    int occur = line.indexOf(organismStr);
                    String tmp = line.substring(occur + organismStr.length());
                    if (title.length() != 0) {
                        title = "/" + title;
                    }
                    title = tmp.trim() + title;
                } else if (line.contains(ANStr)) {
                    int occur = line.indexOf(ANStr);
                    String tmp = line.substring(occur + ANStr.length());
                    id = tmp.trim();
                }
                line = fr.readLine();
            }
            if (strTmp.size() != 0) {
                ArrayList<String> seq = seqTmp;
                int[] str = new int[seq.size()];
                for (int i = 0; i < seq.size(); i++) {
                    str[i] = -1;
                }
                current.setRNA(seq, str, minIndex);
                ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
                for (int i : strTmp.keySet()) {
                    for (int j : strTmp.get(i)) {
                        // Each pair is listed from both ends: keep one copy
                        if (i <= j) {
                            ModeleBase mb = current.getBaseAt(i - minIndex);
                            ModeleBase part = current.getBaseAt(j - minIndex);
                            ModeleBP newStyle = new ModeleBP(mb, part);
                            allbps.add(newStyle);
                        }
                    }
                }
                current.applyBPs(allbps);
                current.setName(title);
                current.setID(id);
                loadOk = true;
            }
        } catch (NumberFormatException e) {
            if (!isQuiet) { // BH SwingJS
                e.printStackTrace();
            }
        } catch (IOException e) {
            // Report read failures like the DBN and CT loaders do
            throw new ExceptionLoadingFailed(e.getMessage(), "");
        } catch (Exception e) {
            throw new ExceptionLoadingFailed(e.getMessage(), "");
        }
        if (loadOk) {
            result.add(current);
        }
        return result;
    }

    /**
     * Reads a T-Coffee library file. Lines starting with {@code '!'} are
     * skipped. The first remaining line gives the number of sequences, the
     * next ones define sequences (ID in the first token, sequence in the
     * third), and {@code #i j} lines open a section of three-token pair
     * lines. Only sections where {@code i == j} produce an RNA.
     *
     * @param r source of the file
     * @return the RNAs found in the file
     * @throws ExceptionLoadingFailed if the content cannot be read or parsed
     */
    public static ArrayList<RNA> loadSecStrTCoffee(Reader r)
            throws ExceptionPermissionDenied, ExceptionLoadingFailed,
            ExceptionFileFormatOrSyntax {
        boolean loadOk = false;
        ArrayList<RNA> result = new ArrayList<RNA>();
        try {
            BufferedReader fr = new BufferedReader(r);
            String line = fr.readLine();
            ArrayList<String> seqs = new ArrayList<String>();
            ArrayList<String> ids = new ArrayList<String>();
            int numSeqs = -1;
            int currSeq = -1;
            RNA current = null;
            while (line != null) {
                if (!line.startsWith("!")) {
                    String[] tokens = line.split("\\s+");
                    // This may indicate new secondary structure
                    if (line.startsWith("#")) {
                        currSeq = Integer.parseInt(tokens[0].substring(1));
                        int currSeq2 = Integer.parseInt(tokens[1]);
                        // For TCoffee, a sec str is a matching between a seq
                        // and itself => Disregard any alignment by filtering
                        // on the equality of sequence indices.
                        if (currSeq == currSeq2) {
                            current = new RNA();
                            current.setName(ids.get(currSeq - 1));
                            current.setSequence(seqs.get(currSeq - 1));
                            result.add(current);
                        } else {
                            current = null;
                        }
                    }
                    // Beginning of the file...
                    else if (current == null) {
                        // ... either this is the number of sequences...
                        if (numSeqs < 0) {
                            numSeqs = Integer.parseInt(tokens[0]);
                        }
                        // ... or this is a sequence definition...
                        else {
                            String id = tokens[0];
                            String seq = tokens[2];
                            seqs.add(seq);
                            ids.add(id);
                        }
                    }
                    // Otherwise, this is a base-pair definition, related to
                    // the currently selected sequence
                    else if (tokens.length == 3) {
                        int from = Integer.parseInt(tokens[0]) - 1;
                        int to = Integer.parseInt(tokens[1]) - 1;
                        current.addBP(from, to);
                    }
                }
                line = fr.readLine();
            }
            loadOk = true;
        } catch (NumberFormatException e) {
            if (!isQuiet) { // BH SwingJS
                e.printStackTrace();
            }
        } catch (IndexOutOfBoundsException e) {
            // Missing tokens or a sequence index that was never defined:
            // treated as a parse error, like an unparsable number.
            if (!isQuiet) {
                e.printStackTrace();
            }
        } catch (IOException e) {
            if (!isQuiet) { // BH SwingJS
                e.printStackTrace();
            }
        }
        if (!loadOk) {
            throw new ExceptionLoadingFailed("Parse Error", "");
        }
        return result;
    }

    /**
     * Reads a CT (connect) file. Lines with at least six tokens are data
     * lines: index, base, previous index, next index, partner index and
     * label. A previous index of 0 on a non-first base marks a backbone
     * discontinuity. Gaps in the numbering are filled with unpaired
     * {@code "X"} bases. Lines containing {@code "ENERGY = "} or
     * {@code "dG = "} feed the name of the RNA.
     *
     * @param r source of the file
     * @return a list holding the parsed RNA, or an empty list if no data line
     *         was found
     * @throws ExceptionLoadingFailed if reading fails
     * @throws ExceptionFileFormatOrSyntax if a number cannot be parsed outside
     *         of a data line
     */
    public static ArrayList<RNA> loadSecStrCT(Reader r)
            throws ExceptionPermissionDenied, ExceptionLoadingFailed,
            ExceptionFileFormatOrSyntax {
        boolean loadOk = false;
        ArrayList<RNA> result = new ArrayList<RNA>();
        RNA current = new RNA();
        try {
            BufferedReader fr = new BufferedReader(r);
            String line = fr.readLine();
            ArrayList<String> seq = new ArrayList<String>();
            ArrayList<String> lbls = new ArrayList<String>();
            Vector<Integer> strTmp = new Vector<Integer>();
            Vector<Integer> newStrands = new Vector<Integer>();
            int bpFrom;
            String base;
            String lbl;
            int bpTo;
            int minIndex = -1;
            String title = "";
            while (line != null) {
                line = line.trim();
                String[] tokens = line.split("\\s+");
                if (tokens.length >= 6) {
                    try {
                        bpFrom = (Integer.parseInt(tokens[0]));
                        bpTo = (Integer.parseInt(tokens[4]));
                        if (minIndex == -1) {
                            minIndex = bpFrom;
                        }
                        bpFrom -= minIndex;
                        if (bpTo != 0) {
                            bpTo -= minIndex;
                        } else {
                            bpTo = -1;
                        }
                        base = tokens[1];
                        lbl = tokens[5];
                        int before = Integer.parseInt(tokens[2]);
                        // The "next" index is not used, but a line where it
                        // is not a number is not a data line.
                        Integer.parseInt(tokens[3]);

                        if (before == 0 && !seq.isEmpty()) {
                            newStrands.add(strTmp.size() - 1);
                        }
                        // Fill any numbering gap with unpaired unknown bases
                        while (bpFrom > seq.size()) {
                            seq.add("X");
                            strTmp.add(-1);
                            lbls.add("");
                        }
                        seq.add(base);
                        strTmp.add(bpTo);
                        lbls.add(lbl);
                    } catch (NumberFormatException e) {
                        // Header lines are expected to fail before any data
                        // line has been read; later failures are reported.
                        if (strTmp.size() != 0 && !isQuiet) {
                            e.printStackTrace();
                        }
                    }
                }
                if ((line.contains("ENERGY = ")) || line.contains("dG = ")) {
                    String[] ntokens = line.split("\\s+");
                    if (ntokens.length >= 4) {
                        String energy = ntokens[3];
                        for (int i = 4; i < ntokens.length; i++) {
                            title += ntokens[i] + " ";
                        }
                        title += "(E=" + energy + " kcal/mol)";
                    }
                }
                line = fr.readLine();
            }
            if (strTmp.size() != 0) {
                int[] str = new int[strTmp.size()];
                for (int i = 0; i < strTmp.size(); i++) {
                    str[i] = strTmp.elementAt(i).intValue();
                }
                current.setRNA(seq, str, minIndex);
                current.setName(title);
                for (int i = 0; i < current.getSize(); i++) {
                    current.getBaseAt(i).setLabel(lbls.get(i));
                }
                for (int i : newStrands) {
                    current.getBackbone().addElement(
                            new ModeleBackboneElement(i,
                                    BackboneType.DISCONTINUOUS_TYPE));
                }
                loadOk = true;
            }
        } catch (IOException e) {
            if (!isQuiet) {
                e.printStackTrace();
            }
            throw new ExceptionLoadingFailed(e.getMessage(), "");
        } catch (NumberFormatException e) {
            if (!isQuiet) { // BH SwingJS
                e.printStackTrace();
            }
            throw new ExceptionFileFormatOrSyntax(e.getMessage(), "");
        }
        if (loadOk) {
            result.add(current);
        }
        return result;
    }
}