/*
 * Reconstructed from a flattened gitweb patch (jalview.git, new file, blob c514f31):
 * src2/fr/orsay/lri/varna/factories/RNAFactory.java
 */
package fr.orsay.lri.varna.factories;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EmptyStackException;
import java.util.Hashtable;
import java.util.List;
import java.util.Stack;
import java.util.Vector;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;

import fr.orsay.lri.varna.exceptions.ExceptionExportFailed;
import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
import fr.orsay.lri.varna.models.rna.ModeleBP;
import fr.orsay.lri.varna.models.rna.ModeleBackboneElement;
import fr.orsay.lri.varna.models.rna.ModeleBackboneElement.BackboneType;
import fr.orsay.lri.varna.models.rna.ModeleBase;
import fr.orsay.lri.varna.models.rna.RNA;
import fr.orsay.lri.varna.utils.RNAMLParser;

/**
 * Static helpers that read RNA secondary structures from the supported
 * text formats (dot-bracket, CT, BPSEQ, T-Coffee, Stockholm, RNAML) and
 * build {@link RNA} objects from them.
 *
 * BH JAVA FIX: mostly here we are just removing a lot of unnecessary stack
 * traces when doing a drag-drop of a file
 * BH JAVA FIX: making sure the file reader is closed properly
 */
public class RNAFactory
{

  /** File formats that {@link #loadSecStr(BufferedReader, RNAFileType)} can dispatch on. */
  public enum RNAFileType
  {
    FILE_TYPE_STOCKHOLM, FILE_TYPE_TCOFFEE, FILE_TYPE_BPSEQ, FILE_TYPE_CT, FILE_TYPE_DBN, FILE_TYPE_RNAML, FILE_TYPE_UNKNOWN
  }

  /**
   * While {@code true}, parse failures are not reported on stderr and
   * {@link #loadSecStr(BufferedReader, RNAFileType)} leaves its reader open.
   * It is only raised while the format of an input is being guessed.
   * NOTE(review): this is shared static state, so concurrent loads would
   * interfere with each other - confirm callers are single-threaded.
   */
  private static boolean isQuiet;

  /**
   * Parses an RNAML document and builds one {@link RNA} per molecule
   * reported by the {@link RNAMLParser}.
   *
   * @param r source of the RNAML document
   * @return the parsed molecules; empty when parsing failed for a reason
   *         other than an I/O error
   * @throws ExceptionLoadingFailed when the underlying reader raises an
   *         {@link IOException}
   */
  public static ArrayList<RNA> loadSecStrRNAML(Reader r)
          throws ExceptionPermissionDenied, ExceptionLoadingFailed,
          ExceptionFileFormatOrSyntax
  {
    ArrayList<RNA> result = new ArrayList<RNA>();
    try
    {
      SAXParserFactory saxFact = javax.xml.parsers.SAXParserFactory
              .newInstance();
      saxFact.setValidating(false);
      saxFact.setXIncludeAware(false);
      saxFact.setNamespaceAware(false);
      SAXParser sp = saxFact.newSAXParser();
      RNAMLParser RNAMLData = new RNAMLParser();
      sp.parse(new InputSource(r), RNAMLData);

      for (RNAMLParser.RNATmp rnaTmp : RNAMLData.getMolecules())
      {
        RNA current = new RNA();
        // Retrieving parsed data
        List<String> seq = rnaTmp.getSequence();
        // Creating empty structure of suitable size (-1 marks "unpaired")
        int[] str = new int[seq.size()];
        for (int i = 0; i < str.length; i++)
        {
          str[i] = -1;
        }
        current.setRNA(seq, str);

        Vector<RNAMLParser.BPTemp> allbpsTmp = rnaTmp.getStructure();
        ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
        for (int i = 0; i < allbpsTmp.size(); i++)
        {
          RNAMLParser.BPTemp bp = allbpsTmp.get(i);
          ModeleBase mb = current.getBaseAt(bp.pos5);
          ModeleBase part = current.getBaseAt(bp.pos3);
          allbps.add(bp.createBPStyle(mb, part));
        }

        current.applyBPs(allbps);
        result.add(current);
      }
    }
    catch (IOException ioe)
    {
      throw new ExceptionLoadingFailed(
              "Couldn't load file due to I/O or security policy issues.", "");
    }
    catch (Exception ge)
    {
      // Best effort: any other failure yields whatever was parsed so far.
      if (!isQuiet) // BH
        ge.printStackTrace();
    }
    return result;
  }

  /**
   * Converts a bracket notation into a partner table.
   *
   * Recognised pairs are {@code ()}, {@code []}, {@code {}}, {@code <>} and
   * any upper-case letter closed by its lower-case counterpart. Every other
   * character (including {@code '.'}) is left unpaired.
   *
   * @param _secStr the structure string
   * @return an array of the same length where entry {@code i} holds the
   *         index of the partner of position {@code i}, or -1 if unpaired
   * @throws ExceptionUnmatchedClosingParentheses when a closing symbol has
   *         no pending opening symbol; the offending index is passed to the
   *         exception
   */
  public static int[] parseSecStr(String _secStr)
          throws ExceptionUnmatchedClosingParentheses
  {
    Hashtable<Character, Stack<Integer>> stacks = new Hashtable<Character, Stack<Integer>>();
    int[] result = new int[_secStr.length()];
    for (int i = 0; i < _secStr.length(); i++)
    {
      result[i] = -1;
      char c = _secStr.charAt(i);
      if (isOpeningSymbol(c))
      {
        Stack<Integer> pending = stacks.get(c);
        if (pending == null)
        {
          pending = new Stack<Integer>();
          stacks.put(c, pending);
        }
        pending.push(i);
        continue;
      }

      char opener = openingSymbolFor(c);
      if (opener == NO_OPENER)
      {
        // '.' or any other non-structural character
        continue;
      }
      Stack<Integer> pending = stacks.get(opener);
      // A missing stack means no opener of this family was ever seen; that
      // is just as unmatched as an exhausted stack.
      if (pending == null || pending.isEmpty())
      {
        throw new ExceptionUnmatchedClosingParentheses(i);
      }
      int j = pending.pop();
      result[i] = j;
      result[j] = i;
    }
    return result;
  }

  /** Sentinel returned by {@link #openingSymbolFor(char)} for non-closing characters. */
  private static final char NO_OPENER = '\0';

  /** Tells whether {@code c} opens a base pair in bracket notation. */
  private static boolean isOpeningSymbol(char c)
  {
    switch (c)
    {
    case '<':
    case '{':
    case '(':
    case '[':
      return true;
    default:
      return Character.isLetter(c) && Character.isUpperCase(c);
    }
  }

  /** Returns the opening symbol closed by {@code c}, or {@link #NO_OPENER}. */
  private static char openingSymbolFor(char c)
  {
    switch (c)
    {
    case '>':
      return '<';
    case '}':
      return '{';
    case ')':
      return '(';
    case ']':
      return '[';
    default:
      if (Character.isLetter(c) && Character.isLowerCase(c))
      {
        return Character.toUpperCase(c);
      }
      return NO_OPENER;
    }
  }

  /**
   * Reads a single record in dot-bracket style: an optional title line
   * starting with {@code '>'}, then the first non-title line as the
   * sequence and the next one as the structure. Reading stops as soon as a
   * structure line has been found.
   *
   * @param r source of the text
   * @return a list holding the parsed RNA, or an empty list when no
   *         structure line was found
   * @throws ExceptionLoadingFailed when reading from {@code r} fails
   */
  public static ArrayList<RNA> loadSecStrDBN(Reader r)
          throws ExceptionLoadingFailed, ExceptionPermissionDenied,
          ExceptionUnmatchedClosingParentheses, ExceptionFileFormatOrSyntax
  {
    boolean loadOk = false;
    ArrayList<RNA> result = new ArrayList<RNA>();
    RNA current = new RNA();
    try
    {
      BufferedReader fr = new BufferedReader(r);
      String line = fr.readLine();
      String title = "";
      String seqTmp = "";
      String strTmp = "";
      while ((line != null) && (strTmp.equals("")))
      {
        line = line.trim();
        if (line.startsWith(">"))
        {
          title = line.substring(1).trim();
        }
        else if (seqTmp.equals(""))
        {
          seqTmp = line;
        }
        else
        {
          strTmp = line;
        }
        line = fr.readLine();
      }
      if (strTmp.length() != 0)
      {
        current.setRNA(seqTmp, strTmp);
        current.setName(title);
        loadOk = true;
      }
    }
    catch (IOException e)
    {
      throw new ExceptionLoadingFailed(e.getMessage(), "");
    }
    if (loadOk)
    {
      result.add(current);
    }
    return result;
  }

  /**
   * Loads the structures stored in a file, trying every known format.
   *
   * @param f file to read
   * @return the parsed RNAs
   * @throws ExceptionFileFormatOrSyntax when the file cannot be opened or
   *         no format accepts its content
   */
  public static ArrayList<RNA> loadSecStr(File f)
          throws ExceptionFileFormatOrSyntax
  {
    try
    {
      return loadSecStr(new BufferedReader(new FileReader(f)),
              RNAFileType.FILE_TYPE_UNKNOWN);
    }
    catch (FileNotFoundException e)
    {
      throw new ExceptionFileFormatOrSyntax(f.toString());
    }
  }

  /**
   * Loads the structures available from a reader, trying every known format.
   *
   * @param r source of the text
   * @return the parsed RNAs
   * @throws ExceptionFileFormatOrSyntax when no format accepts the content
   */
  public static ArrayList<RNA> loadSecStr(Reader r)
          throws ExceptionFileFormatOrSyntax
  {
    return loadSecStr(new BufferedReader(r), RNAFileType.FILE_TYPE_UNKNOWN);
  }

  /**
   * Loads the structures available from a reader using the given format.
   *
   * With {@link RNAFileType#FILE_TYPE_UNKNOWN} every other format is tried
   * in enum declaration order, rewinding the reader between attempts, and
   * the first non-empty result wins. The reader is closed before returning
   * unless this call is itself one of those nested attempts.
   *
   * @param r source of the text
   * @param fileType format to parse as, or {@code FILE_TYPE_UNKNOWN} to guess
   * @return a non-empty list of parsed RNAs
   * @throws ExceptionFileFormatOrSyntax when nothing could be parsed
   */
  public static ArrayList<RNA> loadSecStr(BufferedReader r,
          RNAFileType fileType) throws ExceptionFileFormatOrSyntax
  {
    try
    {
      if (fileType == RNAFileType.FILE_TYPE_UNKNOWN)
      {
        ArrayList<RNA> guessed = loadSecStrGuessingType(r);
        if (guessed != null)
        {
          return guessed;
        }
      }
      else
      {
        try
        {
          ArrayList<RNA> result = loadSecStrAs(r, fileType);
          if (result.size() != 0)
          {
            return result;
          }
        }
        catch (Exception e)
        {
          // DBN failures have always been silent; the other formats report
          // unless a format-guessing pass is in progress.
          if (!isQuiet && fileType != RNAFileType.FILE_TYPE_DBN) // BH
            e.printStackTrace();
        }
      }
      throw new ExceptionFileFormatOrSyntax(
              "Couldn't parse this file as " + fileType + ".");
    }
    finally
    { // BH !!
      try
      {
        if (!isQuiet)
          r.close();
      }
      catch (IOException e)
      {
        // ignore: nothing useful can be done if closing fails
      }
    }
  }

  /** Dispatches to the loader of one concrete (non-unknown) format. */
  private static ArrayList<RNA> loadSecStrAs(BufferedReader r,
          RNAFileType fileType)
          throws IOException, ExceptionPermissionDenied,
          ExceptionLoadingFailed, ExceptionFileFormatOrSyntax,
          ExceptionUnmatchedClosingParentheses
  {
    switch (fileType)
    {
    case FILE_TYPE_DBN:
      return loadSecStrDBN(r);
    case FILE_TYPE_CT:
      return loadSecStrCT(r);
    case FILE_TYPE_BPSEQ:
      return loadSecStrBPSEQ(r);
    case FILE_TYPE_TCOFFEE:
      return loadSecStrTCoffee(r);
    case FILE_TYPE_STOCKHOLM:
      return loadSecStrStockholm(r);
    case FILE_TYPE_RNAML:
      return loadSecStrRNAML(r);
    default:
      return new ArrayList<RNA>();
    }
  }

  /**
   * Tries every concrete format on {@code r} and returns the first
   * non-empty result, or {@code null} when none succeeded. Diagnostics are
   * silenced for the duration of the attempts and the previous setting is
   * always restored, even if rewinding the reader fails.
   */
  private static ArrayList<RNA> loadSecStrGuessingType(BufferedReader r)
  {
    boolean wasQuiet = isQuiet;
    try
    {
      // reset() may fail once more than this many characters were read.
      r.mark(1000000);
      isQuiet = true; // BH to not report errors when drag-dropping
      for (RNAFileType candidate : RNAFileType.values())
      {
        r.reset();
        if (candidate == RNAFileType.FILE_TYPE_UNKNOWN)
        {
          continue;
        }
        try
        {
          ArrayList<RNA> result = loadSecStr(r, candidate);
          if (result.size() != 0)
          {
            return result;
          }
        }
        catch (Exception e)
        {
          // Expected for every format that does not match; try the next one.
        }
      }
    }
    catch (IOException e2)
    {
      e2.printStackTrace();
    }
    finally
    {
      isQuiet = wasQuiet; // BH
    }
    return null;
  }

  /**
   * Guesses a file format from the end of a path. The checks are plain
   * case-insensitive suffix tests, evaluated in this order: "ml", "dbn" or
   * "faa", "ct", "bpseq", "rfold", "stockholm" or "stk".
   *
   * @param path file name or path
   * @return the first matching type, or {@code FILE_TYPE_UNKNOWN}
   */
  public static RNAFileType guessFileTypeFromExtension(String path)
  {
    String lower = path.toLowerCase();
    if (lower.endsWith("ml"))
    {
      return RNAFileType.FILE_TYPE_RNAML;
    }
    if (lower.endsWith("dbn") || lower.endsWith("faa"))
    {
      return RNAFileType.FILE_TYPE_DBN;
    }
    if (lower.endsWith("ct"))
    {
      return RNAFileType.FILE_TYPE_CT;
    }
    if (lower.endsWith("bpseq"))
    {
      return RNAFileType.FILE_TYPE_BPSEQ;
    }
    if (lower.endsWith("rfold"))
    {
      return RNAFileType.FILE_TYPE_TCOFFEE;
    }
    if (lower.endsWith("stockholm") || lower.endsWith("stk"))
    {
      return RNAFileType.FILE_TYPE_STOCKHOLM;
    }
    return RNAFileType.FILE_TYPE_UNKNOWN;
  }

  /**
   * Loads the structures stored at {@code path}, choosing the format from
   * the file name via {@link #guessFileTypeFromExtension(String)}.
   *
   * @param path location of the file
   * @return the parsed RNAs
   * @throws FileNotFoundException when the file cannot be opened
   * @throws ExceptionFileFormatOrSyntax when parsing fails; the path is
   *         recorded on the exception before it is rethrown
   */
  public static ArrayList<RNA> loadSecStr(String path)
          throws ExceptionExportFailed, ExceptionPermissionDenied,
          ExceptionLoadingFailed, ExceptionFileFormatOrSyntax,
          ExceptionUnmatchedClosingParentheses, FileNotFoundException
  {
    FileReader fr = null;
    try
    {
      fr = new FileReader(path);
      RNAFileType type = guessFileTypeFromExtension(path);
      return loadSecStr(new BufferedReader(fr), type);
    }
    catch (ExceptionFileFormatOrSyntax e)
    {
      if (fr != null)
      {
        try
        {
          fr.close();
        }
        catch (IOException e2)
        {
          // ignore: the parse failure below is the error worth reporting
        }
      }
      e.setPath(path);
      throw e;
    }
  }

  /**
   * Reads a Stockholm alignment through {@code StockholmIO} and returns the
   * RNAs of the resulting alignment.
   *
   * @param r source of the text
   * @return the RNAs held by the alignment
   */
  public static ArrayList<RNA> loadSecStrStockholm(BufferedReader r)
          throws IOException, ExceptionUnmatchedClosingParentheses
  {
    RNAAlignment a = StockholmIO.readAlignement(r);
    return a.getRNAs();
  }

  /**
   * Reads a BPSEQ-style listing: data lines hold an index, a base and one
   * or more partner indices; lines starting with {@code '#'},
   * "Filename:", "Organism:" or containing "Accession Number:" feed the
   * title and id. Gaps in the numbering are filled with unpaired "X" bases.
   *
   * @param r source of the text
   * @return a list holding the parsed RNA, or an empty list when no base
   *         pair was found or a number could not be parsed
   * @throws ExceptionLoadingFailed when building the RNA fails unexpectedly
   */
  public static ArrayList<RNA> loadSecStrBPSEQ(Reader r)
          throws ExceptionPermissionDenied, ExceptionLoadingFailed,
          ExceptionFileFormatOrSyntax
  {
    boolean loadOk = false;
    ArrayList<RNA> result = new ArrayList<RNA>();
    RNA current = new RNA();
    try
    {
      BufferedReader fr = new BufferedReader(r);
      String line = fr.readLine();
      ArrayList<String> seqTmp = new ArrayList<String>();
      Hashtable<Integer, Vector<Integer>> strTmp = new Hashtable<Integer, Vector<Integer>>();

      int bpFrom;
      String base;
      int bpTo;
      int minIndex = -1;
      String title = "";
      String id = "";
      String filenameStr = "Filename:";
      String organismStr = "Organism:";
      String ANStr = "Accession Number:";
      while (line != null)
      {
        line = line.trim();
        String[] tokens = line.split("\\s+");
        if ((tokens.length >= 3) && !tokens[0].contains("#")
                && !line.startsWith("Organism:") && !line.startsWith("Citation")
                && !line.startsWith("Filename:")
                && !line.startsWith("Accession Number:"))
        {
          base = tokens[1];
          bpFrom = (Integer.parseInt(tokens[0]));
          if (minIndex < 0)
            minIndex = bpFrom;

          // Discontinuity in the numbering: fill the missing positions with
          // unpaired unknown 'X' nucleotides BEFORE storing this base, so
          // that it ends up at position (bpFrom - minIndex).
          while (seqTmp.size() < (bpFrom - minIndex))
          {
            seqTmp.add("X");
          }
          seqTmp.add(base);

          for (int i = 2; i < tokens.length; i++)
          {
            bpTo = (Integer.parseInt(tokens[i]));
            // A trailing 0 means "unpaired" and is not recorded.
            if ((bpTo != 0) || (i != tokens.length - 1))
            {
              if (!strTmp.containsKey(bpFrom))
                strTmp.put(bpFrom, new Vector<Integer>());
              strTmp.get(bpFrom).add(bpTo);
            }
          }
        }
        else if (tokens[0].startsWith("#"))
        {
          int occur = line.indexOf("#");
          String tmp = line.substring(occur + 1);
          title += tmp.trim() + " ";
        }
        else if (tokens[0].startsWith(filenameStr))
        {
          int occur = line.indexOf(filenameStr);
          String tmp = line.substring(occur + filenameStr.length());
          title += tmp.trim();
        }
        else if (tokens[0].startsWith(organismStr))
        {
          int occur = line.indexOf(organismStr);
          String tmp = line.substring(occur + organismStr.length());
          if (title.length() != 0)
          {
            title = "/" + title;
          }
          title = tmp.trim() + title;
        }
        else if (line.contains(ANStr))
        {
          int occur = line.indexOf(ANStr);
          String tmp = line.substring(occur + ANStr.length());
          id = tmp.trim();
        }
        line = fr.readLine();
      }
      if (strTmp.size() != 0)
      {
        ArrayList<String> seq = seqTmp;
        int[] str = new int[seq.size()];
        for (int i = 0; i < seq.size(); i++)
        {
          str[i] = -1;
        }
        current.setRNA(seq, str, minIndex);
        ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
        for (int i : strTmp.keySet())
        {
          for (int j : strTmp.get(i))
          {
            // Each pair is listed from both ends; keep one orientation only.
            if (i <= j)
            {
              ModeleBase mb = current.getBaseAt(i - minIndex);
              ModeleBase part = current.getBaseAt(j - minIndex);
              allbps.add(new ModeleBP(mb, part));
            }
          }
        }
        current.applyBPs(allbps);
        current.setName(title);
        current.setID(id);
        loadOk = true;
      }
    }
    catch (NumberFormatException e)
    {
      if (!isQuiet) // BH SwingJS
        e.printStackTrace();
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    catch (Exception e)
    {
      throw new ExceptionLoadingFailed(e.getMessage(), "");
    }
    if (loadOk)
      result.add(current);
    return result;
  }

  /**
   * Reads a T-Coffee style listing. Lines starting with {@code '!'} are
   * skipped. Before the first {@code '#'} line, the first line gives the
   * number of sequences and the following ones define sequences (id in the
   * first column, sequence in the third). A line "#a b" with a == b starts
   * the structure of sequence a; subsequent three-column lines add base
   * pairs (1-based indices) to it.
   *
   * @param r source of the text
   * @return one RNA per "#a a" section
   * @throws ExceptionLoadingFailed when a number cannot be parsed or
   *         reading fails
   */
  public static ArrayList<RNA> loadSecStrTCoffee(Reader r)
          throws ExceptionPermissionDenied, ExceptionLoadingFailed,
          ExceptionFileFormatOrSyntax
  {
    boolean loadOk = false;
    ArrayList<RNA> result = new ArrayList<RNA>();
    try
    {
      BufferedReader fr = new BufferedReader(r);
      String line = fr.readLine();
      ArrayList<String> seqs = new ArrayList<String>();
      ArrayList<String> ids = new ArrayList<String>();
      int numSeqs = -1;
      int currSeq = -1;
      RNA current = null;
      while (line != null)
      {
        if (!line.startsWith("!"))
        {
          String[] tokens = line.split("\\s+");
          // This may indicate new secondary structure
          if (line.startsWith("#"))
          {
            currSeq = Integer.parseInt(tokens[0].substring(1));
            int currSeq2 = Integer.parseInt(tokens[1]);
            // For TCoffee, a sec str is a matching between a seq and itself
            // => Disregard any alignment by filtering on the equality of
            // sequence indices.
            if (currSeq == currSeq2)
            {
              current = new RNA();
              current.setName(ids.get(currSeq - 1));
              current.setSequence(seqs.get(currSeq - 1));
              result.add(current);
            }
            else
            {
              current = null;
            }
          }
          // Beginning of the file...
          else if (current == null)
          {
            // ... either this is the number of sequences...
            if (numSeqs < 0)
            {
              numSeqs = Integer.parseInt(tokens[0]);
            }
            // ... or this is a sequence definition...
            else
            {
              String id = tokens[0];
              String seq = tokens[2];
              seqs.add(seq);
              ids.add(id);
            }
          }
          // Otherwise, this is a base-pair definition, related to the
          // currently selected sequence
          else if (tokens.length == 3)
          {
            int from = Integer.parseInt(tokens[0]) - 1;
            int to = Integer.parseInt(tokens[1]) - 1;
            current.addBP(from, to);
          }
        }
        line = fr.readLine();
      }
      loadOk = true;
    }
    catch (NumberFormatException e)
    {
      if (!isQuiet) // BH SwingJS
        e.printStackTrace();
    }
    catch (IOException e)
    {
      if (!isQuiet) // BH SwingJS
        e.printStackTrace();
    }
    if (!loadOk)
    {
      throw new ExceptionLoadingFailed("Parse Error", "");
    }
    return result;
  }

  /**
   * Reads a CT-style listing. Lines with at least six columns are data
   * lines: index, base, "before" index, "after" index, partner index
   * (0 = unpaired) and label. A line whose columns are not numeric is
   * skipped. A "before" value of 0 on a non-first base marks a backbone
   * discontinuity. Lines containing "ENERGY = " or "dG = " contribute to
   * the title. Gaps in the numbering are filled with unpaired "X" bases.
   *
   * @param r source of the text
   * @return a list holding the parsed RNA, or an empty list when no data
   *         line was found
   * @throws ExceptionLoadingFailed when reading from {@code r} fails
   */
  public static ArrayList<RNA> loadSecStrCT(Reader r)
          throws ExceptionPermissionDenied, ExceptionLoadingFailed,
          ExceptionFileFormatOrSyntax
  {
    boolean loadOk = false;
    ArrayList<RNA> result = new ArrayList<RNA>();
    RNA current = new RNA();
    try
    {
      BufferedReader fr = new BufferedReader(r);
      String line = fr.readLine();
      ArrayList<String> seq = new ArrayList<String>();
      ArrayList<String> lbls = new ArrayList<String>();
      Vector<Integer> strTmp = new Vector<Integer>();
      Vector<Integer> newStrands = new Vector<Integer>();
      int bpFrom;
      String base;
      String lbl;
      int bpTo;
      int minIndex = -1;
      String title = "";
      while (line != null)
      {
        line = line.trim();
        String[] tokens = line.split("\\s+");
        if (tokens.length >= 6)
        {
          try
          {
            bpFrom = (Integer.parseInt(tokens[0]));
            bpTo = (Integer.parseInt(tokens[4]));
            if (minIndex == -1)
              minIndex = bpFrom;
            bpFrom -= minIndex;
            if (bpTo != 0)
              bpTo -= minIndex;
            else
              bpTo = -1;
            base = tokens[1];
            lbl = tokens[5];
            int before = Integer.parseInt(tokens[2]);
            // The value is unused, but a non-numeric column must still
            // cause this line to be rejected.
            Integer.parseInt(tokens[3]);

            if (before == 0 && !seq.isEmpty())
            {
              newStrands.add(strTmp.size() - 1);
            }
            // Discontinuity in the numbering: fill the missing positions
            // with unpaired unknown 'X' nucleotides.
            while (bpFrom > seq.size())
            {
              seq.add("X");
              strTmp.add(-1);
              lbls.add("");
            }
            seq.add(base);
            strTmp.add(bpTo);
            lbls.add(lbl);
          }
          catch (NumberFormatException e)
          {
            // Header lines before the first base are expected to fail here.
            if (strTmp.size() != 0)
              e.printStackTrace();
          }
        }
        if ((line.contains("ENERGY = ")) || line.contains("dG = "))
        {
          String[] ntokens = line.split("\\s+");
          if (ntokens.length >= 4)
          {
            String energy = ntokens[3];
            for (int i = 4; i < ntokens.length; i++)
            {
              title += ntokens[i] + " ";
            }
            title += "(E=" + energy + " kcal/mol)";
          }
        }
        line = fr.readLine();
      }
      if (strTmp.size() != 0)
      {
        int[] str = new int[strTmp.size()];
        for (int i = 0; i < strTmp.size(); i++)
        {
          str[i] = strTmp.elementAt(i).intValue();
        }
        current.setRNA(seq, str, minIndex);
        current.setName(title);
        for (int i = 0; i < current.getSize(); i++)
        {
          current.getBaseAt(i).setLabel(lbls.get(i));
        }
        for (int i : newStrands)
        {
          current.getBackbone().addElement(
                  new ModeleBackboneElement(i, BackboneType.DISCONTINUOUS_TYPE));
        }
        loadOk = true;
      }
    }
    catch (IOException e)
    {
      e.printStackTrace();
      throw new ExceptionLoadingFailed(e.getMessage(), "");
    }
    catch (NumberFormatException e)
    {
      if (!isQuiet) // BH SwingJS
        e.printStackTrace();
      throw new ExceptionFileFormatOrSyntax(e.getMessage(), "");
    }
    if (loadOk)
      result.add(current);
    return result;
  }

}