2 * File written by Raphael Champeimont
3 * UMR 7238 Genomique des Microorganismes
5 package fr.orsay.lri.varna.models.templates;
7 import java.io.BufferedReader;
8 import java.io.BufferedWriter;
10 import java.io.FileNotFoundException;
11 import java.io.FileReader;
12 import java.io.FileWriter;
13 import java.io.IOException;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.Collection;
17 import java.util.List;
19 import fr.orsay.lri.varna.exceptions.ExceptionExportFailed;
20 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
21 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
22 import fr.orsay.lri.varna.exceptions.ExceptionNAViewAlgorithm;
23 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
24 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
25 import fr.orsay.lri.varna.exceptions.ExceptionXmlLoading;
26 import fr.orsay.lri.varna.factories.RNAFactory;
27 import fr.orsay.lri.varna.models.VARNAConfig;
28 import fr.orsay.lri.varna.models.rna.RNA;
31 public class BatchBenchmark {
// Drawing configuration passed to every rna.draw* call made by this class.
32 private VARNAConfig conf = new VARNAConfig();
// Value passed as the last argument of every drawRNATemplate call in benchmarkRNA.
34 final boolean DEFAULT_STRAIGHT_BULGES = false;
36 public static RNA loadRNA(File file) throws ExceptionFileFormatOrSyntax, ExceptionUnmatchedClosingParentheses, FileNotFoundException, ExceptionExportFailed, ExceptionPermissionDenied, ExceptionLoadingFailed {
37 Collection<RNA> rnas = RNAFactory.loadSecStr(file.getPath());
39 throw new ExceptionFileFormatOrSyntax(
40 "No RNA could be parsed from that source.");
42 return rnas.iterator().next();
// Benchmarks one RNA against one template: loads the template from XML and the
// RNA via loadRNA, then loops over algorithm ids 0..100, drawing the RNA with a
// layout (Radiate, NAView, or a drawRNATemplate variant) and building a
// Benchmark from the drawn RNA. The visible fragments assemble tab-separated
// fields: RNA file name without extension, backbone crossings, average unpaired
// distance as a percentage of the target consecutive-base distance, and the
// too-near / too-far consecutive base counts.
// NOTE(review): this listing omits lines (the embedded numbers jump, e.g.
// 52 -> 58 and 112 -> 116), so the case labels mapping ids to algorithms, the
// handling of unused ids and the call that writes to outbuf are not visible
// here -- confirm against the full file before changing any logic.
45 public void benchmarkRNA(File templatePath, File rnaPath, BufferedWriter outbuf) throws ExceptionXmlLoading, RNATemplateDrawingAlgorithmException, ExceptionFileFormatOrSyntax, ExceptionUnmatchedClosingParentheses, ExceptionExportFailed, ExceptionPermissionDenied, ExceptionLoadingFailed, ExceptionNAViewAlgorithm, IOException {
47 RNATemplate template = RNATemplate.fromXMLFile(templatePath);
50 RNA rna = loadRNA(rnaPath);
// The same rna object is redrawn on every iteration.
52 for (int algo=0; algo<=100; algo++) {
// Linear and circular layouts are commented out, so they are not benchmarked.
58 // rna.drawRNALine(conf);
59 // algoname = "Linear";
62 // rna.drawRNACircle(conf);
63 // algoname = "Circular";
66 rna.drawRNARadiate(conf);
70 rna.drawRNANAView(conf);
// Template-based layouts: each variant pairs a DrawRNATemplateMethod with a
// DrawRNATemplateCurveMethod; algoname is the label used for that variant.
74 algoname = "Template/noadj";
75 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOADJUST, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
78 algoname = "Template/noadj/ellipses";
79 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOADJUST, DrawRNATemplateCurveMethod.ALWAYS_REPLACE_BY_ELLIPSES, DEFAULT_STRAIGHT_BULGES);
82 algoname = "Template/noadj/smart";
83 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOADJUST, DrawRNATemplateCurveMethod.SMART, DEFAULT_STRAIGHT_BULGES);
87 algoname = "Template/maxfactor";
88 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.MAXSCALINGFACTOR, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
92 algoname = "Template/mininter";
93 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOINTERSECT, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
96 algoname = "Template/translate";
97 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.HELIXTRANSLATE, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
100 algoname = "Template/translate/ellipses";
101 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.HELIXTRANSLATE, DrawRNATemplateCurveMethod.ALWAYS_REPLACE_BY_ELLIPSES, DEFAULT_STRAIGHT_BULGES);
104 algoname = "Template/translate/smart";
105 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.HELIXTRANSLATE, DrawRNATemplateCurveMethod.SMART, DEFAULT_STRAIGHT_BULGES);
// Metrics are computed from the RNA as it was just drawn.
112 Benchmark benchmark = new Benchmark(rna);
116 removeExt(rnaPath.getName())
118 + "\t" + benchmark.backboneCrossings
119 // averageUnpairedDistance % -> best is 100
120 + "\t" + (benchmark.averageUnpairedDistance / benchmark.targetConsecutiveBaseDistance *100)
121 + "\t" + benchmark.tooNearConsecutiveBases
122 + "\t" + benchmark.tooFarConsecutiveBases
128 public void runBenchmark(List<File> templates, List<File> rnas, File outfile) throws Exception {
129 if (templates.size() != rnas.size()) {
130 throw new Error("templates and rnas list size differ");
133 BufferedWriter outbuf = new BufferedWriter(new FileWriter(outfile));
135 outbuf.write("RNA\tAlgorithm\tBackbone crossings\tAverage unpaired distance %\tToo near\tToo far\n");
137 for (int i=0; i<templates.size(); i++) {
138 System.out.println("Benchmarking for RNA " + removeExt(rnas.get(i).getName()));
139 benchmarkRNA(templates.get(i), rnas.get(i), outbuf);
144 System.out.println("******* Benchmark finished. *******");
147 public void runExamples() throws Exception {
148 File templatesDir = new File("templates");
149 File root = new File(templatesDir, "examples");
150 File outfile = new File(new File(templatesDir, "benchmark"), "benchmark.txt");
152 String seqlist[] = {"RNase P E Coli.ct", "RNase P Synechocystis-PCC6803.ct", "RNase P M Musculus.ct"};
154 List<File> templates = new ArrayList<File>();
155 List<File> rnas = new ArrayList<File>();
157 for (String seq: seqlist) {
158 templates.add(new File(root, "RNase P E Coli.xml"));
159 rnas.add(new File(root, seq));
162 runBenchmark(templates, rnas, outfile);
165 public static void readFASTA(File file, List<String> seqnames, List<String> sequences) throws IOException {
166 BufferedReader buf = new BufferedReader(new FileReader(file));
167 String line = buf.readLine();
168 while (line != null) {
169 if (line.length() != 0) {
170 if (line.charAt(0) == '>') {
171 String id = line.substring(1); // remove the >
175 sequences.set(sequences.size()-1, sequences.get(sequences.size()-1) + line);
178 line = buf.readLine();
185 * We assume given directory contains a alignemnt.fasta file,
186 * of which the first sequence is the consensus structure,
187 * and the other sequences are aligned nucleotides.
188 * The principle is to convert it to a set of secondary structure,
189 * using the following rule:
190 * - keep the same nucleotides as in original sequence
191 * - keep base pairs where both bases of the pair are non-gaps in our sequence
193 public void benchmarkAllDir(File rootdir) throws Exception {
194 File seqdir = new File(rootdir, "sequences");
195 File templateFile = new File(rootdir, "template.xml");
196 File sequenceFiles[] = seqdir.listFiles();
197 Arrays.sort(sequenceFiles);
199 List<File> templates = new ArrayList<File>();
200 List<File> rnas = new ArrayList<File>();
201 for (File seq: sequenceFiles) {
202 if (!seq.getPath().endsWith(".dbn")) continue;
204 templates.add(templateFile);
207 File outfile = new File(rootdir, "benchmark.txt");
208 runBenchmark(templates, rnas, outfile);
// Command-line entry point: each argument names a sub-directory of "templates"
// that is benchmarked with benchmarkAllDir on a fresh BatchBenchmark instance.
// When no argument is given, a usage message is printed.
// NOTE(review): the lines following the usage message (embedded numbers
// 218-219) are missing from this listing, so whether the program exits at
// that point is not visible here -- confirm against the full file.
213 public static void main(String[] args) throws Exception {
214 File templatesDir = new File("templates");
215 if (args.length < 1) {
216 System.out.println("Command-line argument required: RNA");
217 System.out.println("Example: RNaseP_bact_a");
// The bundled-examples run is disabled; only directory benchmarks are executed.
220 //new BatchBenchmark().runExamples();
221 for (String arg: args) {
222 new BatchBenchmark().benchmarkAllDir(new File(templatesDir, arg));
227 * Return the given file path without the (last) extension.
229 public static String removeExt(String path) {
230 return path.substring(0, path.lastIndexOf('.'));
233 public static File removeExt(File path) {
234 return new File(removeExt(path.getPath()));