Merge branch 'Jalview-JS/jim/JAL-3253-JAL-3418' into Jalview-JS/JAL-3253-applet
[jalview.git] / srcjar / fr / orsay / lri / varna / models / templates / BatchBenchmark.java
1 /**
2  * File written by Raphael Champeimont
3  * UMR 7238 Genomique des Microorganismes
4  */
5 package fr.orsay.lri.varna.models.templates;
6
7 import java.io.BufferedReader;
8 import java.io.BufferedWriter;
9 import java.io.File;
10 import java.io.FileNotFoundException;
11 import java.io.FileReader;
12 import java.io.FileWriter;
13 import java.io.IOException;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.Collection;
17 import java.util.List;
18
19 import fr.orsay.lri.varna.exceptions.ExceptionExportFailed;
20 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
21 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
22 import fr.orsay.lri.varna.exceptions.ExceptionNAViewAlgorithm;
23 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
24 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
25 import fr.orsay.lri.varna.exceptions.ExceptionXmlLoading;
26 import fr.orsay.lri.varna.factories.RNAFactory;
27 import fr.orsay.lri.varna.models.VARNAConfig;
28 import fr.orsay.lri.varna.models.rna.RNA;
29
30
31 public class BatchBenchmark {
32         private VARNAConfig conf = new VARNAConfig();
33         
34         final boolean DEFAULT_STRAIGHT_BULGES = false;
35         
36         public static RNA loadRNA(File file) throws ExceptionFileFormatOrSyntax, ExceptionUnmatchedClosingParentheses, FileNotFoundException, ExceptionExportFailed, ExceptionPermissionDenied, ExceptionLoadingFailed {
37                 Collection<RNA> rnas = RNAFactory.loadSecStr(file.getPath());
38                 if (rnas.isEmpty()) {
39                         throw new ExceptionFileFormatOrSyntax(
40                                         "No RNA could be parsed from that source.");
41                 }
42                 return rnas.iterator().next();
43         }
44         
45         public void benchmarkRNA(File templatePath, File rnaPath, BufferedWriter outbuf) throws ExceptionXmlLoading, RNATemplateDrawingAlgorithmException, ExceptionFileFormatOrSyntax, ExceptionUnmatchedClosingParentheses, ExceptionExportFailed, ExceptionPermissionDenied, ExceptionLoadingFailed, ExceptionNAViewAlgorithm, IOException {
46                 // load template
47                 RNATemplate template = RNATemplate.fromXMLFile(templatePath);
48                 
49                 // load RNA
50                 RNA rna = loadRNA(rnaPath);
51                 
52                 for (int algo=0; algo<=100; algo++) {
53                         String algoname = "";
54                 
55                         // draw RNA
56                         switch (algo) {
57                         //case 0:
58                         //      rna.drawRNALine(conf);
59                         //      algoname = "Linear";
60                         //      break;
61                         //case 1:
62                         //      rna.drawRNACircle(conf);
63                         //      algoname = "Circular";
64                         //      break;
65                         case 2:
66                                 rna.drawRNARadiate(conf);
67                                 algoname = "Radiate";
68                                 break;
69                         case 3:
70                                 rna.drawRNANAView(conf);
71                                 algoname = "NAView";
72                                 break;
73                         case 10:
74                                 algoname = "Template/noadj";
75                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOADJUST, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
76                                 break;
77                         case 11:
78                                 algoname = "Template/noadj/ellipses";
79                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOADJUST, DrawRNATemplateCurveMethod.ALWAYS_REPLACE_BY_ELLIPSES, DEFAULT_STRAIGHT_BULGES);
80                                 break;
81                         case 12:
82                                 algoname = "Template/noadj/smart";
83                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOADJUST, DrawRNATemplateCurveMethod.SMART, DEFAULT_STRAIGHT_BULGES);
84                                 break;
85                                 /*
86                         case 5:
87                                 algoname = "Template/maxfactor";
88                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.MAXSCALINGFACTOR, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
89                                 break;
90                                 */
91                         case 6:
92                                 algoname = "Template/mininter";
93                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.NOINTERSECT, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
94                                 break;
95                         case 30:
96                                 algoname = "Template/translate";
97                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.HELIXTRANSLATE, DrawRNATemplateCurveMethod.EXACTLY_AS_IN_TEMPLATE, DEFAULT_STRAIGHT_BULGES);
98                                 break;
99                         case 31:
100                                 algoname = "Template/translate/ellipses";
101                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.HELIXTRANSLATE, DrawRNATemplateCurveMethod.ALWAYS_REPLACE_BY_ELLIPSES, DEFAULT_STRAIGHT_BULGES);
102                                 break;
103                         case 32:
104                                 algoname = "Template/translate/smart";
105                                 rna.drawRNATemplate(template, conf, DrawRNATemplateMethod.HELIXTRANSLATE, DrawRNATemplateCurveMethod.SMART, DEFAULT_STRAIGHT_BULGES);
106                                 break;
107                         default:
108                                 continue;
109                         }
110                 
111                         // benchmark
112                         Benchmark benchmark = new Benchmark(rna);
113                         
114                         // print results
115                         outbuf.write(
116                                         removeExt(rnaPath.getName())
117                                         + "\t" + algoname
118                                         + "\t" + benchmark.backboneCrossings
119                                         // averageUnpairedDistance % -> best is 100
120                                         + "\t" + (benchmark.averageUnpairedDistance / benchmark.targetConsecutiveBaseDistance *100)
121                                         + "\t" + benchmark.tooNearConsecutiveBases
122                                         + "\t" + benchmark.tooFarConsecutiveBases
123                                         + "\n");
124                 }
125                 
126         }
127         
128         public void runBenchmark(List<File> templates, List<File> rnas, File outfile) throws Exception {
129                 if (templates.size() != rnas.size()) {
130                         throw new Error("templates and rnas list size differ");
131                 }
132                 
133                 BufferedWriter outbuf = new BufferedWriter(new FileWriter(outfile));
134                 
135                 outbuf.write("RNA\tAlgorithm\tBackbone crossings\tAverage unpaired distance %\tToo near\tToo far\n");
136                 
137                 for (int i=0; i<templates.size(); i++) {
138                         System.out.println("Benchmarking for RNA " + removeExt(rnas.get(i).getName()));
139                         benchmarkRNA(templates.get(i), rnas.get(i), outbuf);
140                 }
141                 
142                 outbuf.close();
143                 
144                 System.out.println("******* Benchmark finished. *******");
145         }
146         
147         public void runExamples() throws Exception {
148                 File templatesDir = new File("templates");
149                 File root = new File(templatesDir, "examples");
150                 File outfile = new File(new File(templatesDir, "benchmark"), "benchmark.txt");
151                 
152                 String seqlist[] = {"RNase P E Coli.ct", "RNase P Synechocystis-PCC6803.ct", "RNase P M Musculus.ct"};
153                 
154                 List<File> templates = new ArrayList<File>();
155                 List<File> rnas = new ArrayList<File>();
156                 
157                 for (String seq: seqlist) {
158                         templates.add(new File(root, "RNase P E Coli.xml"));
159                         rnas.add(new File(root, seq));
160                 }
161                 
162                 runBenchmark(templates, rnas, outfile);
163         }
164         
165         public static void readFASTA(File file, List<String> seqnames, List<String> sequences) throws IOException {
166                 BufferedReader buf = new BufferedReader(new FileReader(file));
167                 String line = buf.readLine();
168                 while (line != null) {
169                         if (line.length() != 0) {
170                                 if (line.charAt(0) == '>') {
171                                         String id = line.substring(1); // remove the >
172                                         seqnames.add(id);
173                                         sequences.add("");
174                                 } else {
175                                         sequences.set(sequences.size()-1, sequences.get(sequences.size()-1) + line);
176                                 }
177                         }
178                         line = buf.readLine();
179                 }
180                 buf.close();
181         }
182         
183         
184         /**
185          * We assume given directory contains a alignemnt.fasta file,
186          * of which the first sequence is the consensus structure,
187          * and the other sequences are aligned nucleotides. 
188          * The principle is to convert it to a set of secondary structure,
189          * using the following rule:
190          * - keep the same nucleotides as in original sequence
191          * - keep base pairs where both bases of the pair are non-gaps in our sequence
192          */
193         public void benchmarkAllDir(File rootdir) throws Exception {
194                 File seqdir = new File(rootdir, "sequences");
195                 File templateFile = new File(rootdir, "template.xml");
196                 File sequenceFiles[] = seqdir.listFiles();
197                 Arrays.sort(sequenceFiles);
198                 
199                 List<File> templates = new ArrayList<File>();
200                 List<File> rnas = new ArrayList<File>();
201                 for (File seq: sequenceFiles) {
202                         if (!seq.getPath().endsWith(".dbn")) continue;
203                         rnas.add(seq);
204                         templates.add(templateFile);
205                 }
206                 
207                 File outfile = new File(rootdir, "benchmark.txt");
208                 runBenchmark(templates, rnas, outfile);
209                 
210         }
211         
212         
213         public static void main(String[] args) throws Exception {
214                 File templatesDir = new File("templates");
215                 if (args.length < 1) {
216                         System.out.println("Command-line argument required: RNA");
217                         System.out.println("Example: RNaseP_bact_a");
218                         System.exit(1);
219                 }
220                 //new BatchBenchmark().runExamples();
221                 for (String arg: args) {
222                         new BatchBenchmark().benchmarkAllDir(new File(templatesDir, arg));
223                 }
224         }
225         
226         /**
227          * Return the given file path without the (last) extension. 
228          */
229         public static String removeExt(String path) {
230                 return path.substring(0, path.lastIndexOf('.'));
231         }
232         
233         public static File removeExt(File path) {
234                 return new File(removeExt(path.getPath()));
235         }
236 }