JAL-3026 srcjar files for VARNA and log4j
[jalview.git] / srcjar / fr / orsay / lri / varna / models / templates / BatchBenchmarkPrepare.java
1 /**
2  * File written by Raphael Champeimont
3  * UMR 7238 Genomique des Microorganismes
4  */
5 package fr.orsay.lri.varna.models.templates;
6
7 import java.io.BufferedWriter;
8 import java.io.File;
9 import java.io.FileWriter;
10 import java.util.ArrayList;
11 import java.util.List;
12
13 import fr.orsay.lri.varna.factories.RNAFactory;
14
15 public class BatchBenchmarkPrepare {
16
17         /**
18          * We assume given directory contains a alignemnt.fasta file,
19          * of which the first sequence is the consensus structure,
20          * and the other sequences are aligned nucleotides. 
21          * The principle is to convert it to a set of secondary structure,
22          * using the following rule:
23          * - keep the same nucleotides as in original sequence
24          * - keep base pairs where both bases of the pair are non-gaps in our sequence
25          */
26         public void benchmarkAllDir(File rootdir) throws Exception {
27                 File seqdir = new File(rootdir, "sequences");
28                 if (!seqdir.exists()) {
29                         seqdir.mkdir();
30                 }
31                 
32                 File templateFile = new File(rootdir, "template.xml");
33                 
34                 ArrayList<String> seqnames = new ArrayList<String>();
35                 ArrayList<String> sequences = new ArrayList<String>();
36                 BatchBenchmark.readFASTA(new File(rootdir, "alignment.fasta"), seqnames, sequences);
37                 
38                 BufferedWriter outbufASS = new BufferedWriter(new FileWriter(new File(rootdir, "all_secondary_structures.fasta")));
39                 
40                 String consensusSecStr = sequences.get(0);
41                 int[] consensusSecStrInt = RNAFactory.parseSecStr(consensusSecStr);
42                 
43                 List<File> templates = new ArrayList<File>();
44                 for (int i=1; i<seqnames.size(); i++) {
45                         String seqname = seqnames.get(i);
46                         String sequence = sequences.get(i);
47                         String sequenceUngapped = sequence.replaceAll("[\\.-]", "");
48                         System.out.println(seqname);
49                         String ss = "";
50                         String nt = "";
51                         for (int j=0; j<sequence.length(); j++) {
52                                 if (sequence.charAt(j) != '.' && sequence.charAt(j) != '-') {
53                                         if (consensusSecStr.charAt(j) == '-' || consensusSecStr.charAt(j) == '.') {
54                                                 nt += sequence.charAt(j);
55                                                 ss += '.';
56                                         } else {
57                                                 int k = consensusSecStrInt[j];
58                                                 // k is the matching base, is it aligned to a base in our sequence?
59                                                 if (sequence.charAt(k) != '.' && sequence.charAt(k) != '-') {
60                                                         nt += sequence.charAt(j);
61                                                         ss += consensusSecStr.charAt(j);
62                                                 } else {
63                                                         nt += sequence.charAt(j);
64                                                         ss += '.';
65                                                 }
66                                         }
67                                 }
68                         }
69                         
70                         if (!sequenceUngapped.equals(nt)) {
71                                 System.out.println(sequenceUngapped);
72                                 System.out.println(nt);
73                                 throw new Error("bug");
74                         }
75                         
76                         // We now have the sequence with its secondary structure.
77                         File outfile = new File(seqdir, seqname + ".dbn");
78                         BufferedWriter outbuf = new BufferedWriter(new FileWriter(outfile));
79                         outbuf.write(">" + seqname + "\n");
80                         outbuf.write(nt + "\n");
81                         outbuf.write(ss + "\n");
82                         outbuf.close();
83                         
84                         outbufASS.write(">" + seqname + "\n");
85                         outbufASS.write(ss + "\n");
86                         
87                         templates.add(templateFile);
88                 }
89                 
90                 outbufASS.close();
91                 
92         }
93         
94         public static void main(String[] args) throws Exception {
95                 new BatchBenchmarkPrepare().benchmarkAllDir(new File(new File("templates"), "RNaseP_bact_a"));
96         }
97         
98 }