+++ /dev/null
-/**
- * File written by Raphael Champeimont
- * UMR 7238 Genomique des Microorganismes
- */
-package fr.orsay.lri.varna.models.templates;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.util.ArrayList;
-import java.util.List;
-
-import fr.orsay.lri.varna.factories.RNAFactory;
-
-public class BatchBenchmarkPrepare {
-
- /**
- * We assume given directory contains a alignemnt.fasta file,
- * of which the first sequence is the consensus structure,
- * and the other sequences are aligned nucleotides.
- * The principle is to convert it to a set of secondary structure,
- * using the following rule:
- * - keep the same nucleotides as in original sequence
- * - keep base pairs where both bases of the pair are non-gaps in our sequence
- */
- public void benchmarkAllDir(File rootdir) throws Exception {
- File seqdir = new File(rootdir, "sequences");
- if (!seqdir.exists()) {
- seqdir.mkdir();
- }
-
- File templateFile = new File(rootdir, "template.xml");
-
- ArrayList<String> seqnames = new ArrayList<String>();
- ArrayList<String> sequences = new ArrayList<String>();
- BatchBenchmark.readFASTA(new File(rootdir, "alignment.fasta"), seqnames, sequences);
-
- BufferedWriter outbufASS = new BufferedWriter(new FileWriter(new File(rootdir, "all_secondary_structures.fasta")));
-
- String consensusSecStr = sequences.get(0);
- int[] consensusSecStrInt = RNAFactory.parseSecStr(consensusSecStr);
-
- List<File> templates = new ArrayList<File>();
- for (int i=1; i<seqnames.size(); i++) {
- String seqname = seqnames.get(i);
- String sequence = sequences.get(i);
- String sequenceUngapped = sequence.replaceAll("[\\.-]", "");
- System.out.println(seqname);
- String ss = "";
- String nt = "";
- for (int j=0; j<sequence.length(); j++) {
- if (sequence.charAt(j) != '.' && sequence.charAt(j) != '-') {
- if (consensusSecStr.charAt(j) == '-' || consensusSecStr.charAt(j) == '.') {
- nt += sequence.charAt(j);
- ss += '.';
- } else {
- int k = consensusSecStrInt[j];
- // k is the matching base, is it aligned to a base in our sequence?
- if (sequence.charAt(k) != '.' && sequence.charAt(k) != '-') {
- nt += sequence.charAt(j);
- ss += consensusSecStr.charAt(j);
- } else {
- nt += sequence.charAt(j);
- ss += '.';
- }
- }
- }
- }
-
- if (!sequenceUngapped.equals(nt)) {
- System.out.println(sequenceUngapped);
- System.out.println(nt);
- throw new Error("bug");
- }
-
- // We now have the sequence with its secondary structure.
- File outfile = new File(seqdir, seqname + ".dbn");
- BufferedWriter outbuf = new BufferedWriter(new FileWriter(outfile));
- outbuf.write(">" + seqname + "\n");
- outbuf.write(nt + "\n");
- outbuf.write(ss + "\n");
- outbuf.close();
-
- outbufASS.write(">" + seqname + "\n");
- outbufASS.write(ss + "\n");
-
- templates.add(templateFile);
- }
-
- outbufASS.close();
-
- }
-
- public static void main(String[] args) throws Exception {
- new BatchBenchmarkPrepare().benchmarkAllDir(new File(new File("templates"), "RNaseP_bact_a"));
- }
-
-}