From 4dfd20147b1753e436e71ea673e2ac9a7fae420d Mon Sep 17 00:00:00 2001 From: jprocter Date: Fri, 7 Dec 2012 15:50:23 +0000 Subject: [PATCH] Anne's refined RNAML parser. Seems to have some bugs still (merge with jprocter's bugfixed parser) --- src/jalview/io/RnamlFile.java | 501 +++++++++++++++-------------------------- 1 file changed, 180 insertions(+), 321 deletions(-) diff --git a/src/jalview/io/RnamlFile.java b/src/jalview/io/RnamlFile.java index bc6b385..0029e60 100644 --- a/src/jalview/io/RnamlFile.java +++ b/src/jalview/io/RnamlFile.java @@ -17,34 +17,22 @@ */ package jalview.io; -import jalview.datamodel.AlignmentAnnotation; -import jalview.datamodel.Annotation; -import jalview.datamodel.SecondaryStructureAnnotation; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.ws.jws1.Annotate3D; - import java.io.BufferedReader; import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.Reader; import java.io.FileReader; -import java.lang.reflect.Array; import java.util.ArrayList; -import java.util.Collections; -import java.util.Hashtable; -import java.util.List; import java.util.Vector; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -import com.stevesoft.pat.Regex; +import jalview.analysis.SecStrConsensus; +import jalview.analysis.SecStrConsensus.SimpleBP; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.Annotation; +import jalview.datamodel.SecondaryStructureAnnotation; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax; import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed; @@ -55,318 +43,189 @@ import fr.orsay.lri.varna.factories.RNAFactory.RNAFileType; import fr.orsay.lri.varna.models.rna.RNA; import fr.orsay.lri.varna.utils.RNAMLParser; +public class RnamlFile extends AlignFile { + public int id; + public String namefile; + public String name; + protected ArrayList result; + public RnamlFile() { + super(); + } + + public RnamlFile(String inFile, String type) throws Exception { + super(inFile, type); -public class RnamlFile extends AlignFile -{ - public String id; - protected ArrayList result; - - public RnamlFile() - { - super(); - - } - - public RnamlFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses - { - super(inFile, type); - - } - - public RnamlFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses - { - super(source); - - } - -// public RnamlFile(BufferedReader r) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed -// { -// super(); -// parse(r); -// // sets the index of each sequence in the alignment -// for( int i=0,c=seqs.size(); i allarray = new ArrayList(); + ArrayList> BP = new ArrayList(); + ArrayList strucinarray = new ArrayList(); + SequenceI[] seqs = new SequenceI[result.size()]; + namefile = inFile.getName(); + + for (int i = 0; i < result.size(); i++) { + + RNA current = result.get(i); + String rna = current.getStructDBN(true); + String seq = current.getSeq(); + int begin = 1; + int end = 1000; + id = i; + System.out.println("id=" + i); + name = this.safeName(namefile, i); + seqs[i] = new Sequence(name, seq, begin, end); + + seqs[i].setEnd(seqs[i].findPosition(seqs[i].getLength())); + String[] annot = new String[rna.length()]; + Annotation[] ann = new Annotation[rna.length()]; + + for (int j = 0; j < rna.length(); j++) { + annot[j] = "" + rna.charAt(j); + } - - for(int k=0;k seq =new ArrayList(); - //System.out.println(r); - - result = RNAFactory.loadSecStr(r,RNAFileType.FILE_TYPE_RNAML); - - System.out.println("this is the secondary scructure:" +result.toArray()); - System.out.println("this is the secondary scructure:" +result.toString()); - System.out.println("this is the secondary scructure:" +result.size()); - for(int i=0;i allarray = new ArrayList(); + ArrayList> BP = new ArrayList(); + ArrayList strucinarray = new ArrayList(); + SequenceI[] seqs = new SequenceI[result.size()]; + namefile = inFile.getName(); + + for (int i = 0; i < result.size(); i++) { + + RNA current = result.get(i); + String rna = current.getStructDBN(true); + + String seq = current.getSeq(); + int begin = 1; + int end = 1000; + id = i; + name = this.safeName(namefile, i); + seqs[i] = new Sequence(name, seq, begin, end); + + seqs[i].setEnd(seqs[i].findPosition(seqs[i].getLength())); + String[] annot = new String[rna.length()]; + Annotation[] ann = new Annotation[rna.length()]; + + for (int j = 0; j < rna.length(); j++) { + annot[j] = "" + rna.charAt(j); + } - - for(int k=0;k allbpsTmp = rnaTmp.getStructure(); -// String[] annot = new String[10000]; -// -// //Annotation[] ann = new Annotation[allbpsTmp.size()]; -// for (int i = 0; i < allbpsTmp.size(); i++) { -// RNAMLParser.BPTemp bp = allbpsTmp.get(i); -// //System.err.println("toto ="+bp); -// int bp5 = bp.pos5; -// annot[bp5]= "("; -// //annot[bp5]= '('; -// System.out.println(annot[bp5]); -// //System.out.println("bp5 ="+bp5); -// int bp3 = bp.pos3; -// -// annot[bp3]= ")"; -// System.out.println(annot[bp3]); -// //System.out.println(annot.get(bp3)); -// -// //String pos1 = annot.substring(bp5, bp5+1); -// //String pos2 = annot.substring(bp3, bp3+1); -// //ann[i] = new Annotation(pos1, "", ' ', 0f); -// //ann[i+1] = new Annotation(pos2, "", ' ', 0f); -// //System.out.println(pos1); -// -// //System.out.println("bp3 ="+bp3); -// //ModeleBP newStyle = bp.createBPStyle(mb, part); -// } -// Annotation[] ann = new Annotation[annot.length+1]; -// for(int i=0;i allbpsTmp = rnaTmp.getStructure(); -// for (int i = 0; i < allbpsTmp.size(); i++) { -// RNAMLParser.BPTemp bp = allbpsTmp.get(i); -// //System.err.println("toto ="+bp); -// int bp5 = bp.pos5; -// //System.out.println("bp5 ="+bp5); -// int bp3 = bp.pos3; - //System.out.println("bp3 ="+bp3); - //ModeleBP newStyle = bp.createBPStyle(mb, part); -// } - //Annotation ann; - //ann = new Annotation(bp5, "", ' ', 0f); - - //} + } - + -// return result; - -} + public static String print(SequenceI[] s) { + return "not yet implemented"; + } + + public String print() { + System.out.print("print :"); + return print(getSeqsAsArray()); + } + + public ArrayList getRNA() { + return result; + } + + public Vector getAnnot() { + return annotations; + } + + // public static void main(String[] args) { + // Pattern p= Pattern.compile("(.+)[.][^.]+"); + // Matcher m = p.matcher("toto.xml.zip"); + // System.out.println(m.matches()); + // System.out.println(m.group(1)); + // } + public String safeName(String namefile, int id) { + + Pattern p = Pattern.compile("(.*)[.][^.]+"); + Matcher m = p.matcher(namefile); + name = m.group(1) + "_" + id; + + return name; + + } - - public static String print(SequenceI[] s) - { - return "not yet implemented"; - } - - public String print() - { - System.out.print("affiche :"); - return print(getSeqsAsArray()); - } - - /** - * make a friendly ID string. - * - * @param dataName - * @return truncated dataName to after last '/' - */ - private String safeName(String dataName) - { - int b = 0; - while ((b = dataName.indexOf("/")) > -1 && b < dataName.length() ) - { - dataName = dataName.substring(b + 1).trim(); - - } - int e = (dataName.length() - dataName.indexOf("."))+1; - dataName = dataName.substring(1,e).trim(); - return dataName; - } - - public ArrayList getRNA() - { - return result; - } - - public Vector getAnnot() - { - return annotations; - } } -- 1.7.10.2