Anne's refined RNAML parser. Seems to have some bugs still (merge with jprocter's...
author jprocter <jprocter@compbio.dundee.ac.uk>
Fri, 7 Dec 2012 15:50:23 +0000 (15:50 +0000)
committer jprocter <jprocter@compbio.dundee.ac.uk>
Fri, 7 Dec 2012 15:50:23 +0000 (15:50 +0000)
src/jalview/io/RnamlFile.java

index bc6b385..0029e60 100644 (file)
  */
 package jalview.io;
 
-import jalview.datamodel.AlignmentAnnotation;
-import jalview.datamodel.Annotation;
-import jalview.datamodel.SecondaryStructureAnnotation;
-import jalview.datamodel.Sequence;
-import jalview.datamodel.SequenceFeature;
-import jalview.datamodel.SequenceI;
-import jalview.ws.jws1.Annotate3D;
-
 import java.io.BufferedReader;
 import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.Reader;
 import java.io.FileReader;
-import java.lang.reflect.Array;
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.List;
 import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-import com.stevesoft.pat.Regex;
+import jalview.analysis.SecStrConsensus;
+import jalview.analysis.SecStrConsensus.SimpleBP;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.SecondaryStructureAnnotation;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
 
 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
@@ -55,318 +43,189 @@ import fr.orsay.lri.varna.factories.RNAFactory.RNAFileType;
 import fr.orsay.lri.varna.models.rna.RNA;
 import fr.orsay.lri.varna.utils.RNAMLParser;
 
+public class RnamlFile extends AlignFile {
+       public int id;
+       public String namefile;
+       public String name;
+       protected ArrayList<RNA> result;
 
+       public RnamlFile() {
+               super();
 
+       }
+
+       public RnamlFile(String inFile, String type) throws Exception {
+               super(inFile, type);
 
-public class RnamlFile extends AlignFile
-{
-        public String id;
-        protected ArrayList<RNA> result;
-        
-        public RnamlFile()
-         {
-                super();
-               
-         }
-        
-        public RnamlFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
-         {
-           super(inFile, type);
-           
-         }
-
-         public RnamlFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
-         {
-           super(source);
-           
-         }
-         
-//       public RnamlFile(BufferedReader r) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed
-//       {
-//                     super();            
-//                 parse(r);
-//                 // sets the index of each sequence in the alignment
-//                 for( int i=0,c=seqs.size(); i<c; i++ ) {  
-//                     seqs.get(i).setIndex(i);  
-//                 }
-//               
-//         
-//       }
-         
-       public BufferedReader CreateReader() throws FileNotFoundException
-       {
-                FileReader fr = null;
-                 fr = new FileReader(inFile); 
-                 
-                 BufferedReader r = new BufferedReader (fr);
-                 return r;
        }
-       
-       
+
+       public RnamlFile(FileParse source) throws Exception {
+               super(source);
+
+       }
+
+       public BufferedReader CreateReader() throws FileNotFoundException {
+               FileReader fr = null;
+               fr = new FileReader(inFile);
+
+               BufferedReader r = new BufferedReader(fr);
+               return r;
+       }
+
        @SuppressWarnings("unchecked")
-       public void parse() throws FileNotFoundException, ExceptionPermissionDenied, ExceptionLoadingFailed, ExceptionFileFormatOrSyntax
-       {
-                result = RNAFactory.loadSecStrRNAML(getReader());
-                 
-                System.out.println("this is the secondary scructure:" +result.size());
-                 //System.out.println("this is the secondary scructure:" +result.toString());
-                 //System.out.println("this is the secondary scructure:" +result.size());
-                SequenceI[] seqs = new SequenceI[result.size()];
-                 System.out.println(type); //the type is "File"
-                 System.out.println(inFile );//inFile is the path
-                 
-                for(int i=0;i<result.size();i++)
-                       {
-                        RNA current = result.get(i);
-                 System.out.println(current.getSeq());
-                 //System.out.println(result.get(i).getStructBPSEQ());
-                 System.out.println(result.get(i).getStructDBN(true));
-                 System.out.println(i);
-                 String rna =current.getStructDBN(true);
-                 //System.out.println(result.get(0).getAnnotations());
-                 String seq = current.getSeq();
-       
-                 int begin=1;
-                 int end = seq.length();
-                 
-                 id = current.getName();
-                 seqs[i] = new Sequence(id, seq, begin, end);
-                 //System.out.println(seq);
-                 System.out.println("Rna is still "+rna);
-                 //SequenceFeature feat = new SequenceFeature(type, "",5, 13, 0f, null);
-                 //seqs[i].addSequenceFeature(feat);
-                 
-                 String[] annot=new String[rna.length()] ;
-                 Annotation[] ann = new Annotation[rna.length()];
-               
-                 for(int j=0;j<rna.length();j++)
-                       {
-                         annot[j] =rna.substring(j,j+1);
-                 
+       public void parse() throws FileNotFoundException,
+                       ExceptionPermissionDenied, ExceptionLoadingFailed,
+                       ExceptionFileFormatOrSyntax {
+
+               result = RNAFactory.loadSecStrRNAML(getReader());
+
+               ArrayList<ArrayList> allarray = new ArrayList();
+               ArrayList<ArrayList<SimpleBP>> BP = new ArrayList();
+               ArrayList strucinarray = new ArrayList();
+               SequenceI[] seqs = new SequenceI[result.size()];
+               namefile = inFile.getName();
+
+               for (int i = 0; i < result.size(); i++) {
+
+                       RNA current = result.get(i);
+                       String rna = current.getStructDBN(true);
+                       String seq = current.getSeq();
+                       int begin = 1;
+                       int end = 1000;
+                       id = i;
+                       System.out.println("id=" + i);
+                       name = this.safeName(namefile, i);
+                       seqs[i] = new Sequence(name, seq, begin, end);
+
+                       seqs[i].setEnd(seqs[i].findPosition(seqs[i].getLength()));
+                       String[] annot = new String[rna.length()];
+                       Annotation[] ann = new Annotation[rna.length()];
+
+                       for (int j = 0; j < rna.length(); j++) {
+                               annot[j] = "" + rna.charAt(j);
+
                        }
-                 
-                 for(int k=0;k<rna.length();k++)
-               {
-                         ann[k] = new Annotation(annot[k], "", jalview.schemes.ResidueProperties.getRNASecStrucState(annot[k]).charAt(0), 0f);
-                                                 
-               }
-                 AlignmentAnnotation align = new AlignmentAnnotation("Sec. str.",current.getID(),ann);
-                 
-                 seqs[i].addAlignmentAnnotation(align);
-                 seqs[i].setRNA(result.get(i));
-                 this.annotations.addElement(align);
+                       for (int k = 0; k < rna.length(); k++) {
+                               ann[k] = new Annotation(annot[k], "",
+                                               jalview.schemes.ResidueProperties.getRNASecStrucState(
+                                                               annot[k]).charAt(0), 0f);
+                       }
+
+                       AlignmentAnnotation align = new AlignmentAnnotation("Sec. str.",
+                                       current.getID(), ann);
+
+                       seqs[i].addAlignmentAnnotation(align);
+                       seqs[i].setRNA(result.get(i));
+
+                       char[] struc = align.getRNAStruc().toCharArray();
+
+                       allarray.add(strucinarray);
+
+                       this.annotations.addElement(align);
+                       BP.add(align.bps);
+                       
                }
-         this.setSeqs(seqs);
-                
-                
-               
+               this.setSeqs(seqs);
+               int[] tab = SecStrConsensus.extractConsensus(BP);
+
 
        }
-         
-       
-         public void parse(BufferedReader r) throws ExceptionPermissionDenied, ExceptionLoadingFailed, ExceptionFileFormatOrSyntax 
-         {
-                  System.out.println("parse with Buffer (RnamlFile");
-                 Hashtable seqhash = new Hashtable();
-                 Vector headers = new Vector();
-                 //ArrayList<String> seq =new ArrayList();
-                 //System.out.println(r);
-                 
-                 result = RNAFactory.loadSecStr(r,RNAFileType.FILE_TYPE_RNAML);
-                 
-                 System.out.println("this is the secondary scructure:" +result.toArray());
-                 System.out.println("this is the secondary scructure:" +result.toString());
-                 System.out.println("this is the secondary scructure:" +result.size());
-                       for(int i=0;i<result.size();i++)
-                       {
-                 System.out.println(result.get(0).getSeq());
-                 System.out.println(result.get(0).getStructBPSEQ());
-                 System.out.println(result.get(0).getStructDBN(true));
-                 String rna1 =result.get(0).getStructDBN(true);
-                 System.out.println(result.get(0).getAnnotations());
-                 String seq = result.get(0).getSeq();
-                 System.out.println(type); //the type is "File"
-                 System.out.println(inFile );//inFile is the path
-                 int begin=0;
-                 int end = seq.length()-1;
-                 SequenceI[] seqs = new SequenceI[1];
-                 id = safeName(getDataName());
-                 seqs[0] = new Sequence(id, seq, begin, end);
-                 SequenceFeature feat = new SequenceFeature(type, "",5, 13, 0f, null);
-                 seqs[0].addSequenceFeature(feat);
-                 this.setSeqs(seqs);
-                 
-                 String[] annot=new String[10000] ;
-                 Annotation[] ann = new Annotation[1000];
-                 for(int j=0;j<rna1.length();j++)
-                       {
-                 annot[j] =rna1.substring(j,j+1);
-                 //System.out.println(annot[j]);
+
+       public void parse(BufferedReader r) throws ExceptionPermissionDenied,
+                       ExceptionLoadingFailed, ExceptionFileFormatOrSyntax {
+
+               result = RNAFactory.loadSecStrRNAML(r);
+
+               ArrayList<ArrayList> allarray = new ArrayList();
+               ArrayList<ArrayList<SimpleBP>> BP = new ArrayList();
+               ArrayList strucinarray = new ArrayList();
+               SequenceI[] seqs = new SequenceI[result.size()];
+               namefile = inFile.getName();
+
+               for (int i = 0; i < result.size(); i++) {
+
+                       RNA current = result.get(i);
+                       String rna = current.getStructDBN(true);
+
+                       String seq = current.getSeq();
+                       int begin = 1;
+                       int end = 1000;
+                       id = i;
+                       name = this.safeName(namefile, i);
+                       seqs[i] = new Sequence(name, seq, begin, end);
+
+                       seqs[i].setEnd(seqs[i].findPosition(seqs[i].getLength()));
+                       String[] annot = new String[rna.length()];
+                       Annotation[] ann = new Annotation[rna.length()];
+
+                       for (int j = 0; j < rna.length(); j++) {
+                               annot[j] = "" + rna.charAt(j);
+
                        }
-                 
-                 for(int k=0;k<rna1.length();k++)
-               {
-                         ann[k] = new Annotation(annot[k], "", ' ', 0f);
-                         
-                         ann[k].secondaryStructure = jalview.schemes.ResidueProperties.getRNASecStrucState(annot[k]).charAt(0);
-                         
-               }
-                 AlignmentAnnotation align = new AlignmentAnnotation("Secondary structure","",ann);
-                 seqs[0].addAlignmentAnnotation(align);
-                 this.annotations.addElement(align);
+                       for (int k = 0; k < rna.length(); k++) {
+                               ann[k] = new Annotation(annot[k], "",
+                                               jalview.schemes.ResidueProperties.getRNASecStrucState(
+                                                               annot[k]).charAt(0), 0f);
+                       }
+
+                       AlignmentAnnotation align = new AlignmentAnnotation("Sec. str.",
+                                       current.getID(), ann);
+
+                       seqs[i].addAlignmentAnnotation(align);
+                       seqs[i].setRNA(result.get(i));
+
+                       char[] struc = align.getRNAStruc().toCharArray();
+                       for (int y = 0; y < struc.length; y++) {
+                               strucinarray.add(struc[y]);
+                               // System.out.println("structy"+struc[y]);
+                       }
+                       // System.out.println("Sequence"+i+"\t : "+strucinarray.toString());
+                       allarray.add(strucinarray);
+
+                       this.annotations.addElement(align);
+                       System.out.println(rna);
+                       BP.add(align.bps);
+                       System.out.println(align.bps.size());
                }
-               //Annotate3D tintin = new Annotate3D();
-                 
-               //System.out.println(tintin);  
-                 
-                 
-                 
-                 
-                 
-                 
-                
+               this.setSeqs(seqs);
+               int[] tab = SecStrConsensus.extractConsensus(BP);
+
        
-//                                     for(RNAMLParser.RNATmp rnaTmp : RNAMLData.getMolecules())
-//                                     {
-//                                             // Retrieving parsed data
-//                                             seq = rnaTmp.getSequence(); //contient la séquence sous forme de liste
-//                                             System.out.println(seq);
-//                                             String seqAsStr = "";
-//                                             for(String s:seq)
-//                                             {
-//                                                     // Only consider last character, e.g. in case of modified nucleotide
-//                                                     seqAsStr += s.charAt(s.length()-1);
-//                                             }
-//                                             int begin=0;
-//                                             int end = seqAsStr.length()-1;
-//                                             SequenceI[] seqs = new SequenceI[1];
-//                                             id = safeName(getDataName());
-//                                             seqs[0] = new Sequence(id, seqAsStr, begin, end);
-//                                             SequenceFeature feat = new SequenceFeature(type, "",5, 13, 0f, null);
-//
-//                         seqs[0].addSequenceFeature(feat);
-//                                             this.setSeqs(seqs);
-                                               
-                                               //String pos = annot.substring(1, 2);
-                                               //System.out.println(annot);
-                                               //System.out.println(pos);
-                                               
-                                          // ann[1] = new Annotation(pos, "", ' ', 0f);
-                                          // ann[1].secondaryStructure = 'A';
-                                           //System.out.println(ann[1]);
-                                           
-                                          // toto.annotations=ann;
-                                           
-
-                                           
-////                                            Annotation[] els = new Annotation[seq.size()];
-//                                             
-//                                             //System.err.println(""+seq.size());
-//                                         Vector<RNAMLParser.BPTemp> allbpsTmp = rnaTmp.getStructure();
-//                                         String[] annot = new String[10000];
-//                                       
-//                                             //Annotation[] ann = new Annotation[allbpsTmp.size()];
-//                                             for (int i = 0; i < allbpsTmp.size(); i++) {
-//                                             RNAMLParser.BPTemp bp = allbpsTmp.get(i);
-//                                             //System.err.println("toto ="+bp);
-//                                             int bp5 = bp.pos5;
-//                                             annot[bp5]= "(";
-//                                             //annot[bp5]= '(';
-//                                             System.out.println(annot[bp5]);
-//                                             //System.out.println("bp5 ="+bp5);
-//                                             int bp3 = bp.pos3;
-//                                     
-//                                             annot[bp3]= ")";
-//                                             System.out.println(annot[bp3]);
-//                                             //System.out.println(annot.get(bp3));
-//                                             
-//                                             //String pos1 = annot.substring(bp5, bp5+1);
-//                                             //String pos2 = annot.substring(bp3, bp3+1);
-//                                             //ann[i] = new Annotation(pos1, "", ' ', 0f);
-//                                             //ann[i+1] = new Annotation(pos2, "", ' ', 0f);
-//                                             //System.out.println(pos1);
-//                                             
-//                                             //System.out.println("bp3 ="+bp3);
-//                                             //ModeleBP newStyle = bp.createBPStyle(mb, part);
-//                                             }       
-//                                             Annotation[] ann = new Annotation[annot.length+1];
-//                                             for(int i=0;i<annot.length;i++)
-//                                             {               
-//                                                     if(annot[i]==null)
-//                                                     {
-//                                                             annot[i]=".";
-//                                                     }
-//                                                     
-//                                                     ann[i] = new Annotation(annot[i], "", ' ', 0f);
-//                                             //System.out.print(annot[i]);
-//                                             }
-//                                             
-//                                             AlignmentAnnotation toto = new AlignmentAnnotation("Secondary structure","",ann);
-//                                             this.annotations.addElement(toto);
-                                       
-                                               // Creating empty structure of suitable size
-//                                             int[] str = new int[seq.size()];
-//                                             for (int i=0;i<str.length;i++)
-//                                             { str[i] = -1; }
-//                                             Vector<RNAMLParser.BPTemp> allbpsTmp = rnaTmp.getStructure();
-//                                             for (int i = 0; i < allbpsTmp.size(); i++) {
-//                                                     RNAMLParser.BPTemp bp = allbpsTmp.get(i);
-//                                                     //System.err.println("toto ="+bp);
-//                                                     int bp5 = bp.pos5;
-//                                                     //System.out.println("bp5 ="+bp5);
-//                                                     int bp3 = bp.pos3;
-                                                       //System.out.println("bp3 ="+bp3);
-                                                       //ModeleBP newStyle = bp.createBPStyle(mb, part);
-//                                             }
-                                               //Annotation ann;
-                                           //ann = new Annotation(bp5, "", ' ', 0f);
-                               
-                                       //}
+       }
 
-                       
+       
 
-//                             return result;
-         
-}
+       public static String print(SequenceI[] s) {
+               return "not yet implemented";
+       }
+
+       public String print() {
+               System.out.print("print :");
+               return print(getSeqsAsArray());
+       }
+
+       public ArrayList getRNA() {
+               return result;
+       }
+
+       public Vector getAnnot() {
+               return annotations;
+       }
+
+       // public static void main(String[] args) {
+       // Pattern p= Pattern.compile("(.+)[.][^.]+");
+       // Matcher m = p.matcher("toto.xml.zip");
+       // System.out.println(m.matches());
+       // System.out.println(m.group(1));
+       // }
+       public String safeName(String namefile, int id) {
+
+               Pattern p = Pattern.compile("(.*)[.][^.]+");
+               Matcher m = p.matcher(namefile);
+               name = m.group(1) + "_" + id;
+
+               return name;
+
+       }
 
-         
-         public static String print(SequenceI[] s)
-         {
-           return "not yet implemented";
-         }
-         
-         public String print()
-         {
-               System.out.print("affiche :");
-           return print(getSeqsAsArray());
-         }
-         
-         /**
-          * make a friendly ID string.
-          * 
-          * @param dataName
-          * @return truncated dataName to after last '/'
-          */
-         private String safeName(String dataName)
-         {
-           int b = 0;
-           while ((b = dataName.indexOf("/")) > -1 && b < dataName.length() )
-           {              
-               dataName = dataName.substring(b + 1).trim();
-               
-           }
-           int e = (dataName.length() - dataName.indexOf("."))+1;
-           dataName = dataName.substring(1,e).trim();
-           return dataName;
-         }
-         
-         public ArrayList getRNA()
-         {
-                 return result;
-         }
-         
-         public Vector getAnnot()
-         {
-                 return annotations;
-         }
 }