From: jprocter <jprocter@compbio.dundee.ac.uk>
Date: Fri, 7 Dec 2012 15:49:04 +0000 (+0000)
Subject: Anne reinstated general stockholm parser code but extended to support pseudoknot... 
X-Git-Tag: Jalview_2_9~221^2^2~8^2~27
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d380a9fd147e32f905a775237576507b6d11444d;p=jalview.git

Anne reinstated the general Stockholm parser code, extended to support pseudoknot annotation lines. This will need merging with Natasha's upgraded io.StockholmFile class.
---

diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java
index 86cb4ae..987cdcd 100644
--- a/src/jalview/io/StockholmFile.java
+++ b/src/jalview/io/StockholmFile.java
@@ -20,23 +20,29 @@
  */
 package jalview.io;
 
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
 import java.io.*;
 import java.util.*;
 
+
 import javax.xml.parsers.ParserConfigurationException;
 
 import org.xml.sax.SAXException;
 
-import com.stevesoft.pat.*;
+import com.stevesoft.pat.Regex;
 
 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
-import fr.orsay.lri.varna.factories.RNAFactory;
 import fr.orsay.lri.varna.models.rna.RNA;
-import jalview.datamodel.*;
-import jalview.analysis.Rna;
+
+
 
 // import org.apache.log4j.*;
 
@@ -85,485 +91,459 @@ public class StockholmFile extends AlignFile
    *           If there is an error with the input file
  * @throws ExceptionUnmatchedClosingParentheses 
    */
-  public void parse()
+  public void parse() throws Exception
   {
-	  FileReader fr = null;
-	  fr = new FileReader(inFile); 
-
-	BufferedReader r = new BufferedReader (fr);
-	result = RNAFactory.loadSecStrStockholm(r);
-	System.out.println("this is the secondary scructure:" +result.size());
-	SequenceI[] seqs = new SequenceI[result.size()];
-	  System.out.println(type); //the type is "File"
-	  System.out.println(inFile );//inFile is the path
-	for(int i=0;i<result.size();i++)
-	{
-		RNA current = result.get(i);
 	
-		  System.out.println(current.getSeq());
-		  //System.out.println(result.get(i).getStructBPSEQ());
-		  System.out.println(result.get(i).getStructDBN(true));
-		  System.out.println(i);
-		String rna =current.getStructDBN(true);
-		 String seq = current.getSeq();
-		 int begin=0;
-		  int end = seq.length()-1;
-		  id = safeName(getDataName());
-		  seqs[i] = new Sequence(id, seq, begin, end);
-		  String[] annot=new String[rna.length()] ;
-		  Annotation[] ann = new Annotation[rna.length()];
-		  for(int j=0;j<rna.length();j++)
+      StringBuffer treeString = new StringBuffer();
+      String treeName = null;
+      // --------------- Variable Definitions -------------------
+      String line;
+      String version;
+      String id;
+      Hashtable seqAnn = new Hashtable(); // Sequence related annotations
+      Hashtable seqs = new Hashtable();
+      Regex p, r, rend, s, x;
+      // Temporary line for processing RNA annotation
+      // String RNAannot = "";
+
+      // ------------------ Parsing File ----------------------
+      // First, we have to check that this file has STOCKHOLM format, i.e. the
+      // first line must match
+      
+  
+  		r = new Regex("# STOCKHOLM ([\\d\\.]+)");
+  		if (!r.search(nextLine()))
+  		{
+  			throw new IOException(
+  					"This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'");
+  		}
+  		else
+  		{
+  			version = r.stringMatched(1);
+  		
+			// logger.debug("Stockholm version: " + version);
+  		}
+
+  		// We define some Regexes here that will be used regularly later
+  		rend = new Regex("^\\s*\\/\\/"); // Find the end of an alignment
+  		p = new Regex("(\\S+)\\/(\\d+)\\-(\\d+)"); // split sequence id in
+  		// id/from/to
+  		s = new Regex("(\\S+)\\s+(\\S*)\\s+(.*)"); // Parses annotation subtype
+  		r = new Regex("#=(G[FSRC]?)\\s+(.*)"); // Finds any annotation line
+  		x = new Regex("(\\S+)\\s+(\\S+)"); // split id from sequence
+
+  		// Convert all bracket types to parentheses (necessary for passing to VARNA)
+  		Regex openparen = new Regex("(<|\\[)", "(");
+  		Regex closeparen = new Regex("(>|\\])", ")");
+
+  		// Detect if file is RNA by looking for bracket types
+  		Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
+
+		rend.optimize();
+	    p.optimize();
+	    s.optimize();
+	    r.optimize();
+	    x.optimize();
+	    openparen.optimize();
+	    closeparen.optimize();
+	
+	    while ((line = nextLine()) != null)
+	    {
+	      if (line.length() == 0)
+	      {
+	        continue;
+	      }
+	      if (rend.search(line))
+	      {
+	        // End of the alignment, pass stuff back
+	
+	        this.noSeqs = seqs.size();
+	        // logger.debug("Number of sequences: " + this.noSeqs);
+	        Enumeration accs = seqs.keys();
+	        while (accs.hasMoreElements())
+	        {
+	          String acc = (String) accs.nextElement();
+	          // logger.debug("Processing sequence " + acc);
+	          String seq = (String) seqs.remove(acc);
+	          if (maxLength < seq.length())
+	          {
+	            maxLength = seq.length();
+	          }
+	          int start = 1;
+	          int end = -1;
+	          String sid = acc;
+	          /*
+	           * Retrieve hash of annotations for this accession
+	           * Associate Annotation with accession
+	           */
+	          Hashtable accAnnotations = null;
+	
+	          if (seqAnn != null && seqAnn.containsKey(acc))
+	          {
+	            accAnnotations = (Hashtable) seqAnn.remove(acc);
+	            //TODO: add structures to sequence
+	          }
+	
+	          // Split accession in id and from/to
+	          if (p.search(acc))
+	          {
+	            sid = p.stringMatched(1);
+	            start = Integer.parseInt(p.stringMatched(2));
+	            end = Integer.parseInt(p.stringMatched(3));
+	          }
+	          // logger.debug(sid + ", " + start + ", " + end);
+	
+	          Sequence seqO = new Sequence(sid, seq, start, end);
+	          // Add Description (if any)
+	          if (accAnnotations != null && accAnnotations.containsKey("DE"))
+	          {
+	            String desc = (String) accAnnotations.get("DE");
+	            seqO.setDescription((desc == null) ? "" : desc);
+	          }
+	          // Add DB References (if any)
+	          if (accAnnotations != null && accAnnotations.containsKey("DR"))
+	          {
+	            String dbr = (String) accAnnotations.get("DR");
+	            if (dbr != null && dbr.indexOf(";") > -1)
+	            {
+	              String src = dbr.substring(0, dbr.indexOf(";"));
+	              String acn = dbr.substring(dbr.indexOf(";") + 1);
+	              jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
+	              // seqO.addDBRef(dbref);
+	            }
+	          }        
+	          if (accAnnotations != null && accAnnotations.containsKey("SS"))
+	          {
+	        	  Vector v = (Vector) accAnnotations.get("SS");
+	        	  
+	        	  for (int i = 0; i < v.size(); i++)
+	        	    {
+	        		  AlignmentAnnotation an = (AlignmentAnnotation) v.elementAt(i);
+	        		  seqO.addAlignmentAnnotation(an);
+	        		  //annotations.add(an);
+	        	    }
+	          }
+	        
+	          Hashtable features = null;
+	          // We need to adjust the positions of all features to account for gaps
+	          try
+	          {
+	            features = (Hashtable) accAnnotations.remove("features");
+	          } catch (java.lang.NullPointerException e)
+	          {
+	            // loggerwarn("Getting Features for " + acc + ": " +
+	            // e.getMessage());
+	            // continue;
+	          }
+	          // if we have features
+	          if (features != null)
+	          {
+	            int posmap[] = seqO.findPositionMap();
+	            Enumeration i = features.keys();
+	            while (i.hasMoreElements())
+	            {
+	              // TODO: parse out secondary structure annotation as annotation
+	              // row
+	              // TODO: parse out scores as annotation row
+	              // TODO: map coding region to core jalview feature types
+	              String type = i.nextElement().toString();
+	              Hashtable content = (Hashtable) features.remove(type);
+	              Enumeration j = content.keys();
+	              while (j.hasMoreElements())
+	              {
+	                String desc = j.nextElement().toString();
+	                String ns = content.get(desc).toString();
+	                char[] byChar = ns.toCharArray();
+	                for (int k = 0; k < byChar.length; k++)
+	                {
+	                  char c = byChar[k];
+	                  if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
+	                  // uses
+	                  // '.'
+	                  // for
+	                  // feature
+	                  // background
+	                  {
+	                    int new_pos = posmap[k]; // look up nearest seqeunce
+	                    // position to this column
+	                    SequenceFeature feat = new SequenceFeature(type, desc,
+	                            new_pos, new_pos, 0f, null);
+	
+	                    seqO.addSequenceFeature(feat);
+	                  }
+	                }
+	              }
+	
+	            }
+	
+	          }
+	          // garbage collect
+	
+	          // logger.debug("Adding seq " + acc + " from " + start + " to " + end
+	          // + ": " + seq);
+	          this.seqs.addElement(seqO);
+	        }
+	        return; // finished parsing this segment of source
+	      }
+	      else if (!r.search(line))
+	      {
+	        // System.err.println("Found sequence line: " + line);
+	
+	        // Split sequence in sequence and accession parts
+	        if (!x.search(line))
+	        {
+	          // logger.error("Could not parse sequence line: " + line);
+	          throw new IOException("Could not parse sequence line: " + line);
+	        }
+	        String ns = (String) seqs.get(x.stringMatched(1));
+	        if (ns == null)
+	        {
+	          ns = "";
+	        }
+	        ns += x.stringMatched(2);
+	
+	        seqs.put(x.stringMatched(1), ns);
+	      }
+	      else
+	      {
+	        String annType = r.stringMatched(1);
+	        String annContent = r.stringMatched(2);
+	
+	        // System.err.println("type:" + annType + " content: " + annContent);
+	
+	        if (annType.equals("GF"))
+	        {
+	          /*
+	           * Generic per-File annotation, free text Magic features: #=GF NH
+	           * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
+	           * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
+	           * 
+	           * Compulsory fields: ------------------
+	           * 
+	           * AC Accession number: Accession number in form PFxxxxx.version or
+	           * PBxxxxxx. ID Identification: One word name for family. DE
+	           * Definition: Short description of family. AU Author: Authors of the
+	           * entry. SE Source of seed: The source suggesting the seed members
+	           * belong to one family. GA Gathering method: Search threshold to
+	           * build the full alignment. TC Trusted Cutoff: Lowest sequence score
+	           * and domain score of match in the full alignment. NC Noise Cutoff:
+	           * Highest sequence score and domain score of match not in full
+	           * alignment. TP Type: Type of family -- presently Family, Domain,
+	           * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
+	           * Alignment Method The order ls and fs hits are aligned to the model
+	           * to build the full align. // End of alignment.
+	           * 
+	           * Optional fields: ----------------
+	           * 
+	           * DC Database Comment: Comment about database reference. DR Database
+	           * Reference: Reference to external database. RC Reference Comment:
+	           * Comment about literature reference. RN Reference Number: Reference
+	           * Number. RM Reference Medline: Eight digit medline UI number. RT
+	           * Reference Title: Reference Title. RA Reference Author: Reference
+	           * Author RL Reference Location: Journal location. PI Previous
+	           * identifier: Record of all previous ID lines. KW Keywords: Keywords.
+	           * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
+	           * NL Location: Location of nested domains - sequence ID, start and
+	           * end of insert.
+	           * 
+	           * Obsolete fields: ----------- AL Alignment method of seed: The
+	           * method used to align the seed members.
+	           */
+	          // Let's save the annotations, maybe we'll be able to do something
+	          // with them later...
+	          Regex an = new Regex("(\\w+)\\s*(.*)");
+	          if (an.search(annContent))
+	          {
+	            if (an.stringMatched(1).equals("NH"))
+	            {
+	              treeString.append(an.stringMatched(2));
+	            }
+	            else if (an.stringMatched(1).equals("TN"))
+	            {
+	              if (treeString.length() > 0)
+	              {
+	                if (treeName == null)
+	                {
+	                  treeName = "Tree " + (getTreeCount() + 1);
+	                }
+	                addNewickTree(treeName, treeString.toString());
+	              }
+	              treeName = an.stringMatched(2);
+	              treeString = new StringBuffer();
+	            }
+	            setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
+	          }
+	        }
+	        else if (annType.equals("GS"))
+	        {
+	          // Generic per-Sequence annotation, free text
+	          /*
+	           * Pfam uses these features: Feature Description ---------------------
+	           * ----------- AC <accession> ACcession number DE <freetext>
+	           * DEscription DR <db>; <accession>; Database Reference OS <organism>
+	           * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
+	           * LO <look> Look (Color, etc.)
+	           */
+	          if (s.search(annContent))
+	          {
+	            String acc = s.stringMatched(1);
+	            String type = s.stringMatched(2);
+	            String content = s.stringMatched(3);
+	            // TODO: store DR in a vector.
+	            // TODO: store AC according to generic file db annotation.
+	            Hashtable ann;
+	            if (seqAnn.containsKey(acc))
+	            {
+	              ann = (Hashtable) seqAnn.get(acc);
+	            }
+	            else
+	            {
+	              ann = new Hashtable();
+	            }
+	            ann.put(type, content);
+	            seqAnn.put(acc, ann);
+	          }
+	          else
+	          {
+	            throw new IOException("Error parsing " + line);
+	          }
+	        }
+	        else if (annType.equals("GC"))
+	        {
+	          // Generic per-Column annotation, exactly 1 char per column
+	          // always need a label.
+	          if (x.search(annContent))
+	          {
+	            // parse out and create alignment annotation directly.
+	            parseAnnotationRow(annotations, x.stringMatched(1),
+	                    x.stringMatched(2));
+	          }
+	        }
+	        else if (annType.equals("GR"))
+	        {
+	          // Generic per-Sequence AND per-Column markup, exactly 1 char per
+	          // column
+	          /*
+	           * Feature Description Markup letters ------- -----------
+	           * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
+	           * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
+	           * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
+	           * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
+	           * or after) [0-2]
+	           */
+	          if (s.search(annContent))
+	          {
+	            String acc = s.stringMatched(1);
+	            String type = s.stringMatched(2);
+	            String seq = new String(s.stringMatched(3));
+	            String description = null;
+	            // Check for additional information about the current annotation
+	            // We use a simple string tokenizer here for speed
+	            StringTokenizer sep = new StringTokenizer(seq, " \t");
+	            description = sep.nextToken();
+	            if (sep.hasMoreTokens())
+	            {
+	              seq = sep.nextToken();
+	            }
+	            else
+	            {
+	              seq = description;
+	              description = new String();
+	            }
+	            // sequence id with from-to fields
+	
+	            Hashtable ann;
+	            // Get an object with all the annotations for this sequence
+	            if (seqAnn.containsKey(acc))
+	            {
+	              // logger.debug("Found annotations for " + acc);
+	              ann = (Hashtable) seqAnn.get(acc);
+	            }
+	            else
+	            {
+	              // logger.debug("Creating new annotations holder for " + acc);
+	              ann = new Hashtable();
+	              seqAnn.put(acc, ann);
+	            }
+	            //TODO test structure, call parseAnnotationRow with vector from hashtable for specific sequence
+	            Hashtable features;
+	            // Get an object with all the content for an annotation
+	            if (ann.containsKey("features"))
+	            {
+	              // logger.debug("Found features for " + acc);
+	              features = (Hashtable) ann.get("features");
+	            }
+	            else
+	            {
+	              // logger.debug("Creating new features holder for " + acc);
+	              features = new Hashtable();
+	              ann.put("features", features);
+	            }
+	
+	            Hashtable content;
+	            if (features.containsKey(this.id2type(type)))
+	            {
+	              // logger.debug("Found content for " + this.id2type(type));
+	              content = (Hashtable) features.get(this.id2type(type));
+	            }
+	            else
+	            {
+	              // logger.debug("Creating new content holder for " +
+	              // this.id2type(type));
+	              content = new Hashtable();
+	              features.put(this.id2type(type), content);
+	            }
+	            String ns = (String) content.get(description);
+	            if (ns == null)
+	            {
+	              ns = "";
+	            }
+	            ns += seq;
+	            content.put(description, ns);
+	
+	            if(type.equals("SS")){
+	                Hashtable strucAnn;
+	                if (seqAnn.containsKey(acc))
+	                {
+	                  strucAnn = (Hashtable) seqAnn.get(acc);
+	                }
+	                else
+	                {
+	                  strucAnn = new Hashtable();
+	                }
+	                
+	                Vector newStruc=new Vector();
+	                parseAnnotationRow(newStruc, type,ns);
+	                
+	                strucAnn.put(type, newStruc);
+	                seqAnn.put(acc, strucAnn);
+	             }
+	          }
+			else
 			{
-			  annot[j] =rna.substring(j,j+1);
-		  
+						System.err
+						.println("Warning - couldn't parse sequence annotation row line:\n"
+						+ line);
+			// throw new IOException("Error parsing " + line);
+			}
 			}
-		  
-		  for(int k=0;k<rna.length();k++)
+			else
+			{
+			throw new IOException("Unknown annotation detected: " + annType
+				+ " " + annContent);
+			}
+			}
+		}
+		if (treeString.length() > 0)
 		{
-			  ann[k] = new Annotation(annot[k], "", jalview.schemes.ResidueProperties.getRNASecStrucState(annot[k]).charAt(0), 0f);
-			  			  
+		if (treeName == null)
+		{
+			treeName = "Tree " + (1 + getTreeCount());
 		}
-		  AlignmentAnnotation align = new AlignmentAnnotation("Sec. str.",current.getID(),ann);
-		  
-		  seqs[i].addAlignmentAnnotation(align);
-		  seqs[i].setRNA(result.get(i));
-		  this.annotations.addElement(align);
+		addNewickTree(treeName, treeString.toString());
 		}
- 	  this.setSeqs(seqs);
-	
-  }
-//    r = new Regex("# STOCKHOLM ([\\d\\.]+)");
- //    if (!r.search(nextLine()))
-//    {
-//      throw new IOException(
-//              "This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'");
-//    }
-//    else
-//    {
-//      version = r.stringMatched(1);
-//      // logger.debug("Stockholm version: " + version);
-//    }
-//
-//    // We define some Regexes here that will be used regularily later
-//    rend = new Regex("^\\s*\\/\\/"); // Find the end of an alignment
-//    p = new Regex("(\\S+)\\/(\\d+)\\-(\\d+)"); // split sequence id in
-//    // id/from/to
-//    s = new Regex("(\\S+)\\s+(\\S*)\\s+(.*)"); // Parses annotation subtype
-//    r = new Regex("#=(G[FSRC]?)\\s+(.*)"); // Finds any annotation line
-//    x = new Regex("(\\S+)\\s+(\\S+)"); // split id from sequence
-//
-//    // Convert all bracket types to parentheses (necessary for passing to VARNA)
-//    Regex openparen = new Regex("(<|\\[)", "(");
-//    Regex closeparen = new Regex("(>|\\])", ")");
-//
-//    // Detect if file is RNA by looking for bracket types
-//    Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
-//
-//    rend.optimize();
-//    p.optimize();
-//    s.optimize();
-//    r.optimize();
-//    x.optimize();
-//    openparen.optimize();
-//    closeparen.optimize();
-//
-//    while ((line = nextLine()) != null)
-//    {
-//      if (line.length() == 0)
-//      {
-//        continue;
-//      }
-//      if (rend.search(line))
-//      {
-//        // End of the alignment, pass stuff back
-//
-//        this.noSeqs = seqs.size();
-//        // logger.debug("Number of sequences: " + this.noSeqs);
-//        Enumeration accs = seqs.keys();
-//        while (accs.hasMoreElements())
-//        {
-//          String acc = (String) accs.nextElement();
-//          // logger.debug("Processing sequence " + acc);
-//          String seq = (String) seqs.remove(acc);
-//          if (maxLength < seq.length())
-//          {
-//            maxLength = seq.length();
-//          }
-//          int start = 1;
-//          int end = -1;
-//          String sid = acc;
-//          /*
-//           * Retrieve hash of annotations for this accession
-//           * Associate Annotation with accession
-//           */
-//          Hashtable accAnnotations = null;
-//
-//          if (seqAnn != null && seqAnn.containsKey(acc))
-//          {
-//            accAnnotations = (Hashtable) seqAnn.remove(acc);
-//            //TODO: add structures to sequence
-//          }
-//
-//          // Split accession in id and from/to
-//          if (p.search(acc))
-//          {
-//            sid = p.stringMatched(1);
-//            start = Integer.parseInt(p.stringMatched(2));
-//            end = Integer.parseInt(p.stringMatched(3));
-//          }
-//          // logger.debug(sid + ", " + start + ", " + end);
-//
-//          Sequence seqO = new Sequence(sid, seq, start, end);
-//          // Add Description (if any)
-//          if (accAnnotations != null && accAnnotations.containsKey("DE"))
-//          {
-//            String desc = (String) accAnnotations.get("DE");
-//            seqO.setDescription((desc == null) ? "" : desc);
-//          }
-//          // Add DB References (if any)
-//          if (accAnnotations != null && accAnnotations.containsKey("DR"))
-//          {
-//            String dbr = (String) accAnnotations.get("DR");
-//            if (dbr != null && dbr.indexOf(";") > -1)
-//            {
-//              String src = dbr.substring(0, dbr.indexOf(";"));
-//              String acn = dbr.substring(dbr.indexOf(";") + 1);
-//              jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
-//              // seqO.addDBRef(dbref);
-//            }
-//          }        
-//          if (accAnnotations != null && accAnnotations.containsKey("SS"))
-//          {
-//        	  Vector v = (Vector) accAnnotations.get("SS");
-//        	  
-//        	  for (int i = 0; i < v.size(); i++)
-//        	    {
-//        		  AlignmentAnnotation an = (AlignmentAnnotation) v.elementAt(i);
-//        		  seqO.addAlignmentAnnotation(an);
-//        		  //annotations.add(an);
-//        	    }
-//          }
-//        
-//          Hashtable features = null;
-//          // We need to adjust the positions of all features to account for gaps
-//          try
-//          {
-//            features = (Hashtable) accAnnotations.remove("features");
-//          } catch (java.lang.NullPointerException e)
-//          {
-//            // loggerwarn("Getting Features for " + acc + ": " +
-//            // e.getMessage());
-//            // continue;
-//          }
-//          // if we have features
-//          if (features != null)
-//          {
-//            int posmap[] = seqO.findPositionMap();
-//            Enumeration i = features.keys();
-//            while (i.hasMoreElements())
-//            {
-//              // TODO: parse out secondary structure annotation as annotation
-//              // row
-//              // TODO: parse out scores as annotation row
-//              // TODO: map coding region to core jalview feature types
-//              String type = i.nextElement().toString();
-//              Hashtable content = (Hashtable) features.remove(type);
-//              Enumeration j = content.keys();
-//              while (j.hasMoreElements())
-//              {
-//                String desc = j.nextElement().toString();
-//                String ns = content.get(desc).toString();
-//                char[] byChar = ns.toCharArray();
-//                for (int k = 0; k < byChar.length; k++)
-//                {
-//                  char c = byChar[k];
-//                  if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
-//                  // uses
-//                  // '.'
-//                  // for
-//                  // feature
-//                  // background
-//                  {
-//                    int new_pos = posmap[k]; // look up nearest seqeunce
-//                    // position to this column
-//                    SequenceFeature feat = new SequenceFeature(type, desc,
-//                            new_pos, new_pos, 0f, null);
-//
-//                    seqO.addSequenceFeature(feat);
-//                  }
-//                }
-//              }
-//
-//            }
-//
-//          }
-//          // garbage collect
-//
-//          // logger.debug("Adding seq " + acc + " from " + start + " to " + end
-//          // + ": " + seq);
-//          this.seqs.addElement(seqO);
-//        }
-//        return; // finished parsing this segment of source
-//      }
-//      else if (!r.search(line))
-//      {
-//        // System.err.println("Found sequence line: " + line);
-//
-//        // Split sequence in sequence and accession parts
-//        if (!x.search(line))
-//        {
-//          // logger.error("Could not parse sequence line: " + line);
-//          throw new IOException("Could not parse sequence line: " + line);
-//        }
-//        String ns = (String) seqs.get(x.stringMatched(1));
-//        if (ns == null)
-//        {
-//          ns = "";
-//        }
-//        ns += x.stringMatched(2);
-//
-//        seqs.put(x.stringMatched(1), ns);
-//      }
-//      else
-//      {
-//        String annType = r.stringMatched(1);
-//        String annContent = r.stringMatched(2);
-//
-//        // System.err.println("type:" + annType + " content: " + annContent);
-//
-//        if (annType.equals("GF"))
-//        {
-//          /*
-//           * Generic per-File annotation, free text Magic features: #=GF NH
-//           * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
-//           * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
-//           * 
-//           * Compulsory fields: ------------------
-//           * 
-//           * AC Accession number: Accession number in form PFxxxxx.version or
-//           * PBxxxxxx. ID Identification: One word name for family. DE
-//           * Definition: Short description of family. AU Author: Authors of the
-//           * entry. SE Source of seed: The source suggesting the seed members
-//           * belong to one family. GA Gathering method: Search threshold to
-//           * build the full alignment. TC Trusted Cutoff: Lowest sequence score
-//           * and domain score of match in the full alignment. NC Noise Cutoff:
-//           * Highest sequence score and domain score of match not in full
-//           * alignment. TP Type: Type of family -- presently Family, Domain,
-//           * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
-//           * Alignment Method The order ls and fs hits are aligned to the model
-//           * to build the full align. // End of alignment.
-//           * 
-//           * Optional fields: ----------------
-//           * 
-//           * DC Database Comment: Comment about database reference. DR Database
-//           * Reference: Reference to external database. RC Reference Comment:
-//           * Comment about literature reference. RN Reference Number: Reference
-//           * Number. RM Reference Medline: Eight digit medline UI number. RT
-//           * Reference Title: Reference Title. RA Reference Author: Reference
-//           * Author RL Reference Location: Journal location. PI Previous
-//           * identifier: Record of all previous ID lines. KW Keywords: Keywords.
-//           * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
-//           * NL Location: Location of nested domains - sequence ID, start and
-//           * end of insert.
-//           * 
-//           * Obsolete fields: ----------- AL Alignment method of seed: The
-//           * method used to align the seed members.
-//           */
-//          // Let's save the annotations, maybe we'll be able to do something
-//          // with them later...
-//          Regex an = new Regex("(\\w+)\\s*(.*)");
-//          if (an.search(annContent))
-//          {
-//            if (an.stringMatched(1).equals("NH"))
-//            {
-//              treeString.append(an.stringMatched(2));
-//            }
-//            else if (an.stringMatched(1).equals("TN"))
-//            {
-//              if (treeString.length() > 0)
-//              {
-//                if (treeName == null)
-//                {
-//                  treeName = "Tree " + (getTreeCount() + 1);
-//                }
-//                addNewickTree(treeName, treeString.toString());
-//              }
-//              treeName = an.stringMatched(2);
-//              treeString = new StringBuffer();
-//            }
-//            setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
-//          }
-//        }
-//        else if (annType.equals("GS"))
-//        {
-//          // Generic per-Sequence annotation, free text
-//          /*
-//           * Pfam uses these features: Feature Description ---------------------
-//           * ----------- AC <accession> ACcession number DE <freetext>
-//           * DEscription DR <db>; <accession>; Database Reference OS <organism>
-//           * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
-//           * LO <look> Look (Color, etc.)
-//           */
-//          if (s.search(annContent))
-//          {
-//            String acc = s.stringMatched(1);
-//            String type = s.stringMatched(2);
-//            String content = s.stringMatched(3);
-//            // TODO: store DR in a vector.
-//            // TODO: store AC according to generic file db annotation.
-//            Hashtable ann;
-//            if (seqAnn.containsKey(acc))
-//            {
-//              ann = (Hashtable) seqAnn.get(acc);
-//            }
-//            else
-//            {
-//              ann = new Hashtable();
-//            }
-//            ann.put(type, content);
-//            seqAnn.put(acc, ann);
-//          }
-//          else
-//          {
-//            throw new IOException("Error parsing " + line);
-//          }
-//        }
-//        else if (annType.equals("GC"))
-//        {
-//          // Generic per-Column annotation, exactly 1 char per column
-//          // always need a label.
-//          if (x.search(annContent))
-//          {
-//            // parse out and create alignment annotation directly.
-//            parseAnnotationRow(annotations, x.stringMatched(1),
-//                    x.stringMatched(2));
-//          }
-//        }
-//        else if (annType.equals("GR"))
-//        {
-//          // Generic per-Sequence AND per-Column markup, exactly 1 char per
-//          // column
-//          /*
-//           * Feature Description Markup letters ------- -----------
-//           * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
-//           * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
-//           * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
-//           * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
-//           * or after) [0-2]
-//           */
-//          if (s.search(annContent))
-//          {
-//            String acc = s.stringMatched(1);
-//            String type = s.stringMatched(2);
-//            String seq = new String(s.stringMatched(3));
-//            String description = null;
-//            // Check for additional information about the current annotation
-//            // We use a simple string tokenizer here for speed
-//            StringTokenizer sep = new StringTokenizer(seq, " \t");
-//            description = sep.nextToken();
-//            if (sep.hasMoreTokens())
-//            {
-//              seq = sep.nextToken();
-//            }
-//            else
-//            {
-//              seq = description;
-//              description = new String();
-//            }
-//            // sequence id with from-to fields
-//
-//            Hashtable ann;
-//            // Get an object with all the annotations for this sequence
-//            if (seqAnn.containsKey(acc))
-//            {
-//              // logger.debug("Found annotations for " + acc);
-//              ann = (Hashtable) seqAnn.get(acc);
-//            }
-//            else
-//            {
-//              // logger.debug("Creating new annotations holder for " + acc);
-//              ann = new Hashtable();
-//              seqAnn.put(acc, ann);
-//            }
-//            //TODO test structure, call parseAnnotationRow with vector from hashtable for specific sequence
-//            Hashtable features;
-//            // Get an object with all the content for an annotation
-//            if (ann.containsKey("features"))
-//            {
-//              // logger.debug("Found features for " + acc);
-//              features = (Hashtable) ann.get("features");
-//            }
-//            else
-//            {
-//              // logger.debug("Creating new features holder for " + acc);
-//              features = new Hashtable();
-//              ann.put("features", features);
-//            }
-//
-//            Hashtable content;
-//            if (features.containsKey(this.id2type(type)))
-//            {
-//              // logger.debug("Found content for " + this.id2type(type));
-//              content = (Hashtable) features.get(this.id2type(type));
-//            }
-//            else
-//            {
-//              // logger.debug("Creating new content holder for " +
-//              // this.id2type(type));
-//              content = new Hashtable();
-//              features.put(this.id2type(type), content);
-//            }
-//            String ns = (String) content.get(description);
-//            if (ns == null)
-//            {
-//              ns = "";
-//            }
-//            ns += seq;
-//            content.put(description, ns);
-//
-//            if(type.equals("SS")){
-//                Hashtable strucAnn;
-//                if (seqAnn.containsKey(acc))
-//                {
-//                  strucAnn = (Hashtable) seqAnn.get(acc);
-//                }
-//                else
-//                {
-//                  strucAnn = new Hashtable();
-//                }
-//                
-//                Vector newStruc=new Vector();
-//                parseAnnotationRow(newStruc, type,ns);
-//                
-//                strucAnn.put(type, newStruc);
-//                seqAnn.put(acc, strucAnn);
-//             }
-//          }
-//          else
-//          {
-//            System.err
-//                    .println("Warning - couldn't parse sequence annotation row line:\n"
-//                            + line);
-//            // throw new IOException("Error parsing " + line);
-//          }
-//        }
-//        else
-//        {
-//          throw new IOException("Unknown annotation detected: " + annType
-//                  + " " + annContent);
-//        }
-//      }
-//    }
-//    if (treeString.length() > 0)
-//    {
-//      if (treeName == null)
-//      {
-//        treeName = "Tree " + (1 + getTreeCount());
-//      }
-//      addNewickTree(treeName, treeString.toString());
-//    }
-//  }
-//
+	}
+
   protected static AlignmentAnnotation parseAnnotationRow(Vector annotation,
           String label, String annots)
   {
@@ -746,4 +726,4 @@ public class StockholmFile extends AlignFile
     dataName = dataName.substring(1,e).trim();
     return dataName;
   }
-}
\ No newline at end of file
+}