From: jprocter <Jim Procter>
Date: Tue, 22 Jan 2008 16:16:30 +0000 (+0000)
Subject: more efficient parsing of large files, do not create sequence features from GR annotation positions with '.'
X-Git-Tag: Release_2_4~146
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b8984b54ec8f43a46ddd7eabc11100ab810866ef;p=jalview.git

more efficient parsing of large files, do not create sequence features from GR annotation positions with '.'
---

diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java
index bac1916..945b8c5 100644
--- a/src/jalview/io/StockholmFile.java
+++ b/src/jalview/io/StockholmFile.java
@@ -124,7 +124,7 @@ public class StockholmFile extends AlignFile
         {
           String acc = (String) accs.nextElement();
           // logger.debug("Processing sequence " + acc);
-          String seq = (String) seqs.get(acc);
+          String seq = (String) seqs.remove(acc);
           if (maxLength < seq.length())
           {
             maxLength = seq.length();
@@ -137,7 +137,7 @@ public class StockholmFile extends AlignFile
 
           if (seqAnn != null && seqAnn.containsKey(acc))
           {
-            accAnnotations = (Hashtable) seqAnn.get(acc);
+            accAnnotations = (Hashtable) seqAnn.remove(acc);
           }
 
           // Split accession in id and from/to
@@ -172,7 +172,7 @@ public class StockholmFile extends AlignFile
           // We need to adjust the positions of all features to account for gaps
           try
           {
-            features = (Hashtable) accAnnotations.get("features");
+            features = (Hashtable) accAnnotations.remove("features");
           } catch (java.lang.NullPointerException e)
           {
             // loggerwarn("Getting Features for " + acc + ": " +
@@ -182,6 +182,7 @@ public class StockholmFile extends AlignFile
           // if we have features
           if (features != null)
           {
+            int posmap[] = seqO.findPositionMap();
             Enumeration i = features.keys();
             while (i.hasMoreElements())
             {
@@ -190,8 +191,7 @@ public class StockholmFile extends AlignFile
               // TODO: parse out scores as annotation row
               // TODO: map coding region to core jalview feature types
               String type = i.nextElement().toString();
-              Hashtable content = (Hashtable) features.get(type);
-
+              Hashtable content = (Hashtable) features.remove(type);
               Enumeration j = content.keys();
               while (j.hasMoreElements())
               {
@@ -201,9 +201,9 @@ public class StockholmFile extends AlignFile
                 for (int k = 0; k < byChar.length; k++)
                 {
                   char c = byChar[k];
-                  if (!(c == ' ' || c == '_' || c == '-'))
+                  if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM uses '.' for feature background
                   {
-                    int new_pos = seqO.findPosition(k);
+                    int new_pos = posmap[k]; // look up nearest sequence position to this column
                     SequenceFeature feat = new SequenceFeature(type, desc,
                             new_pos, new_pos, 0f, null);
 
@@ -213,8 +213,10 @@ public class StockholmFile extends AlignFile
               }
 
             }
-
+            
           }
+          // garbage collect
+          
           // logger.debug("Adding seq " + acc + " from " + start + " to " + end
           // + ": " + seq);
           this.seqs.addElement(seqO);
@@ -370,14 +372,18 @@ public class StockholmFile extends AlignFile
           {
             String acc = s.stringMatched(1);
             String type = s.stringMatched(2);
-            String seq = s.stringMatched(3);
-            String description = new String();
-
+            String seq = new String(s.stringMatched(3));
+            String description = null;
             // Check for additional information about the current annotation
-            if (x.search(seq))
+            // We use a simple StringTokenizer here for speed
+            StringTokenizer sep = new StringTokenizer(seq," \t");
+            description = sep.nextToken();
+            if (sep.hasMoreTokens())
             {
-              description = x.stringMatched(1);
-              seq = x.stringMatched(2);
+              seq = sep.nextToken();
+            } else {
+              seq = description;
+              description = new String();
             }
             // sequence id with from-to fields