From: jprocter Date: Tue, 22 Jan 2008 16:16:30 +0000 (+0000) Subject: more efficient parsing of large files, do not create sequence features from GR annota... X-Git-Tag: Release_2_4~146 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b8984b54ec8f43a46ddd7eabc11100ab810866ef;p=jalview.git more efficient parsing of large files, do not create sequence features from GR annotation positions with '.' --- diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java index bac1916..945b8c5 100644 --- a/src/jalview/io/StockholmFile.java +++ b/src/jalview/io/StockholmFile.java @@ -124,7 +124,7 @@ public class StockholmFile extends AlignFile { String acc = (String) accs.nextElement(); // logger.debug("Processing sequence " + acc); - String seq = (String) seqs.get(acc); + String seq = (String) seqs.remove(acc); if (maxLength < seq.length()) { maxLength = seq.length(); @@ -137,7 +137,7 @@ public class StockholmFile extends AlignFile if (seqAnn != null && seqAnn.containsKey(acc)) { - accAnnotations = (Hashtable) seqAnn.get(acc); + accAnnotations = (Hashtable) seqAnn.remove(acc); } // Split accession in id and from/to @@ -172,7 +172,7 @@ public class StockholmFile extends AlignFile // We need to adjust the positions of all features to account for gaps try { - features = (Hashtable) accAnnotations.get("features"); + features = (Hashtable) accAnnotations.remove("features"); } catch (java.lang.NullPointerException e) { // loggerwarn("Getting Features for " + acc + ": " + @@ -182,6 +182,7 @@ public class StockholmFile extends AlignFile // if we have features if (features != null) { + int posmap[] = seqO.findPositionMap(); Enumeration i = features.keys(); while (i.hasMoreElements()) { @@ -190,8 +191,7 @@ public class StockholmFile extends AlignFile // TODO: parse out scores as annotation row // TODO: map coding region to core jalview feature types String type = i.nextElement().toString(); - Hashtable content = (Hashtable) features.get(type); - + Hashtable content = (Hashtable) features.remove(type); Enumeration j = content.keys(); while (j.hasMoreElements()) { @@ -201,9 +201,9 @@ public class StockholmFile extends AlignFile for (int k = 0; k < byChar.length; k++) { char c = byChar[k]; - if (!(c == ' ' || c == '_' || c == '-')) + if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM uses '.' for feature background { - int new_pos = seqO.findPosition(k); + int new_pos = posmap[k]; // look up nearest seqeunce position to this column SequenceFeature feat = new SequenceFeature(type, desc, new_pos, new_pos, 0f, null); @@ -213,8 +213,10 @@ public class StockholmFile extends AlignFile } } - + } + // garbage collect + // logger.debug("Adding seq " + acc + " from " + start + " to " + end // + ": " + seq); this.seqs.addElement(seqO); @@ -370,14 +372,18 @@ public class StockholmFile extends AlignFile { String acc = s.stringMatched(1); String type = s.stringMatched(2); - String seq = s.stringMatched(3); - String description = new String(); - + String seq = new String(s.stringMatched(3)); + String description = null; // Check for additional information about the current annotation - if (x.search(seq)) + // We use a simple string tokenizer here for speed + StringTokenizer sep = new StringTokenizer(seq," \t"); + description = sep.nextToken(); + if (sep.hasMoreTokens()) { - description = x.stringMatched(1); - seq = x.stringMatched(2); + seq = sep.nextToken(); + } else { + seq = description; + description = new String(); } // sequence id with from-to fields