- Enumeration j = content.keys();
- while (j.hasMoreElements())
- {
- String desc = j.nextElement().toString();
- String ns = content.get(desc).toString();
- char[] byChar = ns.toCharArray();
- for (int k = 0; k < byChar.length; k++)
- {
- char c = byChar[k];
- if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
- // uses
- // '.'
- // for
- // feature
- // background
- {
- int new_pos = posmap[k]; // look up nearest seqeunce
- // position to this column
- SequenceFeature feat = new SequenceFeature(type, desc,
- new_pos, new_pos, 0f, null);
-
- seqO.addSequenceFeature(feat);
- }
- }
- }
-
- }
-
- }
- // garbage collect
-
- // logger.debug("Adding seq " + acc + " from " + start + " to " + end
- // + ": " + seq);
- this.seqs.addElement(seqO);
- }
- return; // finished parsing this segment of source
- }
- else if (!r.search(line))
- {
- // System.err.println("Found sequence line: " + line);
-
- // Split sequence in sequence and accession parts
- if (!x.search(line))
- {
- // logger.error("Could not parse sequence line: " + line);
- throw new IOException("Could not parse sequence line: " + line);
- }
- String ns = (String) seqs.get(x.stringMatched(1));
- if (ns == null)
- {
- ns = "";
- }
- ns += x.stringMatched(2);
-
- seqs.put(x.stringMatched(1), ns);
- }
- else
- {
- String annType = r.stringMatched(1);
- String annContent = r.stringMatched(2);
-
- // System.err.println("type:" + annType + " content: " + annContent);
-
- if (annType.equals("GF"))
- {
- /*
- * Generic per-File annotation, free text Magic features: #=GF NH
- * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
- * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
- *
- * Compulsory fields: ------------------
- *
- * AC Accession number: Accession number in form PFxxxxx.version or
- * PBxxxxxx. ID Identification: One word name for family. DE
- * Definition: Short description of family. AU Author: Authors of the
- * entry. SE Source of seed: The source suggesting the seed members
- * belong to one family. GA Gathering method: Search threshold to
- * build the full alignment. TC Trusted Cutoff: Lowest sequence score
- * and domain score of match in the full alignment. NC Noise Cutoff:
- * Highest sequence score and domain score of match not in full
- * alignment. TP Type: Type of family -- presently Family, Domain,
- * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
- * Alignment Method The order ls and fs hits are aligned to the model
- * to build the full align. // End of alignment.
- *
- * Optional fields: ----------------
- *
- * DC Database Comment: Comment about database reference. DR Database
- * Reference: Reference to external database. RC Reference Comment:
- * Comment about literature reference. RN Reference Number: Reference
- * Number. RM Reference Medline: Eight digit medline UI number. RT
- * Reference Title: Reference Title. RA Reference Author: Reference
- * Author RL Reference Location: Journal location. PI Previous
- * identifier: Record of all previous ID lines. KW Keywords: Keywords.
- * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
- * NL Location: Location of nested domains - sequence ID, start and
- * end of insert.
- *
- * Obsolete fields: ----------- AL Alignment method of seed: The
- * method used to align the seed members.
- */
- // Let's save the annotations, maybe we'll be able to do something
- // with them later...
- Regex an = new Regex("(\\w+)\\s*(.*)");
- if (an.search(annContent))
- {
- if (an.stringMatched(1).equals("NH"))
- {
- treeString.append(an.stringMatched(2));
- }
- else if (an.stringMatched(1).equals("TN"))
- {
- if (treeString.length() > 0)
- {
- if (treeName == null)
- {
- treeName = "Tree " + (getTreeCount() + 1);
- }
- addNewickTree(treeName, treeString.toString());
- }
- treeName = an.stringMatched(2);
- treeString = new StringBuffer();
- }
- setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
- }
- }
- else if (annType.equals("GS"))
- {
- // Generic per-Sequence annotation, free text
- /*
- * Pfam uses these features: Feature Description ---------------------
- * ----------- AC <accession> ACcession number DE <freetext>
- * DEscription DR <db>; <accession>; Database Reference OS <organism>
- * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
- * LO <look> Look (Color, etc.)
- */
- if (s.search(annContent))
- {
- String acc = s.stringMatched(1);
- String type = s.stringMatched(2);
- String content = s.stringMatched(3);
- // TODO: store DR in a vector.
- // TODO: store AC according to generic file db annotation.
- Hashtable ann;
- if (seqAnn.containsKey(acc))
- {
- ann = (Hashtable) seqAnn.get(acc);
- }
- else
- {
- ann = new Hashtable();
- }
- ann.put(type, content);
- seqAnn.put(acc, ann);
- }
- else
- {
- throw new IOException("Error parsing " + line);
- }
- }
- else if (annType.equals("GC"))
- {
- // Generic per-Column annotation, exactly 1 char per column
- // always need a label.
- if (x.search(annContent))
- {
- // parse out and create alignment annotation directly.
- parseAnnotationRow(annotations, x.stringMatched(1),
- x.stringMatched(2));
- }
- }
- else if (annType.equals("GR"))
- {
- // Generic per-Sequence AND per-Column markup, exactly 1 char per
- // column
- /*
- * Feature Description Markup letters ------- -----------
- * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
- * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
- * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
- * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
- * or after) [0-2]
- */
- if (s.search(annContent))
- {
- String acc = s.stringMatched(1);
- String type = s.stringMatched(2);
- String seq = new String(s.stringMatched(3));
- String description = null;
- // Check for additional information about the current annotation
- // We use a simple string tokenizer here for speed
- StringTokenizer sep = new StringTokenizer(seq, " \t");
- description = sep.nextToken();
- if (sep.hasMoreTokens())
- {
- seq = sep.nextToken();
- }
- else
- {
- seq = description;
- description = new String();
- }
- // sequence id with from-to fields
-
- Hashtable ann;
- // Get an object with all the annotations for this sequence
- if (seqAnn.containsKey(acc))
- {
- // logger.debug("Found annotations for " + acc);
- ann = (Hashtable) seqAnn.get(acc);
- }
- else
- {
- // logger.debug("Creating new annotations holder for " + acc);
- ann = new Hashtable();
- seqAnn.put(acc, ann);
- }
+ Enumeration j = content.keys();
+ while (j.hasMoreElements())
+ {
+ String desc = j.nextElement().toString();
+ String ns = content.get(desc).toString();
+ char[] byChar = ns.toCharArray();
+ for (int k = 0; k < byChar.length; k++)
+ {
+ char c = byChar[k];
+ if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
+ // uses
+ // '.'
+ // for
+ // feature
+ // background
+ {
+ int new_pos = posmap[k]; // look up nearest sequence
+ // position to this column
+ SequenceFeature feat = new SequenceFeature(type, desc,
+ new_pos, new_pos, 0f, null);
+
+ seqO.addSequenceFeature(feat);
+ }
+ }
+ }
+
+ }
+
+ }
+ // garbage collect
+
+ // logger.debug("Adding seq " + acc + " from " + start + " to " + end
+ // + ": " + seq);
+ this.seqs.addElement(seqO);
+ }
+ return; // finished parsing this segment of source
+ }
+ else if (!r.search(line))
+ {
+ // System.err.println("Found sequence line: " + line);
+
+ // Split sequence in sequence and accession parts
+ if (!x.search(line))
+ {
+ // logger.error("Could not parse sequence line: " + line);
+ throw new IOException(MessageManager.formatMessage(
+ "exception.couldnt_parse_sequence_line",
+ new String[] { line }));
+ }
+ String ns = seqs.get(x.stringMatched(1));
+ if (ns == null)
+ {
+ ns = "";
+ }
+ ns += x.stringMatched(2);
+
+ seqs.put(x.stringMatched(1), ns);
+ }
+ else
+ {
+ String annType = r.stringMatched(1);
+ String annContent = r.stringMatched(2);
+
+ // System.err.println("type:" + annType + " content: " + annContent);
+
+ if (annType.equals("GF"))
+ {
+ /*
+ * Generic per-File annotation, free text Magic features: #=GF NH
+ * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
+ * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
+ *
+ * Compulsory fields: ------------------
+ *
+ * AC Accession number: Accession number in form PFxxxxx.version or
+ * PBxxxxxx. ID Identification: One word name for family. DE
+ * Definition: Short description of family. AU Author: Authors of the
+ * entry. SE Source of seed: The source suggesting the seed members
+ * belong to one family. GA Gathering method: Search threshold to
+ * build the full alignment. TC Trusted Cutoff: Lowest sequence score
+ * and domain score of match in the full alignment. NC Noise Cutoff:
+ * Highest sequence score and domain score of match not in full
+ * alignment. TP Type: Type of family -- presently Family, Domain,
+ * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
+ * Alignment Method The order ls and fs hits are aligned to the model
+ * to build the full align. // End of alignment.
+ *
+ * Optional fields: ----------------
+ *
+ * DC Database Comment: Comment about database reference. DR Database
+ * Reference: Reference to external database. RC Reference Comment:
+ * Comment about literature reference. RN Reference Number: Reference
+ * Number. RM Reference Medline: Eight digit medline UI number. RT
+ * Reference Title: Reference Title. RA Reference Author: Reference
+ * Author RL Reference Location: Journal location. PI Previous
+ * identifier: Record of all previous ID lines. KW Keywords: Keywords.
+ * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
+ * NL Location: Location of nested domains - sequence ID, start and
+ * end of insert.
+ *
+ * Obsolete fields: ----------- AL Alignment method of seed: The
+ * method used to align the seed members.
+ */
+ // Let's save the annotations, maybe we'll be able to do something
+ // with them later...
+ Regex an = new Regex("(\\w+)\\s*(.*)");
+ if (an.search(annContent))
+ {
+ if (an.stringMatched(1).equals("NH"))
+ {
+ treeString.append(an.stringMatched(2));
+ }
+ else if (an.stringMatched(1).equals("TN"))
+ {
+ if (treeString.length() > 0)
+ {
+ if (treeName == null)
+ {
+ treeName = "Tree " + (getTreeCount() + 1);
+ }
+ addNewickTree(treeName, treeString.toString());
+ }
+ treeName = an.stringMatched(2);
+ treeString = new StringBuffer();
+ }
+ setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
+ }
+ }
+ else if (annType.equals("GS"))
+ {
+ // Generic per-Sequence annotation, free text
+ /*
+ * Pfam uses these features: Feature Description ---------------------
+ * ----------- AC <accession> ACcession number DE <freetext>
+ * DEscription DR <db>; <accession>; Database Reference OS <organism>
+ * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
+ * LO <look> Look (Color, etc.)
+ */
+ if (s.search(annContent))
+ {
+ String acc = s.stringMatched(1);
+ String type = s.stringMatched(2);
+ String content = s.stringMatched(3);
+ // TODO: store DR in a vector.
+ // TODO: store AC according to generic file db annotation.
+ Hashtable ann;
+ if (seqAnn.containsKey(acc))
+ {
+ ann = (Hashtable) seqAnn.get(acc);
+ }
+ else
+ {
+ ann = new Hashtable();
+ }
+ ann.put(type, content);
+ seqAnn.put(acc, ann);
+ }
+ else
+ {
+ // throw new IOException("Error parsing " + line);
+ System.err.println(">> missing annotation: " + line);
+ }
+ }
+ else if (annType.equals("GC"))
+ {
+ // Generic per-Column annotation, exactly 1 char per column
+ // always need a label.
+ if (x.search(annContent))
+ {
+ // parse out and create alignment annotation directly.
+ parseAnnotationRow(annotations, x.stringMatched(1),
+ x.stringMatched(2));
+ }
+ }
+ else if (annType.equals("GR"))
+ {
+ // Generic per-Sequence AND per-Column markup, exactly 1 char per
+ // column
+ /*
+ * Feature Description Markup letters ------- -----------
+ * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
+ * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
+ * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
+ * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
+ * or after) [0-2]
+ */
+ if (s.search(annContent))
+ {
+ String acc = s.stringMatched(1);
+ String type = s.stringMatched(2);
+ String seq = new String(s.stringMatched(3));
+ String description = null;
+ // Check for additional information about the current annotation
+ // We use a simple string tokenizer here for speed
+ StringTokenizer sep = new StringTokenizer(seq, " \t");
+ description = sep.nextToken();
+ if (sep.hasMoreTokens())
+ {
+ seq = sep.nextToken();
+ }
+ else
+ {
+ seq = description;
+ description = new String();
+ }
+ // sequence id with from-to fields
+
+ Hashtable ann;
+ // Get an object with all the annotations for this sequence
+ if (seqAnn.containsKey(acc))
+ {
+ // logger.debug("Found annotations for " + acc);
+ ann = (Hashtable) seqAnn.get(acc);
+ }
+ else
+ {
+ // logger.debug("Creating new annotations holder for " + acc);
+ ann = new Hashtable();
+ seqAnn.put(acc, ann);
+ }