- if (annType.equals("GF"))\r
- {\r
- /* Generic per-File annotation, free text\r
- * Magic features:\r
- * #=GF NH <tree in New Hampshire eXtended format>\r
- * #=GF TN <Unique identifier for the next tree>\r
- * Pfam descriptions:\r
- 7. DESCRIPTION OF FIELDS\r
-\r
- Compulsory fields:\r
- ------------------\r
-\r
- AC Accession number: Accession number in form PFxxxxx.version or PBxxxxxx.\r
- ID Identification: One word name for family.\r
- DE Definition: Short description of family.\r
- AU Author: Authors of the entry.\r
- SE Source of seed: The source suggesting the seed members belong to one family.\r
- GA Gathering method: Search threshold to build the full alignment.\r
- TC Trusted Cutoff: Lowest sequence score and domain score of match in the full alignment.\r
- NC Noise Cutoff: Highest sequence score and domain score of match not in full alignment.\r
- TP Type: Type of family -- presently Family, Domain, Motif or Repeat.\r
- SQ Sequence: Number of sequences in alignment.\r
- AM Alignment Method The order ls and fs hits are aligned to the model to build the full align.\r
- // End of alignment.\r
-\r
- Optional fields:\r
- ----------------\r
-\r
- DC Database Comment: Comment about database reference.\r
- DR Database Reference: Reference to external database.\r
- RC Reference Comment: Comment about literature reference.\r
- RN Reference Number: Reference Number.\r
- RM Reference Medline: Eight digit medline UI number.\r
- RT Reference Title: Reference Title.\r
- RA Reference Author: Reference Author\r
- RL Reference Location: Journal location.\r
- PI Previous identifier: Record of all previous ID lines.\r
- KW Keywords: Keywords.\r
- CC Comment: Comments.\r
- NE Pfam accession: Indicates a nested domain.\r
- NL Location: Location of nested domains - sequence ID, start and end of insert.\r
-\r
- Obsolete fields:\r
- -----------\r
- AL Alignment method of seed: The method used to align the seed members.\r
- */\r
- // Let's save the annotations, maybe we'll be able to do something with them later...\r
- Regex an = new Regex("(\\w+)\\s*(.*)");\r
- if (an.search(annContent)) alAnn.put(an.stringMatched(1), an.stringMatched(2));\r
- }\r
- else if(annType.equals("GS"))\r
- {\r
- // Generic per-Sequence annotation, free text\r
- /* Pfam uses these features:\r
- Feature Description\r
- --------------------- -----------\r
- AC <accession> ACcession number\r
- DE <freetext> DEscription\r
- DR <db>; <accession>; Database Reference\r
- OS <organism> OrganiSm (species)\r
- OC <clade> Organism Classification (clade, etc.)\r
- LO <look> Look (Color, etc.)\r
- */\r
- if (s.search(annContent))\r
- {\r
- String acc = s.stringMatched(1);\r
- String type = s.stringMatched(2);\r
- String content = s.stringMatched(3);\r
-\r
- Hashtable ann;\r
- if (seqAnn.containsKey(acc))\r
- {\r
- ann = (Hashtable) seqAnn.get(acc);\r
- }\r
- else\r
- {\r
- ann = new Hashtable();\r
- }\r
- ann.put(type, content);\r
- seqAnn.put(acc, ann);\r
- }\r
- else\r
- {\r
- throw new IOException("Error parsing " + line);\r
- }\r
- }\r
- else if(annType.equals("GC"))\r
- {\r
- // Generic per-Column annotation, exactly 1 char per column\r
- }\r
- else if(annType.equals("GR"))\r
- {\r
- // Generic per-Sequence AND per-Column markup, exactly 1 char per column\r
- /*\r
- Feature Description Markup letters\r
- ------- ----------- --------------\r
- SS Secondary Structure [HGIEBTSCX]\r
- SA Surface Accessibility [0-9X]\r
- (0=0%-10%; ...; 9=90%-100%)\r
- TM TransMembrane [Mio]\r
- PP Posterior Probability [0-9*]\r
- (0=0.00-0.05; 1=0.05-0.15; *=0.95-1.00)\r
- LI LIgand binding [*]\r
- AS Active Site [*]\r
- IN INtron (in or after) [0-2]\r
- */\r
- if (s.search(annContent))\r
- {\r
- String acc = s.stringMatched(1);\r
- String type = s.stringMatched(2);\r
- String seq = s.stringMatched(3);\r
- String description = new String();\r
-\r
- // Check for additional information about the current annotation\r
- if (x.search(seq))\r
- {\r
- description = x.stringMatched(1);\r
- seq = x.stringMatched(2);\r
- }\r
- // sequence id with from-to fields\r
-\r
- Hashtable ann;\r
- // Get an object with all the annotations for this sequence\r
- if (seqAnn.containsKey(acc))\r
- {\r
- //logger.debug("Found annotations for " + acc);\r
- ann = (Hashtable) seqAnn.get(acc);\r
- }\r
- else\r
- {\r
- //logger.debug("Creating new annotations holder for " + acc);\r
- ann = new Hashtable();\r
- seqAnn.put(acc, ann);\r
- }\r
-\r
- Hashtable features;\r
- // Get an object with all the content for an annotation\r
- if (ann.containsKey("features"))\r
- {\r
- //logger.debug("Found features for " + acc);\r
- features = (Hashtable) ann.get("features");\r
- }\r
- else\r
- {\r
- //logger.debug("Creating new features holder for " + acc);\r
- features = new Hashtable();\r
- ann.put("features", features);\r
- }\r
-\r
- Hashtable content;\r
- if (features.containsKey(this.id2type(type)))\r
- {\r
- //logger.debug("Found content for " + this.id2type(type));\r
- content = (Hashtable) features.get(this.id2type(type));\r
- }\r
- else\r
- {\r
- //logger.debug("Creating new content holder for " + this.id2type(type));\r
- content = new Hashtable();\r
- features.put(this.id2type(type), content);\r
- }\r
- String ns = (String) content.get(description);\r
- if (ns == null) ns = "";\r
- ns += seq;\r
- content.put(description, seq);\r
- }\r
- else\r
- {\r
- throw new IOException("Error parsing " + line);\r
- }\r
- }\r
- else\r
- {\r
- throw new IOException("Unknown annotation detected: " + annType + " " + annContent);\r
- }\r