src/jalview/io/StockholmFile.java

   1 /*\r
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)\r
   3  * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle\r
   4  * \r
   5  * This file is part of Jalview.\r
   6  * \r
   7  * Jalview is free software: you can redistribute it and/or\r
   8  * modify it under the terms of the GNU General Public License \r
   9  * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.\r
  10  *  \r
  11  * Jalview is distributed in the hope that it will be useful, but \r
  12  * WITHOUT ANY WARRANTY; without even the implied warranty \r
  13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR \r
  14  * PURPOSE.  See the GNU General Public License for more details.\r
  15  * \r
  16  * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.\r
  17  */\r
  18 /*\r
  19  * This extension was written by Benjamin Schuster-Boeckler at sanger.ac.uk\r
  20  */\r
  21 package jalview.io;\r
  22 \r
  23 import java.io.*;\r
  24 import java.util.*;\r
  25 \r
  26 import javax.xml.parsers.ParserConfigurationException;\r
  27 \r
  28 import org.xml.sax.SAXException;\r
  29 \r
  30 import com.stevesoft.pat.*;\r
  31 \r
  32 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;\r
  33 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;\r
  34 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;\r
  35 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;\r
  36 import fr.orsay.lri.varna.factories.RNAFactory;\r
  37 import fr.orsay.lri.varna.models.rna.RNA;\r
  38 import jalview.datamodel.*;\r
  39 import jalview.analysis.Rna;\r
  40 \r
  41 // import org.apache.log4j.*;\r
  42 \r
  43 /**\r
 * This class is supposed to parse a Stockholm format file into Jalview. There
 * are TODOs in this class: we do not know what the database source and version
  46  * is for the file when parsing the #GS= AC tag which associates accessions with\r
  47  * sequences. Database references are also not parsed correctly: a separate\r
  48  * reference string parser must be added to parse the database reference form\r
  49  * into Jalview's local representation.\r
  50  * \r
  51  * @author bsb at sanger.ac.uk\r
  52  * @version 0.3 + jalview mods\r
  53  * \r
  54  */\r
  55 public class StockholmFile extends AlignFile\r
  56 {\r
  57   // static Logger logger = Logger.getLogger("jalview.io.StockholmFile");\r
  58   protected ArrayList<RNA> result;\r
  59   public String id;\r
  60   \r
  /**
   * Creates an instance without supplying a file or source. The body is empty,
   * so only the implicit superclass constructor runs.
   */
  public StockholmFile()
  {
  }
  64 \r
  /**
   * Creates an instance by handing both arguments straight to the superclass
   * constructor; no additional work is done here.
   * 
   * @param inFile
   *          passed unchanged to {@code super(inFile, type)}
   * @param type
   *          passed unchanged to {@code super(inFile, type)}
   * @throws Exception
   *           declared by this constructor; anything raised comes from the
   *           superclass constructor
   */
  public StockholmFile(String inFile, String type) throws Exception 
  {
    super(inFile, type);
  }
  69 \r
  /**
   * Creates an instance by handing the given source straight to the superclass
   * constructor; no additional work is done here.
   * 
   * @param source
   *          passed unchanged to {@code super(source)}
   * @throws Exception
   *           declared by this constructor; anything raised comes from the
   *           superclass constructor
   */
  public StockholmFile(FileParse source) throws Exception 
  {
    super(source);
  }
  74 \r
  /**
   * Delegates to the superclass {@code initData()}; this class adds no
   * initialisation of its own.
   */
  public void initData()
  {
    super.initData();
  }
  79 \r
  80   /**\r
  81    * Parse a file in Stockholm format into Jalview's data model. The file has to\r
  82    * be passed at construction time\r
  83    * \r
  84    * @throws IOException\r
  85    *           If there is an error with the input file\r
  86  * @throws ExceptionUnmatchedClosingParentheses \r
  87    */\r
  88   public void parse()\r
  89   {\r
  90           FileReader fr = null;\r
  91           fr = new FileReader(inFile); \r
  92 \r
  93         BufferedReader r = new BufferedReader (fr);\r
  94         result = RNAFactory.loadSecStrStockholm(r);\r
  95         System.out.println("this is the secondary scructure:" +result.size());\r
  96         SequenceI[] seqs = new SequenceI[result.size()];\r
  97           System.out.println(type); //the type is "File"\r
  98           System.out.println(inFile );//inFile is the path\r
  99         for(int i=0;i<result.size();i++)\r
 100         {\r
 101                 RNA current = result.get(i);\r
 102         \r
 103                   System.out.println(current.getSeq());\r
 104                   //System.out.println(result.get(i).getStructBPSEQ());\r
 105                   System.out.println(result.get(i).getStructDBN(true));\r
 106                   System.out.println(i);\r
 107                 String rna =current.getStructDBN(true);\r
 108                  String seq = current.getSeq();\r
 109                  int begin=0;\r
 110                   int end = seq.length()-1;\r
 111                   id = safeName(getDataName());\r
 112                   seqs[i] = new Sequence(id, seq, begin, end);\r
 113                   String[] annot=new String[rna.length()] ;\r
 114                   Annotation[] ann = new Annotation[rna.length()];\r
 115                   for(int j=0;j<rna.length();j++)\r
 116                         {\r
 117                           annot[j] =rna.substring(j,j+1);\r
 118                   \r
 119                         }\r
 120                   \r
 121                   for(int k=0;k<rna.length();k++)\r
 122                 {\r
 123                           ann[k] = new Annotation(annot[k], "", jalview.schemes.ResidueProperties.getRNASecStrucState(annot[k]).charAt(0), 0f);\r
 124                                                   \r
 125                 }\r
 126                   AlignmentAnnotation align = new AlignmentAnnotation("Sec. str.",current.getID(),ann);\r
 127                   \r
 128                   seqs[i].addAlignmentAnnotation(align);\r
 129                   seqs[i].setRNA(result.get(i));\r
 130                   this.annotations.addElement(align);\r
 131                 }\r
 132           this.setSeqs(seqs);\r
 133         \r
 134   }\r
 135 //    r = new Regex("# STOCKHOLM ([\\d\\.]+)");\r
 136  //    if (!r.search(nextLine()))\r
 137 //    {\r
 138 //      throw new IOException(\r
 139 //              "This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'");\r
 140 //    }\r
 141 //    else\r
 142 //    {\r
 143 //      version = r.stringMatched(1);\r
 144 //      // logger.debug("Stockholm version: " + version);\r
 145 //    }\r
 146 //\r
 147 //    // We define some Regexes here that will be used regularily later\r
 148 //    rend = new Regex("^\\s*\\/\\/"); // Find the end of an alignment\r
 149 //    p = new Regex("(\\S+)\\/(\\d+)\\-(\\d+)"); // split sequence id in\r
 150 //    // id/from/to\r
 151 //    s = new Regex("(\\S+)\\s+(\\S*)\\s+(.*)"); // Parses annotation subtype\r
 152 //    r = new Regex("#=(G[FSRC]?)\\s+(.*)"); // Finds any annotation line\r
 153 //    x = new Regex("(\\S+)\\s+(\\S+)"); // split id from sequence\r
 154 //\r
 155 //    // Convert all bracket types to parentheses (necessary for passing to VARNA)\r
 156 //    Regex openparen = new Regex("(<|\\[)", "(");\r
 157 //    Regex closeparen = new Regex("(>|\\])", ")");\r
 158 //\r
 159 //    // Detect if file is RNA by looking for bracket types\r
 160 //    Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");\r
 161 //\r
 162 //    rend.optimize();\r
 163 //    p.optimize();\r
 164 //    s.optimize();\r
 165 //    r.optimize();\r
 166 //    x.optimize();\r
 167 //    openparen.optimize();\r
 168 //    closeparen.optimize();\r
 169 //\r
 170 //    while ((line = nextLine()) != null)\r
 171 //    {\r
 172 //      if (line.length() == 0)\r
 173 //      {\r
 174 //        continue;\r
 175 //      }\r
 176 //      if (rend.search(line))\r
 177 //      {\r
 178 //        // End of the alignment, pass stuff back\r
 179 //\r
 180 //        this.noSeqs = seqs.size();\r
 181 //        // logger.debug("Number of sequences: " + this.noSeqs);\r
 182 //        Enumeration accs = seqs.keys();\r
 183 //        while (accs.hasMoreElements())\r
 184 //        {\r
 185 //          String acc = (String) accs.nextElement();\r
 186 //          // logger.debug("Processing sequence " + acc);\r
 187 //          String seq = (String) seqs.remove(acc);\r
 188 //          if (maxLength < seq.length())\r
 189 //          {\r
 190 //            maxLength = seq.length();\r
 191 //          }\r
 192 //          int start = 1;\r
 193 //          int end = -1;\r
 194 //          String sid = acc;\r
 195 //          /*\r
 196 //           * Retrieve hash of annotations for this accession\r
 197 //           * Associate Annotation with accession\r
 198 //           */\r
 199 //          Hashtable accAnnotations = null;\r
 200 //\r
 201 //          if (seqAnn != null && seqAnn.containsKey(acc))\r
 202 //          {\r
 203 //            accAnnotations = (Hashtable) seqAnn.remove(acc);\r
 204 //            //TODO: add structures to sequence\r
 205 //          }\r
 206 //\r
 207 //          // Split accession in id and from/to\r
 208 //          if (p.search(acc))\r
 209 //          {\r
 210 //            sid = p.stringMatched(1);\r
 211 //            start = Integer.parseInt(p.stringMatched(2));\r
 212 //            end = Integer.parseInt(p.stringMatched(3));\r
 213 //          }\r
 214 //          // logger.debug(sid + ", " + start + ", " + end);\r
 215 //\r
 216 //          Sequence seqO = new Sequence(sid, seq, start, end);\r
 217 //          // Add Description (if any)\r
 218 //          if (accAnnotations != null && accAnnotations.containsKey("DE"))\r
 219 //          {\r
 220 //            String desc = (String) accAnnotations.get("DE");\r
 221 //            seqO.setDescription((desc == null) ? "" : desc);\r
 222 //          }\r
 223 //          // Add DB References (if any)\r
 224 //          if (accAnnotations != null && accAnnotations.containsKey("DR"))\r
 225 //          {\r
 226 //            String dbr = (String) accAnnotations.get("DR");\r
 227 //            if (dbr != null && dbr.indexOf(";") > -1)\r
 228 //            {\r
 229 //              String src = dbr.substring(0, dbr.indexOf(";"));\r
 230 //              String acn = dbr.substring(dbr.indexOf(";") + 1);\r
 231 //              jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);\r
 232 //              // seqO.addDBRef(dbref);\r
 233 //            }\r
 234 //          }        \r
 235 //          if (accAnnotations != null && accAnnotations.containsKey("SS"))\r
 236 //          {\r
 237 //                Vector v = (Vector) accAnnotations.get("SS");\r
 238 //                \r
 239 //                for (int i = 0; i < v.size(); i++)\r
 240 //                  {\r
 241 //                        AlignmentAnnotation an = (AlignmentAnnotation) v.elementAt(i);\r
 242 //                        seqO.addAlignmentAnnotation(an);\r
 243 //                        //annotations.add(an);\r
 244 //                  }\r
 245 //          }\r
 246 //        \r
 247 //          Hashtable features = null;\r
 248 //          // We need to adjust the positions of all features to account for gaps\r
 249 //          try\r
 250 //          {\r
 251 //            features = (Hashtable) accAnnotations.remove("features");\r
 252 //          } catch (java.lang.NullPointerException e)\r
 253 //          {\r
 254 //            // loggerwarn("Getting Features for " + acc + ": " +\r
 255 //            // e.getMessage());\r
 256 //            // continue;\r
 257 //          }\r
 258 //          // if we have features\r
 259 //          if (features != null)\r
 260 //          {\r
 261 //            int posmap[] = seqO.findPositionMap();\r
 262 //            Enumeration i = features.keys();\r
 263 //            while (i.hasMoreElements())\r
 264 //            {\r
 265 //              // TODO: parse out secondary structure annotation as annotation\r
 266 //              // row\r
 267 //              // TODO: parse out scores as annotation row\r
 268 //              // TODO: map coding region to core jalview feature types\r
 269 //              String type = i.nextElement().toString();\r
 270 //              Hashtable content = (Hashtable) features.remove(type);\r
 271 //              Enumeration j = content.keys();\r
 272 //              while (j.hasMoreElements())\r
 273 //              {\r
 274 //                String desc = j.nextElement().toString();\r
 275 //                String ns = content.get(desc).toString();\r
 276 //                char[] byChar = ns.toCharArray();\r
 277 //                for (int k = 0; k < byChar.length; k++)\r
 278 //                {\r
 279 //                  char c = byChar[k];\r
 280 //                  if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM\r
 281 //                  // uses\r
 282 //                  // '.'\r
 283 //                  // for\r
 284 //                  // feature\r
 285 //                  // background\r
 286 //                  {\r
 287 //                    int new_pos = posmap[k]; // look up nearest seqeunce\r
 288 //                    // position to this column\r
 289 //                    SequenceFeature feat = new SequenceFeature(type, desc,\r
 290 //                            new_pos, new_pos, 0f, null);\r
 291 //\r
 292 //                    seqO.addSequenceFeature(feat);\r
 293 //                  }\r
 294 //                }\r
 295 //              }\r
 296 //\r
 297 //            }\r
 298 //\r
 299 //          }\r
 300 //          // garbage collect\r
 301 //\r
 302 //          // logger.debug("Adding seq " + acc + " from " + start + " to " + end\r
 303 //          // + ": " + seq);\r
 304 //          this.seqs.addElement(seqO);\r
 305 //        }\r
 306 //        return; // finished parsing this segment of source\r
 307 //      }\r
 308 //      else if (!r.search(line))\r
 309 //      {\r
 310 //        // System.err.println("Found sequence line: " + line);\r
 311 //\r
 312 //        // Split sequence in sequence and accession parts\r
 313 //        if (!x.search(line))\r
 314 //        {\r
 315 //          // logger.error("Could not parse sequence line: " + line);\r
 316 //          throw new IOException("Could not parse sequence line: " + line);\r
 317 //        }\r
 318 //        String ns = (String) seqs.get(x.stringMatched(1));\r
 319 //        if (ns == null)\r
 320 //        {\r
 321 //          ns = "";\r
 322 //        }\r
 323 //        ns += x.stringMatched(2);\r
 324 //\r
 325 //        seqs.put(x.stringMatched(1), ns);\r
 326 //      }\r
 327 //      else\r
 328 //      {\r
 329 //        String annType = r.stringMatched(1);\r
 330 //        String annContent = r.stringMatched(2);\r
 331 //\r
 332 //        // System.err.println("type:" + annType + " content: " + annContent);\r
 333 //\r
 334 //        if (annType.equals("GF"))\r
 335 //        {\r
 336 //          /*\r
 337 //           * Generic per-File annotation, free text Magic features: #=GF NH\r
 338 //           * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier\r
 339 //           * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS\r
 340 //           * \r
 341 //           * Compulsory fields: ------------------\r
 342 //           * \r
 343 //           * AC Accession number: Accession number in form PFxxxxx.version or\r
 344 //           * PBxxxxxx. ID Identification: One word name for family. DE\r
 345 //           * Definition: Short description of family. AU Author: Authors of the\r
 346 //           * entry. SE Source of seed: The source suggesting the seed members\r
 347 //           * belong to one family. GA Gathering method: Search threshold to\r
 348 //           * build the full alignment. TC Trusted Cutoff: Lowest sequence score\r
 349 //           * and domain score of match in the full alignment. NC Noise Cutoff:\r
 350 //           * Highest sequence score and domain score of match not in full\r
 351 //           * alignment. TP Type: Type of family -- presently Family, Domain,\r
 352 //           * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM\r
 353 //           * Alignment Method The order ls and fs hits are aligned to the model\r
 354 //           * to build the full align. // End of alignment.\r
 355 //           * \r
 356 //           * Optional fields: ----------------\r
 357 //           * \r
 358 //           * DC Database Comment: Comment about database reference. DR Database\r
 359 //           * Reference: Reference to external database. RC Reference Comment:\r
 360 //           * Comment about literature reference. RN Reference Number: Reference\r
 361 //           * Number. RM Reference Medline: Eight digit medline UI number. RT\r
 362 //           * Reference Title: Reference Title. RA Reference Author: Reference\r
 363 //           * Author RL Reference Location: Journal location. PI Previous\r
 364 //           * identifier: Record of all previous ID lines. KW Keywords: Keywords.\r
 365 //           * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.\r
 366 //           * NL Location: Location of nested domains - sequence ID, start and\r
 367 //           * end of insert.\r
 368 //           * \r
 369 //           * Obsolete fields: ----------- AL Alignment method of seed: The\r
 370 //           * method used to align the seed members.\r
 371 //           */\r
 372 //          // Let's save the annotations, maybe we'll be able to do something\r
 373 //          // with them later...\r
 374 //          Regex an = new Regex("(\\w+)\\s*(.*)");\r
 375 //          if (an.search(annContent))\r
 376 //          {\r
 377 //            if (an.stringMatched(1).equals("NH"))\r
 378 //            {\r
 379 //              treeString.append(an.stringMatched(2));\r
 380 //            }\r
 381 //            else if (an.stringMatched(1).equals("TN"))\r
 382 //            {\r
 383 //              if (treeString.length() > 0)\r
 384 //              {\r
 385 //                if (treeName == null)\r
 386 //                {\r
 387 //                  treeName = "Tree " + (getTreeCount() + 1);\r
 388 //                }\r
 389 //                addNewickTree(treeName, treeString.toString());\r
 390 //              }\r
 391 //              treeName = an.stringMatched(2);\r
 392 //              treeString = new StringBuffer();\r
 393 //            }\r
 394 //            setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));\r
 395 //          }\r
 396 //        }\r
 397 //        else if (annType.equals("GS"))\r
 398 //        {\r
 399 //          // Generic per-Sequence annotation, free text\r
 400 //          /*\r
 401 //           * Pfam uses these features: Feature Description ---------------------\r
 402 //           * ----------- AC <accession> ACcession number DE <freetext>\r
 403 //           * DEscription DR <db>; <accession>; Database Reference OS <organism>\r
 404 //           * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)\r
 405 //           * LO <look> Look (Color, etc.)\r
 406 //           */\r
 407 //          if (s.search(annContent))\r
 408 //          {\r
 409 //            String acc = s.stringMatched(1);\r
 410 //            String type = s.stringMatched(2);\r
 411 //            String content = s.stringMatched(3);\r
 412 //            // TODO: store DR in a vector.\r
 413 //            // TODO: store AC according to generic file db annotation.\r
 414 //            Hashtable ann;\r
 415 //            if (seqAnn.containsKey(acc))\r
 416 //            {\r
 417 //              ann = (Hashtable) seqAnn.get(acc);\r
 418 //            }\r
 419 //            else\r
 420 //            {\r
 421 //              ann = new Hashtable();\r
 422 //            }\r
 423 //            ann.put(type, content);\r
 424 //            seqAnn.put(acc, ann);\r
 425 //          }\r
 426 //          else\r
 427 //          {\r
 428 //            throw new IOException("Error parsing " + line);\r
 429 //          }\r
 430 //        }\r
 431 //        else if (annType.equals("GC"))\r
 432 //        {\r
 433 //          // Generic per-Column annotation, exactly 1 char per column\r
 434 //          // always need a label.\r
 435 //          if (x.search(annContent))\r
 436 //          {\r
 437 //            // parse out and create alignment annotation directly.\r
 438 //            parseAnnotationRow(annotations, x.stringMatched(1),\r
 439 //                    x.stringMatched(2));\r
 440 //          }\r
 441 //        }\r
 442 //        else if (annType.equals("GR"))\r
 443 //        {\r
 444 //          // Generic per-Sequence AND per-Column markup, exactly 1 char per\r
 445 //          // column\r
 446 //          /*\r
 447 //           * Feature Description Markup letters ------- -----------\r
 448 //           * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface\r
 449 //           * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane\r
 450 //           * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;\r
 451 //           * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in\r
 452 //           * or after) [0-2]\r
 453 //           */\r
 454 //          if (s.search(annContent))\r
 455 //          {\r
 456 //            String acc = s.stringMatched(1);\r
 457 //            String type = s.stringMatched(2);\r
 458 //            String seq = new String(s.stringMatched(3));\r
 459 //            String description = null;\r
 460 //            // Check for additional information about the current annotation\r
 461 //            // We use a simple string tokenizer here for speed\r
 462 //            StringTokenizer sep = new StringTokenizer(seq, " \t");\r
 463 //            description = sep.nextToken();\r
 464 //            if (sep.hasMoreTokens())\r
 465 //            {\r
 466 //              seq = sep.nextToken();\r
 467 //            }\r
 468 //            else\r
 469 //            {\r
 470 //              seq = description;\r
 471 //              description = new String();\r
 472 //            }\r
 473 //            // sequence id with from-to fields\r
 474 //\r
 475 //            Hashtable ann;\r
 476 //            // Get an object with all the annotations for this sequence\r
 477 //            if (seqAnn.containsKey(acc))\r
 478 //            {\r
 479 //              // logger.debug("Found annotations for " + acc);\r
 480 //              ann = (Hashtable) seqAnn.get(acc);\r
 481 //            }\r
 482 //            else\r
 483 //            {\r
 484 //              // logger.debug("Creating new annotations holder for " + acc);\r
 485 //              ann = new Hashtable();\r
 486 //              seqAnn.put(acc, ann);\r
 487 //            }\r
 488 //            //TODO test structure, call parseAnnotationRow with vector from hashtable for specific sequence\r
 489 //            Hashtable features;\r
 490 //            // Get an object with all the content for an annotation\r
 491 //            if (ann.containsKey("features"))\r
 492 //            {\r
 493 //              // logger.debug("Found features for " + acc);\r
 494 //              features = (Hashtable) ann.get("features");\r
 495 //            }\r
 496 //            else\r
 497 //            {\r
 498 //              // logger.debug("Creating new features holder for " + acc);\r
 499 //              features = new Hashtable();\r
 500 //              ann.put("features", features);\r
 501 //            }\r
 502 //\r
 503 //            Hashtable content;\r
 504 //            if (features.containsKey(this.id2type(type)))\r
 505 //            {\r
 506 //              // logger.debug("Found content for " + this.id2type(type));\r
 507 //              content = (Hashtable) features.get(this.id2type(type));\r
 508 //            }\r
 509 //            else\r
 510 //            {\r
 511 //              // logger.debug("Creating new content holder for " +\r
 512 //              // this.id2type(type));\r
 513 //              content = new Hashtable();\r
 514 //              features.put(this.id2type(type), content);\r
 515 //            }\r
 516 //            String ns = (String) content.get(description);\r
 517 //            if (ns == null)\r
 518 //            {\r
 519 //              ns = "";\r
 520 //            }\r
 521 //            ns += seq;\r
 522 //            content.put(description, ns);\r
 523 //
 524 //            if(type.equals("SS")){\r
 525 //                Hashtable strucAnn;\r
 526 //                if (seqAnn.containsKey(acc))\r
 527 //                {\r
 528 //                  strucAnn = (Hashtable) seqAnn.get(acc);\r
 529 //                }\r
 530 //                else\r
 531 //                {\r
 532 //                  strucAnn = new Hashtable();\r
 533 //                }\r
 534 //                \r
 535 //                Vector newStruc=new Vector();\r
 536 //                parseAnnotationRow(newStruc, type,ns);\r
 537 //                \r
 538 //                strucAnn.put(type, newStruc);\r
 539 //                seqAnn.put(acc, strucAnn);\r
 540 //             }\r
 541 //          }\r
 542 //          else\r
 543 //          {\r
 544 //            System.err\r
 545 //                    .println("Warning - couldn't parse sequence annotation row line:\n"\r
 546 //                            + line);\r
 547 //            // throw new IOException("Error parsing " + line);\r
 548 //          }\r
 549 //        }\r
 550 //        else\r
 551 //        {\r
 552 //          throw new IOException("Unknown annotation detected: " + annType\r
 553 //                  + " " + annContent);\r
 554 //        }\r
 555 //      }\r
 556 //    }\r
 557 //    if (treeString.length() > 0)\r
 558 //    {\r
 559 //      if (treeName == null)\r
 560 //      {\r
 561 //        treeName = "Tree " + (1 + getTreeCount());\r
 562 //      }\r
 563 //      addNewickTree(treeName, treeString.toString());\r
 564 //    }\r
 565 //  }\r
 566 //\r
 567   protected static AlignmentAnnotation parseAnnotationRow(Vector annotation,\r
 568           String label, String annots)\r
 569   {\r
 570     String convert1, convert2 = null;\r
 571 \r
 572     // Convert all bracket types to parentheses\r
 573     Regex openparen = new Regex("(<|\\[)", "(");\r
 574     Regex closeparen = new Regex("(>|\\])", ")");\r
 575 \r
 576    // Detect if file is RNA by looking for bracket types\r
 577     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");\r
 578 \r
 579     convert1 = openparen.replaceAll(annots);\r
 580     convert2 = closeparen.replaceAll(convert1);\r
 581     annots = convert2;\r
 582 \r
 583     String type = (label.indexOf("_cons") == label.length() - 5) ? label\r
 584             .substring(0, label.length() - 5) : label;\r
 585     boolean ss = false;\r
 586     type = id2type(type);\r
 587     if (type.equals("secondary structure"))\r
 588     {\r
 589       ss = true;\r
 590     }\r
 591     // decide on secondary structure or not.\r
 592     Annotation[] els = new Annotation[annots.length()];\r
 593     for (int i = 0; i < annots.length(); i++)\r
 594     {\r
 595       String pos = annots.substring(i, i + 1);\r
 596       Annotation ann;\r
 597       ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not\r
 598       // be written out\r
 599       if (ss)\r
 600       {\r
 601         if (detectbrackets.search(pos))\r
 602         {\r
 603           ann.secondaryStructure = jalview.schemes.ResidueProperties\r
 604                  .getRNASecStrucState(pos).charAt(0);\r
 605         }\r
 606         else\r
 607         {\r
 608           ann.secondaryStructure = jalview.schemes.ResidueProperties\r
 609                   .getDssp3state(pos).charAt(0);\r
 610         }\r
 611 \r
 612         if (ann.secondaryStructure == pos.charAt(0) || pos.charAt(0) == 'C')\r
 613         {\r
 614           ann.displayCharacter = ""; // null; // " ";\r
 615         }\r
 616         else\r
 617        {\r
 618           ann.displayCharacter = " " + ann.displayCharacter;\r
 619         }\r
 620       }\r
 621 \r
 622       els[i] = ann;\r
 623     }\r
 624    AlignmentAnnotation annot = null;\r
 625     Enumeration e = annotation.elements();\r
 626     while (e.hasMoreElements())\r
 627     {\r
 628       annot = (AlignmentAnnotation) e.nextElement();\r
 629       if (annot.label.equals(type))\r
 630         break;\r
 631       annot = null;\r
 632     }\r
 633     if (annot == null)\r
 634     {\r
 635       annot = new AlignmentAnnotation(type, type, els);\r
 636       annotation.addElement(annot);\r
 637     }\r
 638     else\r
 639    {\r
 640       Annotation[] anns = new Annotation[annot.annotations.length\r
 641               + els.length];\r
 642      System.arraycopy(annot.annotations, 0, anns, 0,\r
 643              annot.annotations.length);\r
 644       System.arraycopy(els, 0, anns, annot.annotations.length, els.length);\r
 645       annot.annotations = anns;\r
 646       //System.out.println("else: ");\r
 647    }\r
 648    return annot;\r
 649   }\r
 650 \r
  /**
   * Placeholder for Stockholm output: the argument is ignored and a fixed
   * "not yet implemented" message is returned.
   * 
   * @param s
   *          sequences to write (currently unused)
   * @return the literal string "not yet implemented"
   */
  public static String print(SequenceI[] s)
  {
    return "not yet implemented";
  }
 655 \r
  /**
   * Returns the result of the static {@code print(SequenceI[])} applied to
   * this file's sequences as obtained from {@code getSeqsAsArray()}.
   * 
   * @return whatever {@code print(SequenceI[])} returns
   */
  public String print()
  {
    return print(getSeqsAsArray());
  }
 660 \r
 661   private static Hashtable typeIds = null;\r
 662   static\r
 663   {\r
 664     if (typeIds == null)\r
 665     {\r
 666       typeIds = new Hashtable();\r
 667       typeIds.put("SS", "secondary structure");\r
 668       typeIds.put("SA", "surface accessibility");\r
 669       typeIds.put("TM", "transmembrane");\r
 670       typeIds.put("PP", "posterior probability");\r
 671       typeIds.put("LI", "ligand binding");\r
 672       typeIds.put("AS", "active site");\r
 673       typeIds.put("IN", "intron");\r
 674       typeIds.put("IR", "interacting residue");\r
 675       typeIds.put("AC", "accession");\r
 676       typeIds.put("OS", "organism");\r
 677       typeIds.put("CL", "class");\r
 678       typeIds.put("DE", "description");\r
 679       typeIds.put("DR", "reference");\r
 680       typeIds.put("LO", "look");\r
 681       typeIds.put("RF", "reference positions");\r
 682 \r
 683     }\r
 684   }\r
 685 \r
 686   protected static String id2type(String id)\r
 687   {\r
 688     if (typeIds.containsKey(id))\r
 689     {\r
 690       return (String) typeIds.get(id);\r
 691     }\r
 692     System.err.println("Warning : Unknown Stockholm annotation type code "\r
 693             + id);\r
 694     return id;\r
 695   }\r
 696   /**\r
 697    * //ssline is complete secondary structure line private AlignmentAnnotation\r
 698    * addHelices(Vector annotation, String label, String ssline) {\r
 699    * \r
 700    * // decide on secondary structure or not. Annotation[] els = new\r
 701    * Annotation[ssline.length()]; for (int i = 0; i < ssline.length(); i++) {\r
 702    * String pos = ssline.substring(i, i + 1); Annotation ann; ann = new\r
 703    * Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not\r
 704    * \r
 705    * ann.secondaryStructure =\r
 706    * jalview.schemes.ResidueProperties.getRNAssState(pos).charAt(0);\r
 707    * \r
 708    * ann.displayCharacter = "x" + ann.displayCharacter;\r
 709    * \r
 710    * System.out.println(ann.displayCharacter);\r
 711    * \r
 712    * els[i] = ann; } AlignmentAnnotation helicesAnnot = null; Enumeration e =\r
 713    * annotation.elements(); while (e.hasMoreElements()) { helicesAnnot =\r
 714    * (AlignmentAnnotation) e.nextElement(); if (helicesAnnot.label.equals(type))\r
 715    * break; helicesAnnot = null; } if (helicesAnnot == null) { helicesAnnot =\r
 716    * new AlignmentAnnotation(type, type, els);\r
 717    * annotation.addElement(helicesAnnot); } else { Annotation[] anns = new\r
 718    * Annotation[helicesAnnot.annotations.length + els.length];\r
 719    * System.arraycopy(helicesAnnot.annotations, 0, anns, 0,\r
 720    * helicesAnnot.annotations.length); System.arraycopy(els, 0, anns,\r
 721    * helicesAnnot.annotations.length, els.length); helicesAnnot.annotations =\r
 722    * anns; }\r
 723    * \r
 724    * helicesAnnot.features = Rna.GetBasePairs(ssline);\r
 725    * Rna.HelixMap(helicesAnnot.features);\r
 726    * \r
 727    * \r
 728    * return helicesAnnot; }\r
 729    * \r
 730    */\r
 731   /**\r
 732    * make a friendly ID string.\r
 733    * \r
 734    * @param dataName\r
 735    * @return truncated dataName to after last '/'\r
 736    */\r
 737   private String safeName(String dataName)\r
 738   {\r
 739     int b = 0;\r
 740     while ((b = dataName.indexOf("/")) > -1 && b < dataName.length() )\r
 741     {              \r
 742         dataName = dataName.substring(b + 1).trim();\r
 743         \r
 744     }\r
 745     int e = (dataName.length() - dataName.indexOf("."))+1;\r
 746     dataName = dataName.substring(1,e).trim();\r
 747     return dataName;\r
 748   }\r
 749 }