src/jalview/io/StockholmFile.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
   3  * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  10  *
  11  * Jalview is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty
  13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  14  * PURPOSE.  See the GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  17  */
  18 /*
  19  * This extension was written by Benjamin Schuster-Boeckler at sanger.ac.uk
  20  */
  21 package jalview.io;
  22
  23 import java.io.*;
  24 import java.util.*;
  25
  26 import com.stevesoft.pat.*;
  27 import jalview.datamodel.*;
  28 import jalview.util.Format;
  29
  30 // import org.apache.log4j.*;
  31
/**
 * This class parses a Stockholm format file into Jalview. There are TODOs in
 * this class: we do not know what the database source and version is for the
 * file when parsing the #=GS AC tag, which associates accessions with
 * sequences. Database references are also not parsed correctly: a separate
 * reference string parser must be added to parse the database reference form
 * into Jalview's local representation.
 *
 * @author bsb at sanger.ac.uk
 * @version 0.3 + jalview mods
 *
 */
  44 public class StockholmFile extends AlignFile
  45 {
  46   // static Logger logger = Logger.getLogger("jalview.io.StockholmFile");
  47   StringBuffer out; // output buffer
  48
  49   AlignmentI al;
  50
  /**
   * Creates an empty StockholmFile with no input source and no alignment
   * attached.
   */
  public StockholmFile()
  {
    super();
  }
  54
  /**
   * Creates a new StockholmFile object for output.
   *
   * @param al
   *          the alignment whose properties and annotation rows are read when
   *          the file is printed
   */
  public StockholmFile(AlignmentI al)
  {
    super();
    this.al = al;
  }
  62
  /**
   * Creates a StockholmFile for the given input; both arguments are handed
   * unchanged to the superclass constructor.
   *
   * @param inFile
   *          the input passed on to the superclass
   * @param type
   *          the input type passed on to the superclass
   * @throws IOException
   *           if the superclass constructor fails
   */
  public StockholmFile(String inFile, String type) throws IOException
  {
    super(inFile, type);
  }
  67
  /**
   * Creates a StockholmFile reading from an existing parser source, which is
   * handed unchanged to the superclass constructor.
   *
   * @param source
   *          the source passed on to the superclass
   * @throws IOException
   *           if the superclass constructor fails
   */
  public StockholmFile(FileParse source) throws IOException
  {
    super(source);
  }
  72
  73   public void initData()
  74   {
  75     super.initData();
  76   }
  77
  78   /**
  79    * Parse a file in Stockholm format into Jalview's data model. The file has to
  80    * be passed at construction time
  81    *
  82    * @throws IOException
  83    *           If there is an error with the input file
  84    */
  85   public void parse() throws IOException
  86   {
  87     StringBuffer treeString = new StringBuffer();
  88     String treeName = null;
  89     // --------------- Variable Definitions -------------------
  90     String line;
  91     String version;
  92     // String id;
  93     Hashtable seqAnn = new Hashtable(); // Sequence related annotations
  94     Hashtable seqs = new Hashtable();
  95     Regex p, r, rend, s, x;
  96
  97     // Temporary line for processing RNA annotation
  98     // String RNAannot = "";
  99
 100     // ------------------ Parsing File ----------------------
 101     // First, we have to check that this file has STOCKHOLM format, i.e. the
 102     // first line must match
 103     r = new Regex("# STOCKHOLM ([\\d\\.]+)");
 104     if (!r.search(nextLine()))
 105     {
 106       throw new IOException(
 107               "This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'");
 108     }
 109     else
 110     {
 111       version = r.stringMatched(1);
 112       // logger.debug("Stockholm version: " + version);
 113     }
 114
 115     // We define some Regexes here that will be used regularily later
 116     rend = new Regex("^\\s*\\/\\/"); // Find the end of an alignment
 117     p = new Regex("(\\S+)\\/(\\d+)\\-(\\d+)"); // split sequence id in
 118     // id/from/to
 119     s = new Regex("(\\S+)\\s+(\\S*)\\s+(.*)"); // Parses annotation subtype
 120     r = new Regex("#=(G[FSRC]?)\\s+(.*)"); // Finds any annotation line
 121     x = new Regex("(\\S+)\\s+(\\S+)"); // split id from sequence
 122
 123     // Convert all bracket types to parentheses (necessary for passing to VARNA)
 124     Regex openparen = new Regex("(<|\\[)", "(");
 125     Regex closeparen = new Regex("(>|\\])", ")");
 126
 127     // Detect if file is RNA by looking for bracket types
 128     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
 129
 130     rend.optimize();
 131     p.optimize();
 132     s.optimize();
 133     r.optimize();
 134     x.optimize();
 135     openparen.optimize();
 136     closeparen.optimize();
 137
 138     while ((line = nextLine()) != null)
 139     {
 140       if (line.length() == 0)
 141       {
 142         continue;
 143       }
 144       if (rend.search(line))
 145       {
 146         // End of the alignment, pass stuff back
 147         this.noSeqs = seqs.size();
 148
 149         String propety = null;
 150         Regex pf = new Regex("PF[0-9]{5}(.*)"); // Finds AC for Pfam
 151         Regex rf = new Regex("RF[0-9]{5}(.*)"); // Finds AC for Rfam
 152         if (getAlignmentProperty("AC") != null)
 153         {
 154           String dbType = getAlignmentProperty("AC").toString();
 155           if (pf.search(dbType))
 156           {
 157             propety = "PFAM";
 158           }
 159           else if (rf.search(dbType))
 160           {
 161             propety = "RFAM";
 162           }
 163         }
 164         // logger.debug("Number of sequences: " + this.noSeqs);
 165         Enumeration accs = seqs.keys();
 166         while (accs.hasMoreElements())
 167         {
 168           String acc = (String) accs.nextElement();
 169           // logger.debug("Processing sequence " + acc);
 170           String seq = (String) seqs.remove(acc);
 171           if (maxLength < seq.length())
 172           {
 173             maxLength = seq.length();
 174           }
 175           int start = 1;
 176           int end = -1;
 177           String sid = acc;
 178           /*
 179            * Retrieve hash of annotations for this accession Associate
 180            * Annotation with accession
 181            */
 182           Hashtable accAnnotations = null;
 183
 184           if (seqAnn != null && seqAnn.containsKey(acc))
 185           {
 186             accAnnotations = (Hashtable) seqAnn.remove(acc);
 187             // TODO: add structures to sequence
 188           }
 189
 190           // Split accession in id and from/to
 191           if (p.search(acc))
 192           {
 193             sid = p.stringMatched(1);
 194             start = Integer.parseInt(p.stringMatched(2));
 195             end = Integer.parseInt(p.stringMatched(3));
 196           }
 197           // logger.debug(sid + ", " + start + ", " + end);
 198
 199           Sequence seqO = new Sequence(sid, seq, start, end);
 200           // Add Description (if any)
 201           if (accAnnotations != null && accAnnotations.containsKey("DE"))
 202           {
 203             String desc = (String) accAnnotations.get("DE");
 204             seqO.setDescription((desc == null) ? "" : desc);
 205           }
 206
 207           // Add DB References (if any)
 208           if (accAnnotations != null && accAnnotations.containsKey("DR"))
 209           {
 210             String dbr = (String) accAnnotations.get("DR");
 211             if (dbr != null && dbr.indexOf(";") > -1)
 212             {
 213               String src = dbr.substring(0, dbr.indexOf(";"));
 214               String acn = dbr.substring(dbr.indexOf(";") + 1);
 215               jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
 216             }
 217           }
 218
 219           if (accAnnotations != null && accAnnotations.containsKey("AC")
 220                   && propety != null)
 221           {
 222             String dbr = (String) accAnnotations.get("AC");
 223             if (dbr != null)
 224             {
 225               String src = propety;
 226               String acn = dbr.toString();
 227               jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
 228             }
 229           }
 230
 231           Hashtable features = null;
 232           // We need to adjust the positions of all features to account for gaps
 233           try
 234           {
 235             features = (Hashtable) accAnnotations.remove("features");
 236           } catch (java.lang.NullPointerException e)
 237           {
 238             // loggerwarn("Getting Features for " + acc + ": " +
 239             // e.getMessage());
 240             // continue;
 241           }
 242           // if we have features
 243           if (features != null)
 244           {
 245             int posmap[] = seqO.findPositionMap();
 246             Enumeration i = features.keys();
 247             while (i.hasMoreElements())
 248             {
 249               // TODO: parse out secondary structure annotation as annotation
 250               // row
 251               // TODO: parse out scores as annotation row
 252               // TODO: map coding region to core jalview feature types
 253               String type = i.nextElement().toString();
 254               Hashtable content = (Hashtable) features.remove(type);
 255
 256               // add alignment annotation for this feature
 257               String key = type2id(type);
 258               if (key != null)
 259               {
 260                 if (accAnnotations != null
 261                         && accAnnotations.containsKey(key))
 262                 {
 263                   Vector vv = (Vector) accAnnotations.get(key);
 264                   for (int ii = 0; ii < vv.size(); ii++)
 265                   {
 266                     AlignmentAnnotation an = (AlignmentAnnotation) vv
 267                             .elementAt(ii);
 268                     seqO.addAlignmentAnnotation(an);
 269                   }
 270                 }
 271               }
 272
 273               Enumeration j = content.keys();
 274               while (j.hasMoreElements())
 275               {
 276                 String desc = j.nextElement().toString();
 277                 String ns = content.get(desc).toString();
 278                 char[] byChar = ns.toCharArray();
 279                 for (int k = 0; k < byChar.length; k++)
 280                 {
 281                   char c = byChar[k];
 282                   if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
 283                   // uses
 284                   // '.'
 285                   // for
 286                   // feature
 287                   // background
 288                   {
 289                     int new_pos = posmap[k]; // look up nearest seqeunce
 290                     // position to this column
 291                     SequenceFeature feat = new SequenceFeature(type, desc,
 292                             new_pos, new_pos, 0f, null);
 293
 294                     seqO.addSequenceFeature(feat);
 295                   }
 296                 }
 297               }
 298
 299             }
 300
 301           }
 302           // garbage collect
 303
 304           // logger.debug("Adding seq " + acc + " from " + start + " to " + end
 305           // + ": " + seq);
 306           this.seqs.addElement(seqO);
 307         }
 308         return; // finished parsing this segment of source
 309       }
 310       else if (!r.search(line))
 311       {
 312         // System.err.println("Found sequence line: " + line);
 313
 314         // Split sequence in sequence and accession parts
 315         if (!x.search(line))
 316         {
 317           // logger.error("Could not parse sequence line: " + line);
 318           throw new IOException("Could not parse sequence line: " + line);
 319         }
 320         String ns = (String) seqs.get(x.stringMatched(1));
 321         if (ns == null)
 322         {
 323           ns = "";
 324         }
 325         ns += x.stringMatched(2);
 326
 327         seqs.put(x.stringMatched(1), ns);
 328       }
 329       else
 330       {
 331         String annType = r.stringMatched(1);
 332         String annContent = r.stringMatched(2);
 333
 334         // System.err.println("type:" + annType + " content: " + annContent);
 335
 336         if (annType.equals("GF"))
 337         {
 338           /*
 339            * Generic per-File annotation, free text Magic features: #=GF NH
 340            * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
 341            * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
 342            *
 343            * Compulsory fields: ------------------
 344            *
 345            * AC Accession number: Accession number in form PFxxxxx.version or
 346            * PBxxxxxx. ID Identification: One word name for family. DE
 347            * Definition: Short description of family. AU Author: Authors of the
 348            * entry. SE Source of seed: The source suggesting the seed members
 349            * belong to one family. GA Gathering method: Search threshold to
 350            * build the full alignment. TC Trusted Cutoff: Lowest sequence score
 351            * and domain score of match in the full alignment. NC Noise Cutoff:
 352            * Highest sequence score and domain score of match not in full
 353            * alignment. TP Type: Type of family -- presently Family, Domain,
 354            * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
 355            * Alignment Method The order ls and fs hits are aligned to the model
 356            * to build the full align. // End of alignment.
 357            *
 358            * Optional fields: ----------------
 359            *
 360            * DC Database Comment: Comment about database reference. DR Database
 361            * Reference: Reference to external database. RC Reference Comment:
 362            * Comment about literature reference. RN Reference Number: Reference
 363            * Number. RM Reference Medline: Eight digit medline UI number. RT
 364            * Reference Title: Reference Title. RA Reference Author: Reference
 365            * Author RL Reference Location: Journal location. PI Previous
 366            * identifier: Record of all previous ID lines. KW Keywords: Keywords.
 367            * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
 368            * NL Location: Location of nested domains - sequence ID, start and
 369            * end of insert.
 370            *
 371            * Obsolete fields: ----------- AL Alignment method of seed: The
 372            * method used to align the seed members.
 373            */
 374           // Let's save the annotations, maybe we'll be able to do something
 375           // with them later...
 376           Regex an = new Regex("(\\w+)\\s*(.*)");
 377           if (an.search(annContent))
 378           {
 379             if (an.stringMatched(1).equals("NH"))
 380             {
 381               treeString.append(an.stringMatched(2));
 382             }
 383             else if (an.stringMatched(1).equals("TN"))
 384             {
 385               if (treeString.length() > 0)
 386               {
 387                 if (treeName == null)
 388                 {
 389                   treeName = "Tree " + (getTreeCount() + 1);
 390                 }
 391                 addNewickTree(treeName, treeString.toString());
 392               }
 393               treeName = an.stringMatched(2);
 394               treeString = new StringBuffer();
 395             }
 396             setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
 397           }
 398         }
 399         else if (annType.equals("GS"))
 400         {
 401           // Generic per-Sequence annotation, free text
 402           /*
 403            * Pfam uses these features: Feature Description ---------------------
 404            * ----------- AC <accession> ACcession number DE <freetext>
 405            * DEscription DR <db>; <accession>; Database Reference OS <organism>
 406            * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
 407            * LO <look> Look (Color, etc.)
 408            */
 409           if (s.search(annContent))
 410           {
 411             String acc = s.stringMatched(1);
 412             String type = s.stringMatched(2);
 413             String content = s.stringMatched(3);
 414             // TODO: store DR in a vector.
 415             // TODO: store AC according to generic file db annotation.
 416             Hashtable ann;
 417             if (seqAnn.containsKey(acc))
 418             {
 419               ann = (Hashtable) seqAnn.get(acc);
 420             }
 421             else
 422             {
 423               ann = new Hashtable();
 424             }
 425             ann.put(type, content);
 426             seqAnn.put(acc, ann);
 427           }
 428           else
 429           {
 430             throw new IOException("Error parsing " + line);
 431           }
 432         }
 433         else if (annType.equals("GC"))
 434         {
 435           // Generic per-Column annotation, exactly 1 char per column
 436           // always need a label.
 437           if (x.search(annContent))
 438           {
 439             // parse out and create alignment annotation directly.
 440             parseAnnotationRow(annotations, x.stringMatched(1),
 441                     x.stringMatched(2));
 442           }
 443         }
 444         else if (annType.equals("GR"))
 445         {
 446           // Generic per-Sequence AND per-Column markup, exactly 1 char per
 447           // column
 448           /*
 449            * Feature Description Markup letters ------- -----------
 450            * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
 451            * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
 452            * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
 453            * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
 454            * or after) [0-2]
 455            */
 456           if (s.search(annContent))
 457           {
 458             String acc = s.stringMatched(1);
 459             String type = s.stringMatched(2);
 460             String seq = new String(s.stringMatched(3));
 461             String description = null;
 462             // Check for additional information about the current annotation
 463             // We use a simple string tokenizer here for speed
 464             StringTokenizer sep = new StringTokenizer(seq, " \t");
 465             description = sep.nextToken();
 466             if (sep.hasMoreTokens())
 467             {
 468               seq = sep.nextToken();
 469             }
 470             else
 471             {
 472               seq = description;
 473               description = new String();
 474             }
 475             // sequence id with from-to fields
 476
 477             Hashtable ann;
 478             // Get an object with all the annotations for this sequence
 479             if (seqAnn.containsKey(acc))
 480             {
 481               // logger.debug("Found annotations for " + acc);
 482               ann = (Hashtable) seqAnn.get(acc);
 483             }
 484             else
 485             {
 486               // logger.debug("Creating new annotations holder for " + acc);
 487               ann = new Hashtable();
 488               seqAnn.put(acc, ann);
 489             }
 490             // TODO test structure, call parseAnnotationRow with vector from
 491             // hashtable for specific sequence
 492             Hashtable features;
 493             // Get an object with all the content for an annotation
 494             if (ann.containsKey("features"))
 495             {
 496               // logger.debug("Found features for " + acc);
 497               features = (Hashtable) ann.get("features");
 498             }
 499             else
 500             {
 501               // logger.debug("Creating new features holder for " + acc);
 502               features = new Hashtable();
 503               ann.put("features", features);
 504             }
 505
 506             Hashtable content;
 507             if (features.containsKey(this.id2type(type)))
 508             {
 509               // logger.debug("Found content for " + this.id2type(type));
 510               content = (Hashtable) features.get(this.id2type(type));
 511             }
 512             else
 513             {
 514               // logger.debug("Creating new content holder for " +
 515               // this.id2type(type));
 516               content = new Hashtable();
 517               features.put(this.id2type(type), content);
 518             }
 519             String ns = (String) content.get(description);
 520             if (ns == null)
 521             {
 522               ns = "";
 523             }
 524             ns += seq;
 525             content.put(description, ns);
 526             Hashtable strucAnn;
 527             if (seqAnn.containsKey(acc))
 528             {
 529               strucAnn = (Hashtable) seqAnn.get(acc);
 530             }
 531             else
 532             {
 533               strucAnn = new Hashtable();
 534             }
 535
 536             Vector newStruc = new Vector();
 537             parseAnnotationRow(newStruc, type, ns);
 538             strucAnn.put(type, newStruc);
 539             seqAnn.put(acc, strucAnn);
 540           }
 541           else
 542           {
 543             System.err
 544                     .println("Warning - couldn't parse sequence annotation row line:\n"
 545                             + line);
 546             // throw new IOException("Error parsing " + line);
 547           }
 548         }
 549         else
 550         {
 551           throw new IOException("Unknown annotation detected: " + annType
 552                   + " " + annContent);
 553         }
 554       }
 555     }
 556     if (treeString.length() > 0)
 557     {
 558       if (treeName == null)
 559       {
 560         treeName = "Tree " + (1 + getTreeCount());
 561       }
 562       addNewickTree(treeName, treeString.toString());
 563     }
 564   }
 565
 566   protected static AlignmentAnnotation parseAnnotationRow(
 567           Vector annotation, String label, String annots)
 568   {
 569     String convert1, convert2 = null;
 570
 571     // Convert all bracket types to parentheses
 572     Regex openparen = new Regex("(<|\\[)", "(");
 573     Regex closeparen = new Regex("(>|\\])", ")");
 574
 575     // Detect if file is RNA by looking for bracket types
 576     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
 577
 578     convert1 = openparen.replaceAll(annots);
 579     convert2 = closeparen.replaceAll(convert1);
 580     annots = convert2;
 581
 582     String type = label;
 583     if (label.contains("_cons"))
 584     {
 585       type = (label.indexOf("_cons") == label.length() - 5) ? label
 586               .substring(0, label.length() - 5) : label;
 587     }
 588     boolean ss = false;
 589     type = id2type(type);
 590     if (type.equals("secondary structure"))
 591     {
 592       ss = true;
 593     }
 594     // decide on secondary structure or not.
 595     Annotation[] els = new Annotation[annots.length()];
 596     for (int i = 0; i < annots.length(); i++)
 597     {
 598       String pos = annots.substring(i, i + 1);
 599       Annotation ann;
 600       ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
 601       // be written out
 602       if (ss)
 603       {
 604         if (detectbrackets.search(pos))
 605         {
 606           ann.secondaryStructure = jalview.schemes.ResidueProperties
 607                   .getRNASecStrucState(pos).charAt(0);
 608         }
 609         else
 610         {
 611           ann.secondaryStructure = jalview.schemes.ResidueProperties
 612                   .getDssp3state(pos).charAt(0);
 613         }
 614
 615         if (ann.secondaryStructure == pos.charAt(0) || pos.charAt(0) == 'C')
 616         {
 617           ann.displayCharacter = ""; // null; // " ";
 618         }
 619         else
 620         {
 621           ann.displayCharacter = " " + ann.displayCharacter;
 622         }
 623       }
 624
 625       els[i] = ann;
 626     }
 627     AlignmentAnnotation annot = null;
 628     Enumeration e = annotation.elements();
 629     while (e.hasMoreElements())
 630     {
 631       annot = (AlignmentAnnotation) e.nextElement();
 632       if (annot.label.equals(type))
 633         break;
 634       annot = null;
 635     }
 636     if (annot == null)
 637     {
 638       annot = new AlignmentAnnotation(type, type, els);
 639       annotation.addElement(annot);
 640     }
 641     else
 642     {
 643       Annotation[] anns = new Annotation[annot.annotations.length
 644               + els.length];
 645       System.arraycopy(annot.annotations, 0, anns, 0,
 646               annot.annotations.length);
 647       System.arraycopy(els, 0, anns, annot.annotations.length, els.length);
 648       annot.annotations = anns;
 649       // System.out.println("else: ");
 650     }
 651     return annot;
 652   }
 653
 654   public String print(SequenceI[] s)
 655   {
 656     // find max length of id
 657     int max = 0;
 658     int maxid = 0;
 659     int in = 0;
 660     Hashtable dataRef = null;
 661     while ((in < s.length) && (s[in] != null))
 662     {
 663       String tmp = printId(s[in]);
 664       if (s[in].getSequence().length > max)
 665       {
 666         max = s[in].getSequence().length;
 667       }
 668
 669       if (tmp.length() > maxid)
 670       {
 671         maxid = tmp.length();
 672       }
 673       if (s[in].getDBRef() != null)
 674       {
 675         for (int idb = 0; idb < s[in].getDBRef().length; idb++)
 676         {
 677           if (dataRef == null)
 678             dataRef = new Hashtable();
 679
 680           String datAs1 = s[in].getDBRef()[idb].getSource().toString()
 681                   + " ; "
 682                   + s[in].getDBRef()[idb].getAccessionId().toString();
 683           dataRef.put(tmp, datAs1);
 684         }
 685       }
 686       in++;
 687     }
 688     maxid += 9;
 689     int i = 0;
 690
 691     // output database type
 692     if (al.getProperties() != null)
 693     {
 694       if (!al.getProperties().isEmpty())
 695       {
 696         Enumeration key = al.getProperties().keys();
 697         Enumeration val = al.getProperties().elements();
 698         while (key.hasMoreElements())
 699         {
 700           out.append("#=GF " + key.nextElement() + " " + val.nextElement());
 701           out.append(newline);
 702         }
 703       }
 704     }
 705
 706     // output database accessions
 707     if (dataRef != null)
 708     {
 709       Enumeration en = dataRef.keys();
 710       while (en.hasMoreElements())
 711       {
 712         Object idd = en.nextElement();
 713         String type = (String) dataRef.remove(idd);
 714         out.append(new Format("%-" + (maxid - 2) + "s").form("#=GS "
 715                 + idd.toString() + " "));
 716         if (type.contains("PFAM") || type.contains("RFAM"))
 717         {
 718
 719           out.append(" AC " + type.substring(type.indexOf(";") + 1));
 720         }
 721         else
 722         {
 723           out.append(" DR " + type + " ");
 724         }
 725         out.append(newline);
 726       }
 727     }
 728
 729     // output annotations
 730     while (i < s.length && s[i] != null)
 731     {
 732       if (s[i].getDatasetSequence() != null)
 733       {
 734         SequenceI ds = s[i].getDatasetSequence();
 735         AlignmentAnnotation[] alAnot;
 736         Annotation[] ann;
 737         Annotation annot;
 738         alAnot = s[i].getAnnotation();
 739         String feature = "";
 740         if (alAnot != null)
 741         {
 742           for (int j = 0; j < alAnot.length; j++)
 743           {
 744             if (ds.getSequenceFeatures() != null)
 745             {
 746               feature = ds.getSequenceFeatures()[0].type;
 747             }
 748             String key = type2id(feature);
 749
 750             if (key == null)
 751               continue;
 752
 753             // out.append("#=GR ");
 754             out.append(new Format("%-" + maxid + "s").form("#=GR "
 755                     + printId(s[i]) + " " + key + " "));
 756             ann = alAnot[j].annotations;
 757             String seq = "";
 758             for (int k = 0; k < ann.length; k++)
 759             {
 760               annot = ann[k];
 761               String ch = (annot == null) ? Character.toString(s[i]
 762                       .getCharAt(k)) : annot.displayCharacter;
 763               if (ch.length() == 0)
 764               {
 765                 if (key.equals("SS"))
 766                 {
 767                   char ll = annot.secondaryStructure;
 768                   seq = (Character.toString(ll).equals(" ")) ? seq + "C"
 769                           : seq + ll;
 770                 }
 771                 else
 772                 {
 773                   seq += ".";
 774                 }
 775               }
 776               else if (ch.length() == 1)
 777               {
 778                 seq += ch;
 779               }
 780               else if (ch.length() > 1)
 781               {
 782                 seq += ch.charAt(1);
 783               }
 784             }
 785             out.append(seq);
 786             out.append(newline);
 787           }
 788         }
 789       }
 790
 791       out.append(new Format("%-" + maxid + "s").form(printId(s[i]) + " "));
 792       out.append(s[i].getSequenceAsString());
 793       out.append(newline);
 794       i++;
 795     }
 796
 797     // alignment annotation
 798     AlignmentAnnotation aa;
 799     if (al.getAlignmentAnnotation() != null)
 800     {
 801       for (int ia = 0; ia < al.getAlignmentAnnotation().length; ia++)
 802       {
 803         aa = al.getAlignmentAnnotation()[ia];
 804         if (aa.autoCalculated || !aa.visible)
 805         {
 806           continue;
 807         }
 808         String seq = "";
 809         String label;
 810
 811         if (aa.label.equals("seq"))
 812           label = "seq_cons";
 813         else
 814           label = type2id(aa.label.toLowerCase()) + "_cons";
 815
 816         if (label == null)
 817           label = aa.label;
 818
 819         out.append(new Format("%-" + maxid + "s").form("#=GC " + label
 820                 + " "));
 821         for (int j = 0; j < aa.annotations.length; j++)
 822         {
 823           String ch = (aa.annotations[j] == null) ? "-"
 824                   : aa.annotations[j].displayCharacter;
 825           if (ch.length() == 0)
 826           {
 827             char ll = aa.annotations[j].secondaryStructure;
 828             if (Character.toString(ll).equals(" "))
 829               seq += "C";
 830             else
 831               seq += ll;
 832           }
 833           else if (ch.length() == 1)
 834           {
 835             seq += ch;
 836           }
 837           else if (ch.length() > 1)
 838           {
 839             seq += ch.charAt(1);
 840           }
 841         }
 842         out.append(seq);
 843         out.append(newline);
 844       }
 845     }
 846     return out.toString();
 847   }
 848
 849   public String print()
 850   {
 851     out = new StringBuffer();
 852     out.append("# STOCKHOLM 1.0");
 853     out.append(newline);
 854     print(getSeqsAsArray());
 855
 856     out.append("//");
 857     out.append(newline);
 858     return out.toString();
 859   }
 860
 861   private static Hashtable typeIds = null;
 862   static
 863   {
 864     if (typeIds == null)
 865     {
 866       typeIds = new Hashtable();
 867       typeIds.put("SS", "secondary structure");
 868       typeIds.put("SA", "surface accessibility");
 869       typeIds.put("TM", "transmembrane");
 870       typeIds.put("PP", "posterior probability");
 871       typeIds.put("LI", "ligand binding");
 872       typeIds.put("AS", "active site");
 873       typeIds.put("IN", "intron");
 874       typeIds.put("IR", "interacting residue");
 875       typeIds.put("AC", "accession");
 876       typeIds.put("OS", "organism");
 877       typeIds.put("CL", "class");
 878       typeIds.put("DE", "description");
 879       typeIds.put("DR", "reference");
 880       typeIds.put("LO", "look");
 881       typeIds.put("RF", "reference positions");
 882
 883     }
 884   }
 885
 886   protected static String id2type(String id)
 887   {
 888     if (typeIds.containsKey(id))
 889     {
 890       return (String) typeIds.get(id);
 891     }
 892     System.err.println("Warning : Unknown Stockholm annotation type code "
 893             + id);
 894     return id;
 895   }
 896
 897   protected static String type2id(String type)
 898   {
 899     String key = null;
 900     Enumeration e = typeIds.keys();
 901     while (e.hasMoreElements())
 902     {
 903       Object ll = e.nextElement();
 904       if (typeIds.get(ll).toString().equals(type))
 905       {
 906         key = (String) ll;
 907         break;
 908       }
 909     }
 910     if (key != null)
 911     {
 912       return (String) key;
 913     }
 914     System.err.println("Warning : Unknown Stockholm annotation type: "
 915             + type);
 916     return key;
 917   }
 918   /**
 919    * //ssline is complete secondary structure line private AlignmentAnnotation
 920    * addHelices(Vector annotation, String label, String ssline) {
 921    *
 922    * // decide on secondary structure or not. Annotation[] els = new
 923    * Annotation[ssline.length()]; for (int i = 0; i < ssline.length(); i++) {
 924    * String pos = ssline.substring(i, i + 1); Annotation ann; ann = new
 925    * Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
 926    *
 927    * ann.secondaryStructure =
 928    * jalview.schemes.ResidueProperties.getRNAssState(pos).charAt(0);
 929    *
 930    * ann.displayCharacter = "x" + ann.displayCharacter;
 931    *
 932    * System.out.println(ann.displayCharacter);
 933    *
 934    * els[i] = ann; } AlignmentAnnotation helicesAnnot = null; Enumeration e =
 935    * annotation.elements(); while (e.hasMoreElements()) { helicesAnnot =
 936    * (AlignmentAnnotation) e.nextElement(); if (helicesAnnot.label.equals(type))
 937    * break; helicesAnnot = null; } if (helicesAnnot == null) { helicesAnnot =
 938    * new AlignmentAnnotation(type, type, els);
 939    * annotation.addElement(helicesAnnot); } else { Annotation[] anns = new
 940    * Annotation[helicesAnnot.annotations.length + els.length];
 941    * System.arraycopy(helicesAnnot.annotations, 0, anns, 0,
 942    * helicesAnnot.annotations.length); System.arraycopy(els, 0, anns,
 943    * helicesAnnot.annotations.length, els.length); helicesAnnot.annotations =
 944    * anns; }
 945    *
 946    * helicesAnnot.features = Rna.GetBasePairs(ssline);
 947    * Rna.HelixMap(helicesAnnot.features);
 948    *
 949    *
 950    * return helicesAnnot; }
 951    */
 952 }