src/jalview/io/StockholmFile.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
   3  * Copyright (C) 2014 The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  10  *
  11  * Jalview is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty
  13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  14  * PURPOSE.  See the GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  17  * The Jalview Authors are detailed in the 'AUTHORS' file.
  18  */
  19 /*
  20  * This extension was written by Benjamin Schuster-Boeckler at sanger.ac.uk
  21  */
  22 package jalview.io;
  23
  24 import java.io.*;
  25 import java.util.*;
  26
  27 import com.stevesoft.pat.*;
  28 import jalview.datamodel.*;
  29 import jalview.util.Format;
  30
  31 // import org.apache.log4j.*;
  32
  33 /**
  34  * This class is supposed to parse a Stockholm format file into Jalview There
  35  * are TODOs in this class: we do not know what the database source and version
  36  * is for the file when parsing the #GS= AC tag which associates accessions with
  37  * sequences. Database references are also not parsed correctly: a separate
  38  * reference string parser must be added to parse the database reference form
  39  * into Jalview's local representation.
  40  *
  41  * @author bsb at sanger.ac.uk
  42  * @version 0.3 + jalview mods
  43  *
  44  */
  45 public class StockholmFile extends AlignFile
  46 {
  47   // static Logger logger = Logger.getLogger("jalview.io.StockholmFile");
  48   StringBuffer out; // output buffer
  49
  50   AlignmentI al;
  51
  /**
   * Creates an empty StockholmFile with no input source and no alignment
   * attached.
   */
  public StockholmFile()
  {
    super();
  }
  55
  /**
   * Creates a new StockholmFile object for output.
   *
   * @param al
   *          the alignment stored in this object's <code>al</code> field
   */
  public StockholmFile(AlignmentI al)
  {
    super();
    this.al = al;
  }
  63
  /**
   * Creates a StockholmFile for input, handing both arguments straight to the
   * superclass constructor.
   *
   * @param inFile
   *          passed unchanged to the superclass
   * @param type
   *          passed unchanged to the superclass
   * @throws IOException
   *           if the superclass constructor fails
   */
  public StockholmFile(String inFile, String type) throws IOException
  {
    super(inFile, type);
  }
  68
  /**
   * Creates a StockholmFile for input from an existing FileParse source, which
   * is handed straight to the superclass constructor.
   *
   * @param source
   *          passed unchanged to the superclass
   * @throws IOException
   *           if the superclass constructor fails
   */
  public StockholmFile(FileParse source) throws IOException
  {
    super(source);
  }
  73
  74   public void initData()
  75   {
  76     super.initData();
  77   }
  78
  79   /**
  80    * Parse a file in Stockholm format into Jalview's data model. The file has to
  81    * be passed at construction time
  82    *
  83    * @throws IOException
  84    *           If there is an error with the input file
  85    */
  86   public void parse() throws IOException
  87   {
  88     StringBuffer treeString = new StringBuffer();
  89     String treeName = null;
  90     // --------------- Variable Definitions -------------------
  91     String line;
  92     String version;
  93     // String id;
  94     Hashtable seqAnn = new Hashtable(); // Sequence related annotations
  95     Hashtable seqs = new Hashtable();
  96     Regex p, r, rend, s, x;
  97
  98     // Temporary line for processing RNA annotation
  99     // String RNAannot = "";
 100
 101     // ------------------ Parsing File ----------------------
 102     // First, we have to check that this file has STOCKHOLM format, i.e. the
 103     // first line must match
 104     r = new Regex("# STOCKHOLM ([\\d\\.]+)");
 105     if (!r.search(nextLine()))
 106     {
 107       throw new IOException(
 108               "This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'");
 109     }
 110     else
 111     {
 112       version = r.stringMatched(1);
 113       // logger.debug("Stockholm version: " + version);
 114     }
 115
 116     // We define some Regexes here that will be used regularily later
 117     rend = new Regex("^\\s*\\/\\/"); // Find the end of an alignment
 118     p = new Regex("(\\S+)\\/(\\d+)\\-(\\d+)"); // split sequence id in
 119     // id/from/to
 120     s = new Regex("(\\S+)\\s+(\\S*)\\s+(.*)"); // Parses annotation subtype
 121     r = new Regex("#=(G[FSRC]?)\\s+(.*)"); // Finds any annotation line
 122     x = new Regex("(\\S+)\\s+(\\S+)"); // split id from sequence
 123
 124     // Convert all bracket types to parentheses (necessary for passing to VARNA)
 125     Regex openparen = new Regex("(<|\\[)", "(");
 126     Regex closeparen = new Regex("(>|\\])", ")");
 127
 128     // Detect if file is RNA by looking for bracket types
 129     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
 130
 131     rend.optimize();
 132     p.optimize();
 133     s.optimize();
 134     r.optimize();
 135     x.optimize();
 136     openparen.optimize();
 137     closeparen.optimize();
 138
 139     while ((line = nextLine()) != null)
 140     {
 141       if (line.length() == 0)
 142       {
 143         continue;
 144       }
 145       if (rend.search(line))
 146       {
 147         // End of the alignment, pass stuff back
 148         this.noSeqs = seqs.size();
 149
 150         String seqdb,dbsource = null;
 151         Regex pf = new Regex("PF[0-9]{5}(.*)"); // Finds AC for Pfam
 152         Regex rf = new Regex("RF[0-9]{5}(.*)"); // Finds AC for Rfam
 153         if (getAlignmentProperty("AC") != null)
 154         {
 155           String dbType = getAlignmentProperty("AC").toString();
 156           if (pf.search(dbType))
 157           {
 158             // PFAM Alignment - so references are typically from Uniprot
 159             dbsource = "PFAM";
 160           }
 161           else if (rf.search(dbType))
 162           {
 163             dbsource = "RFAM";
 164           }
 165         }
 166         // logger.debug("Number of sequences: " + this.noSeqs);
 167         Enumeration accs = seqs.keys();
 168         while (accs.hasMoreElements())
 169         {
 170           String acc = (String) accs.nextElement();
 171           // logger.debug("Processing sequence " + acc);
 172           String seq = (String) seqs.remove(acc);
 173           if (maxLength < seq.length())
 174           {
 175             maxLength = seq.length();
 176           }
 177           int start = 1;
 178           int end = -1;
 179           String sid = acc;
 180           /*
 181            * Retrieve hash of annotations for this accession Associate
 182            * Annotation with accession
 183            */
 184           Hashtable accAnnotations = null;
 185
 186           if (seqAnn != null && seqAnn.containsKey(acc))
 187           {
 188             accAnnotations = (Hashtable) seqAnn.remove(acc);
 189             // TODO: add structures to sequence
 190           }
 191
 192           // Split accession in id and from/to
 193           if (p.search(acc))
 194           {
 195             sid = p.stringMatched(1);
 196             start = Integer.parseInt(p.stringMatched(2));
 197             end = Integer.parseInt(p.stringMatched(3));
 198           }
 199           // logger.debug(sid + ", " + start + ", " + end);
 200
 201           Sequence seqO = new Sequence(sid, seq, start, end);
 202           // Add Description (if any)
 203           if (accAnnotations != null && accAnnotations.containsKey("DE"))
 204           {
 205             String desc = (String) accAnnotations.get("DE");
 206             seqO.setDescription((desc == null) ? "" : desc);
 207           }
 208
 209           // Add DB References (if any)
 210           if (accAnnotations != null && accAnnotations.containsKey("DR"))
 211           {
 212             String dbr = (String) accAnnotations.get("DR");
 213             if (dbr != null && dbr.indexOf(";") > -1)
 214             {
 215               String src = dbr.substring(0, dbr.indexOf(";"));
 216               String acn = dbr.substring(dbr.indexOf(";") + 1);
 217               jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
 218             }
 219           }
 220
 221           if (accAnnotations != null && accAnnotations.containsKey("AC"))
 222           {
 223             if (dbsource != null)
 224             {
 225               String dbr = (String) accAnnotations.get("AC");
 226               if (dbr != null)
 227               {
 228                 // we could get very clever here - but for now - just try to guess accession type from source of alignment plus structure of accession
 229                 guessDatabaseFor(seqO, dbr, dbsource);
 230
 231               }
 232             }
 233             // else - do what ?  add the data anyway and prompt the user to specify what references these are ?
 234           }
 235
 236           Hashtable features = null;
 237           // We need to adjust the positions of all features to account for gaps
 238           try
 239           {
 240             features = (Hashtable) accAnnotations.remove("features");
 241           } catch (java.lang.NullPointerException e)
 242           {
 243             // loggerwarn("Getting Features for " + acc + ": " +
 244             // e.getMessage());
 245             // continue;
 246           }
 247           // if we have features
 248           if (features != null)
 249           {
 250             int posmap[] = seqO.findPositionMap();
 251             Enumeration i = features.keys();
 252             while (i.hasMoreElements())
 253             {
 254               // TODO: parse out secondary structure annotation as annotation
 255               // row
 256               // TODO: parse out scores as annotation row
 257               // TODO: map coding region to core jalview feature types
 258               String type = i.nextElement().toString();
 259               Hashtable content = (Hashtable) features.remove(type);
 260
 261               // add alignment annotation for this feature
 262               String key = type2id(type);
 263               if (key != null)
 264               {
 265                 if (accAnnotations != null
 266                         && accAnnotations.containsKey(key))
 267                 {
 268                   Vector vv = (Vector) accAnnotations.get(key);
 269                   for (int ii = 0; ii < vv.size(); ii++)
 270                   {
 271                     AlignmentAnnotation an = (AlignmentAnnotation) vv
 272                             .elementAt(ii);
 273                     seqO.addAlignmentAnnotation(an);
 274                   }
 275                 }
 276               }
 277
 278               Enumeration j = content.keys();
 279               while (j.hasMoreElements())
 280               {
 281                 String desc = j.nextElement().toString();
 282                 String ns = content.get(desc).toString();
 283                 char[] byChar = ns.toCharArray();
 284                 for (int k = 0; k < byChar.length; k++)
 285                 {
 286                   char c = byChar[k];
 287                   if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
 288                   // uses
 289                   // '.'
 290                   // for
 291                   // feature
 292                   // background
 293                   {
 294                     int new_pos = posmap[k]; // look up nearest seqeunce
 295                     // position to this column
 296                     SequenceFeature feat = new SequenceFeature(type, desc,
 297                             new_pos, new_pos, 0f, null);
 298
 299                     seqO.addSequenceFeature(feat);
 300                   }
 301                 }
 302               }
 303
 304             }
 305
 306           }
 307           // garbage collect
 308
 309           // logger.debug("Adding seq " + acc + " from " + start + " to " + end
 310           // + ": " + seq);
 311           this.seqs.addElement(seqO);
 312         }
 313         return; // finished parsing this segment of source
 314       }
 315       else if (!r.search(line))
 316       {
 317         // System.err.println("Found sequence line: " + line);
 318
 319         // Split sequence in sequence and accession parts
 320         if (!x.search(line))
 321         {
 322           // logger.error("Could not parse sequence line: " + line);
 323           throw new IOException("Could not parse sequence line: " + line);
 324         }
 325         String ns = (String) seqs.get(x.stringMatched(1));
 326         if (ns == null)
 327         {
 328           ns = "";
 329         }
 330         ns += x.stringMatched(2);
 331
 332         seqs.put(x.stringMatched(1), ns);
 333       }
 334       else
 335       {
 336         String annType = r.stringMatched(1);
 337         String annContent = r.stringMatched(2);
 338
 339         // System.err.println("type:" + annType + " content: " + annContent);
 340
 341         if (annType.equals("GF"))
 342         {
 343           /*
 344            * Generic per-File annotation, free text Magic features: #=GF NH
 345            * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
 346            * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
 347            *
 348            * Compulsory fields: ------------------
 349            *
 350            * AC Accession number: Accession number in form PFxxxxx.version or
 351            * PBxxxxxx. ID Identification: One word name for family. DE
 352            * Definition: Short description of family. AU Author: Authors of the
 353            * entry. SE Source of seed: The source suggesting the seed members
 354            * belong to one family. GA Gathering method: Search threshold to
 355            * build the full alignment. TC Trusted Cutoff: Lowest sequence score
 356            * and domain score of match in the full alignment. NC Noise Cutoff:
 357            * Highest sequence score and domain score of match not in full
 358            * alignment. TP Type: Type of family -- presently Family, Domain,
 359            * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
 360            * Alignment Method The order ls and fs hits are aligned to the model
 361            * to build the full align. // End of alignment.
 362            *
 363            * Optional fields: ----------------
 364            *
 365            * DC Database Comment: Comment about database reference. DR Database
 366            * Reference: Reference to external database. RC Reference Comment:
 367            * Comment about literature reference. RN Reference Number: Reference
 368            * Number. RM Reference Medline: Eight digit medline UI number. RT
 369            * Reference Title: Reference Title. RA Reference Author: Reference
 370            * Author RL Reference Location: Journal location. PI Previous
 371            * identifier: Record of all previous ID lines. KW Keywords: Keywords.
 372            * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
 373            * NL Location: Location of nested domains - sequence ID, start and
 374            * end of insert.
 375            *
 376            * Obsolete fields: ----------- AL Alignment method of seed: The
 377            * method used to align the seed members.
 378            */
 379           // Let's save the annotations, maybe we'll be able to do something
 380           // with them later...
 381           Regex an = new Regex("(\\w+)\\s*(.*)");
 382           if (an.search(annContent))
 383           {
 384             if (an.stringMatched(1).equals("NH"))
 385             {
 386               treeString.append(an.stringMatched(2));
 387             }
 388             else if (an.stringMatched(1).equals("TN"))
 389             {
 390               if (treeString.length() > 0)
 391               {
 392                 if (treeName == null)
 393                 {
 394                   treeName = "Tree " + (getTreeCount() + 1);
 395                 }
 396                 addNewickTree(treeName, treeString.toString());
 397               }
 398               treeName = an.stringMatched(2);
 399               treeString = new StringBuffer();
 400             }
 401             setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
 402           }
 403         }
 404         else if (annType.equals("GS"))
 405         {
 406           // Generic per-Sequence annotation, free text
 407           /*
 408            * Pfam uses these features: Feature Description ---------------------
 409            * ----------- AC <accession> ACcession number DE <freetext>
 410            * DEscription DR <db>; <accession>; Database Reference OS <organism>
 411            * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
 412            * LO <look> Look (Color, etc.)
 413            */
 414           if (s.search(annContent))
 415           {
 416             String acc = s.stringMatched(1);
 417             String type = s.stringMatched(2);
 418             String content = s.stringMatched(3);
 419             // TODO: store DR in a vector.
 420             // TODO: store AC according to generic file db annotation.
 421             Hashtable ann;
 422             if (seqAnn.containsKey(acc))
 423             {
 424               ann = (Hashtable) seqAnn.get(acc);
 425             }
 426             else
 427             {
 428               ann = new Hashtable();
 429             }
 430             ann.put(type, content);
 431             seqAnn.put(acc, ann);
 432           }
 433           else
 434           {
 435             throw new IOException("Error parsing " + line);
 436           }
 437         }
 438         else if (annType.equals("GC"))
 439         {
 440           // Generic per-Column annotation, exactly 1 char per column
 441           // always need a label.
 442           if (x.search(annContent))
 443           {
 444             // parse out and create alignment annotation directly.
 445             parseAnnotationRow(annotations, x.stringMatched(1),
 446                     x.stringMatched(2));
 447           }
 448         }
 449         else if (annType.equals("GR"))
 450         {
 451           // Generic per-Sequence AND per-Column markup, exactly 1 char per
 452           // column
 453           /*
 454            * Feature Description Markup letters ------- -----------
 455            * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
 456            * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
 457            * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
 458            * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
 459            * or after) [0-2]
 460            */
 461           if (s.search(annContent))
 462           {
 463             String acc = s.stringMatched(1);
 464             String type = s.stringMatched(2);
 465             String seq = new String(s.stringMatched(3));
 466             String description = null;
 467             // Check for additional information about the current annotation
 468             // We use a simple string tokenizer here for speed
 469             StringTokenizer sep = new StringTokenizer(seq, " \t");
 470             description = sep.nextToken();
 471             if (sep.hasMoreTokens())
 472             {
 473               seq = sep.nextToken();
 474             }
 475             else
 476             {
 477               seq = description;
 478               description = new String();
 479             }
 480             // sequence id with from-to fields
 481
 482             Hashtable ann;
 483             // Get an object with all the annotations for this sequence
 484             if (seqAnn.containsKey(acc))
 485             {
 486               // logger.debug("Found annotations for " + acc);
 487               ann = (Hashtable) seqAnn.get(acc);
 488             }
 489             else
 490             {
 491               // logger.debug("Creating new annotations holder for " + acc);
 492               ann = new Hashtable();
 493               seqAnn.put(acc, ann);
 494             }
 495             // TODO test structure, call parseAnnotationRow with vector from
 496             // hashtable for specific sequence
 497             Hashtable features;
 498             // Get an object with all the content for an annotation
 499             if (ann.containsKey("features"))
 500             {
 501               // logger.debug("Found features for " + acc);
 502               features = (Hashtable) ann.get("features");
 503             }
 504             else
 505             {
 506               // logger.debug("Creating new features holder for " + acc);
 507               features = new Hashtable();
 508               ann.put("features", features);
 509             }
 510
 511             Hashtable content;
 512             if (features.containsKey(this.id2type(type)))
 513             {
 514               // logger.debug("Found content for " + this.id2type(type));
 515               content = (Hashtable) features.get(this.id2type(type));
 516             }
 517             else
 518             {
 519               // logger.debug("Creating new content holder for " +
 520               // this.id2type(type));
 521               content = new Hashtable();
 522               features.put(this.id2type(type), content);
 523             }
 524             String ns = (String) content.get(description);
 525             if (ns == null)
 526             {
 527               ns = "";
 528             }
 529             ns += seq;
 530             content.put(description, ns);
 531             Hashtable strucAnn;
 532             if (seqAnn.containsKey(acc))
 533             {
 534               strucAnn = (Hashtable) seqAnn.get(acc);
 535             }
 536             else
 537             {
 538               strucAnn = new Hashtable();
 539             }
 540
 541             Vector newStruc = new Vector();
 542             parseAnnotationRow(newStruc, type, ns);
 543             strucAnn.put(type, newStruc);
 544             seqAnn.put(acc, strucAnn);
 545           }
 546           else
 547           {
 548             System.err
 549                     .println("Warning - couldn't parse sequence annotation row line:\n"
 550                             + line);
 551             // throw new IOException("Error parsing " + line);
 552           }
 553         }
 554         else
 555         {
 556           throw new IOException("Unknown annotation detected: " + annType
 557                   + " " + annContent);
 558         }
 559       }
 560     }
 561     if (treeString.length() > 0)
 562     {
 563       if (treeName == null)
 564       {
 565         treeName = "Tree " + (1 + getTreeCount());
 566       }
 567       addNewickTree(treeName, treeString.toString());
 568     }
 569   }
 570
 571   /**
 572    * Demangle an accession string and guess the originating sequence database for a given sequence
 573    * @param seqO sequence to be annotated
 574    * @param dbr Accession string for sequence
 575    * @param dbsource source database for alignment (PFAM or RFAM)
 576    */
 577   private void guessDatabaseFor(Sequence seqO, String dbr, String dbsource)
 578   {
 579     DBRefEntry dbrf=null;
 580     List<DBRefEntry> dbrs=new ArrayList<DBRefEntry>();
 581     String seqdb="Unknown",sdbac=""+dbr;
 582     int st=-1,en=-1,p;
 583     if ((st=sdbac.indexOf("/"))>-1)
 584     {
 585       String num,range=sdbac.substring(st+1);
 586       sdbac = sdbac.substring(0,st);
 587       if ((p=range.indexOf("-"))>-1)
 588       {
 589         p++;
 590         if (p<range.length())
 591         {
 592         num = range.substring(p).trim();
 593         try {
 594           en = Integer.parseInt(num);
 595         } catch (NumberFormatException x)
 596         {
 597           // could warn here that index is invalid
 598           en = -1;
 599         }
 600         }
 601       } else {
 602         p=range.length();
 603       }
 604       num=range.substring(0,p).trim();
 605       try {
 606         st = Integer.parseInt(num);
 607       } catch (NumberFormatException x)
 608       {
 609         // could warn here that index is invalid
 610         st = -1;
 611       }
 612     }
 613     if (dbsource.equals("PFAM")) {
 614       seqdb = "UNIPROT";
 615       if (sdbac.indexOf(".")>-1)
 616       {
 617         // strip of last subdomain
 618         sdbac = sdbac.substring(0,sdbac.indexOf("."));
 619         dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, seqdb, dbsource, sdbac);
 620         if (dbrf!=null)
 621         {
 622           dbrs.add(dbrf);
 623         }
 624       }
 625       dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, dbsource, dbsource, dbr);
 626       if (dbr!=null)
 627       {
 628         dbrs.add(dbrf);
 629       }
 630     } else {
 631       seqdb = "EMBL"; // total guess - could be ENA, or something else these days
 632       if (sdbac.indexOf(".")>-1)
 633       {
 634         // strip off last subdomain
 635         sdbac = sdbac.substring(0,sdbac.indexOf("."));
 636         dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, seqdb, dbsource, sdbac);
 637         if (dbrf!=null)
 638         {
 639           dbrs.add(dbrf);
 640         }
 641       }
 642
 643       dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, dbsource, dbsource, dbr);
 644       if (dbrf!=null)
 645       {
 646         dbrs.add(dbrf);
 647       }
 648     }
 649     if (st!=-1 && en!=-1)
 650     {
 651       for (DBRefEntry d:dbrs)
 652       {
 653         jalview.util.MapList mp = new jalview.util.MapList(new int[] { seqO.getStart(),seqO.getEnd()},new int[] { st,en},1,1);
 654         jalview.datamodel.Mapping mping = new Mapping(mp);
 655         d.setMap(mping);
 656       }
 657     }
 658   }
 659
 660   protected static AlignmentAnnotation parseAnnotationRow(
 661           Vector annotation, String label, String annots)
 662   {
 663     String convert1, convert2 = null;
 664
 665     // Convert all bracket types to parentheses
 666     Regex openparen = new Regex("(<|\\[)", "(");
 667     Regex closeparen = new Regex("(>|\\])", ")");
 668
 669     // Detect if file is RNA by looking for bracket types
 670     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
 671
 672     convert1 = openparen.replaceAll(annots);
 673     convert2 = closeparen.replaceAll(convert1);
 674     annots = convert2;
 675
 676     String type = label;
 677     if (label.contains("_cons"))
 678     {
 679       type = (label.indexOf("_cons") == label.length() - 5) ? label
 680               .substring(0, label.length() - 5) : label;
 681     }
 682     boolean ss = false;
 683     type = id2type(type);
 684     if (type.equals("secondary structure"))
 685     {
 686       ss = true;
 687     }
 688     // decide on secondary structure or not.
 689     Annotation[] els = new Annotation[annots.length()];
 690     for (int i = 0; i < annots.length(); i++)
 691     {
 692       String pos = annots.substring(i, i + 1);
 693       Annotation ann;
 694       ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
 695       // be written out
 696       if (ss)
 697       {
 698         if (detectbrackets.search(pos))
 699         {
 700           ann.secondaryStructure = jalview.schemes.ResidueProperties
 701                   .getRNASecStrucState(pos).charAt(0);
 702         }
 703         else
 704         {
 705           ann.secondaryStructure = jalview.schemes.ResidueProperties
 706                   .getDssp3state(pos).charAt(0);
 707         }
 708
 709         if (ann.secondaryStructure == pos.charAt(0) || pos.charAt(0) == 'C')
 710         {
 711           ann.displayCharacter = ""; // null; // " ";
 712         }
 713         else
 714         {
 715           ann.displayCharacter = " " + ann.displayCharacter;
 716         }
 717       }
 718
 719       els[i] = ann;
 720     }
 721     AlignmentAnnotation annot = null;
 722     Enumeration e = annotation.elements();
 723     while (e.hasMoreElements())
 724     {
 725       annot = (AlignmentAnnotation) e.nextElement();
 726       if (annot.label.equals(type))
 727         break;
 728       annot = null;
 729     }
 730     if (annot == null)
 731     {
 732       annot = new AlignmentAnnotation(type, type, els);
 733       annotation.addElement(annot);
 734     }
 735     else
 736     {
 737       Annotation[] anns = new Annotation[annot.annotations.length
 738               + els.length];
 739       System.arraycopy(annot.annotations, 0, anns, 0,
 740               annot.annotations.length);
 741       System.arraycopy(els, 0, anns, annot.annotations.length, els.length);
 742       annot.annotations = anns;
 743       // System.out.println("else: ");
 744     }
 745     return annot;
 746   }
 747
 748   public String print(SequenceI[] s)
 749   {
 750     // find max length of id
 751     int max = 0;
 752     int maxid = 0;
 753     int in = 0;
 754     Hashtable dataRef = null;
 755     while ((in < s.length) && (s[in] != null))
 756     {
 757       String tmp = printId(s[in]);
 758       if (s[in].getSequence().length > max)
 759       {
 760         max = s[in].getSequence().length;
 761       }
 762
 763       if (tmp.length() > maxid)
 764       {
 765         maxid = tmp.length();
 766       }
 767       if (s[in].getDBRef() != null)
 768       {
 769         for (int idb = 0; idb < s[in].getDBRef().length; idb++)
 770         {
 771           if (dataRef == null)
 772             dataRef = new Hashtable();
 773
 774           String datAs1 = s[in].getDBRef()[idb].getSource().toString()
 775                   + " ; "
 776                   + s[in].getDBRef()[idb].getAccessionId().toString();
 777           dataRef.put(tmp, datAs1);
 778         }
 779       }
 780       in++;
 781     }
 782     maxid += 9;
 783     int i = 0;
 784
 785     // output database type
 786     if (al.getProperties() != null)
 787     {
 788       if (!al.getProperties().isEmpty())
 789       {
 790         Enumeration key = al.getProperties().keys();
 791         Enumeration val = al.getProperties().elements();
 792         while (key.hasMoreElements())
 793         {
 794           out.append("#=GF " + key.nextElement() + " " + val.nextElement());
 795           out.append(newline);
 796         }
 797       }
 798     }
 799
 800     // output database accessions
 801     if (dataRef != null)
 802     {
 803       Enumeration en = dataRef.keys();
 804       while (en.hasMoreElements())
 805       {
 806         Object idd = en.nextElement();
 807         String type = (String) dataRef.remove(idd);
 808         out.append(new Format("%-" + (maxid - 2) + "s").form("#=GS "
 809                 + idd.toString() + " "));
 810         if (type.contains("PFAM") || type.contains("RFAM"))
 811         {
 812
 813           out.append(" AC " + type.substring(type.indexOf(";") + 1));
 814         }
 815         else
 816         {
 817           out.append(" DR " + type + " ");
 818         }
 819         out.append(newline);
 820       }
 821     }
 822
 823     // output annotations
 824     while (i < s.length && s[i] != null)
 825     {
 826       if (s[i].getDatasetSequence() != null)
 827       {
 828         SequenceI ds = s[i].getDatasetSequence();
 829         AlignmentAnnotation[] alAnot;
 830         Annotation[] ann;
 831         Annotation annot;
 832         alAnot = s[i].getAnnotation();
 833         String feature = "";
 834         if (alAnot != null)
 835         {
 836           for (int j = 0; j < alAnot.length; j++)
 837           {
 838             if (ds.getSequenceFeatures() != null)
 839             {
 840               feature = ds.getSequenceFeatures()[0].type;
 841             }
 842             String key = type2id(feature);
 843
 844             if (key == null)
 845               continue;
 846
 847             // out.append("#=GR ");
 848             out.append(new Format("%-" + maxid + "s").form("#=GR "
 849                     + printId(s[i]) + " " + key + " "));
 850             ann = alAnot[j].annotations;
 851             String seq = "";
 852             for (int k = 0; k < ann.length; k++)
 853             {
 854               annot = ann[k];
 855               String ch = (annot == null) ? Character.toString(s[i]
 856                       .getCharAt(k)) : annot.displayCharacter;
 857               if (ch.length() == 0)
 858               {
 859                 if (key.equals("SS"))
 860                 {
 861                   char ll = annot.secondaryStructure;
 862                   seq = (Character.toString(ll).equals(" ")) ? seq + "C"
 863                           : seq + ll;
 864                 }
 865                 else
 866                 {
 867                   seq += ".";
 868                 }
 869               }
 870               else if (ch.length() == 1)
 871               {
 872                 seq += ch;
 873               }
 874               else if (ch.length() > 1)
 875               {
 876                 seq += ch.charAt(1);
 877               }
 878             }
 879             out.append(seq);
 880             out.append(newline);
 881           }
 882         }
 883       }
 884
 885       out.append(new Format("%-" + maxid + "s").form(printId(s[i]) + " "));
 886       out.append(s[i].getSequenceAsString());
 887       out.append(newline);
 888       i++;
 889     }
 890
 891     // alignment annotation
 892     AlignmentAnnotation aa;
 893     if (al.getAlignmentAnnotation() != null)
 894     {
 895       for (int ia = 0; ia < al.getAlignmentAnnotation().length; ia++)
 896       {
 897         aa = al.getAlignmentAnnotation()[ia];
 898         if (aa.autoCalculated || !aa.visible)
 899         {
 900           continue;
 901         }
 902         String seq = "";
 903         String label;
 904
 905         if (aa.label.equals("seq"))
 906           label = "seq_cons";
 907         else
 908           label = type2id(aa.label.toLowerCase()) + "_cons";
 909
 910         if (label == null)
 911           label = aa.label;
 912
 913         out.append(new Format("%-" + maxid + "s").form("#=GC " + label
 914                 + " "));
 915         for (int j = 0; j < aa.annotations.length; j++)
 916         {
 917           String ch = (aa.annotations[j] == null) ? "-"
 918                   : aa.annotations[j].displayCharacter;
 919           if (ch.length() == 0)
 920           {
 921             char ll = aa.annotations[j].secondaryStructure;
 922             if (Character.toString(ll).equals(" "))
 923               seq += "C";
 924             else
 925               seq += ll;
 926           }
 927           else if (ch.length() == 1)
 928           {
 929             seq += ch;
 930           }
 931           else if (ch.length() > 1)
 932           {
 933             seq += ch.charAt(1);
 934           }
 935         }
 936         out.append(seq);
 937         out.append(newline);
 938       }
 939     }
 940     return out.toString();
 941   }
 942
 943   public String print()
 944   {
 945     out = new StringBuffer();
 946     out.append("# STOCKHOLM 1.0");
 947     out.append(newline);
 948     print(getSeqsAsArray());
 949
 950     out.append("//");
 951     out.append(newline);
 952     return out.toString();
 953   }
 954
 955   private static Hashtable typeIds = null;
 956   static
 957   {
 958     if (typeIds == null)
 959     {
 960       typeIds = new Hashtable();
 961       typeIds.put("SS", "secondary structure");
 962       typeIds.put("SA", "surface accessibility");
 963       typeIds.put("TM", "transmembrane");
 964       typeIds.put("PP", "posterior probability");
 965       typeIds.put("LI", "ligand binding");
 966       typeIds.put("AS", "active site");
 967       typeIds.put("IN", "intron");
 968       typeIds.put("IR", "interacting residue");
 969       typeIds.put("AC", "accession");
 970       typeIds.put("OS", "organism");
 971       typeIds.put("CL", "class");
 972       typeIds.put("DE", "description");
 973       typeIds.put("DR", "reference");
 974       typeIds.put("LO", "look");
 975       typeIds.put("RF", "reference positions");
 976
 977     }
 978   }
 979
 980   protected static String id2type(String id)
 981   {
 982     if (typeIds.containsKey(id))
 983     {
 984       return (String) typeIds.get(id);
 985     }
 986     System.err.println("Warning : Unknown Stockholm annotation type code "
 987             + id);
 988     return id;
 989   }
 990
 991   protected static String type2id(String type)
 992   {
 993     String key = null;
 994     Enumeration e = typeIds.keys();
 995     while (e.hasMoreElements())
 996     {
 997       Object ll = e.nextElement();
 998       if (typeIds.get(ll).toString().equals(type))
 999       {
1000         key = (String) ll;
1001         break;
1002       }
1003     }
1004     if (key != null)
1005     {
1006       return (String) key;
1007     }
1008     System.err.println("Warning : Unknown Stockholm annotation type: "
1009             + type);
1010     return key;
1011   }
1012   /**
1013    * //ssline is complete secondary structure line private AlignmentAnnotation
1014    * addHelices(Vector annotation, String label, String ssline) {
1015    *
1016    * // decide on secondary structure or not. Annotation[] els = new
1017    * Annotation[ssline.length()]; for (int i = 0; i < ssline.length(); i++) {
1018    * String pos = ssline.substring(i, i + 1); Annotation ann; ann = new
1019    * Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
1020    *
1021    * ann.secondaryStructure =
1022    * jalview.schemes.ResidueProperties.getRNAssState(pos).charAt(0);
1023    *
1024    * ann.displayCharacter = "x" + ann.displayCharacter;
1025    *
1026    * System.out.println(ann.displayCharacter);
1027    *
1028    * els[i] = ann; } AlignmentAnnotation helicesAnnot = null; Enumeration e =
1029    * annotation.elements(); while (e.hasMoreElements()) { helicesAnnot =
1030    * (AlignmentAnnotation) e.nextElement(); if (helicesAnnot.label.equals(type))
1031    * break; helicesAnnot = null; } if (helicesAnnot == null) { helicesAnnot =
1032    * new AlignmentAnnotation(type, type, els);
1033    * annotation.addElement(helicesAnnot); } else { Annotation[] anns = new
1034    * Annotation[helicesAnnot.annotations.length + els.length];
1035    * System.arraycopy(helicesAnnot.annotations, 0, anns, 0,
1036    * helicesAnnot.annotations.length); System.arraycopy(els, 0, anns,
1037    * helicesAnnot.annotations.length, els.length); helicesAnnot.annotations =
1038    * anns; }
1039    *
1040    * helicesAnnot.features = Rna.GetBasePairs(ssline);
1041    * Rna.HelixMap(helicesAnnot.features);
1042    *
1043    *
1044    * return helicesAnnot; }
1045    */
1046 }