src/jalview/io/StockholmFile.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
   3  * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  10  *
  11  * Jalview is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty
  13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  14  * PURPOSE.  See the GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  17  */
  18 /*
  19  * This extension was written by Benjamin Schuster-Boeckler at sanger.ac.uk
  20  */
  21 package jalview.io;
  22
  23 import java.io.*;
  24 import java.util.*;
  25
  26 import com.stevesoft.pat.*;
  27 import jalview.datamodel.*;
  28 import jalview.util.Format;
  29
  30 // import org.apache.log4j.*;
  31
  32 /**
   33  * This class is supposed to parse a Stockholm format file into Jalview. There
  34  * are TODOs in this class: we do not know what the database source and version
  35  * is for the file when parsing the #GS= AC tag which associates accessions with
  36  * sequences. Database references are also not parsed correctly: a separate
  37  * reference string parser must be added to parse the database reference form
  38  * into Jalview's local representation.
  39  *
  40  * @author bsb at sanger.ac.uk
  41  * @version 0.3 + jalview mods
  42  *
  43  */
  44 public class StockholmFile extends AlignFile
  45 {
  46   // static Logger logger = Logger.getLogger("jalview.io.StockholmFile");
  47   StringBuffer out; // output buffer
  48
  49   AlignmentI al;
  50
  51   public StockholmFile()
  52   {
  53   }
  54
  55   /**
  56    * Creates a new StockholmFile object for output.
  57    */
  58   public StockholmFile(AlignmentI al)
  59   {
  60     this.al = al;
  61   }
  62
  63   public StockholmFile(String inFile, String type) throws IOException
  64   {
  65     super(inFile, type);
  66   }
  67
  68   public StockholmFile(FileParse source) throws IOException
  69   {
  70     super(source);
  71   }
  72
  73   public void initData()
  74   {
  75     super.initData();
  76   }
  77
  78   /**
  79    * Parse a file in Stockholm format into Jalview's data model. The file has to
  80    * be passed at construction time
  81    *
  82    * @throws IOException
  83    *           If there is an error with the input file
  84    */
  85   public void parse() throws IOException
  86   {
  87     StringBuffer treeString = new StringBuffer();
  88     String treeName = null;
  89     // --------------- Variable Definitions -------------------
  90     String line;
  91     String version;
  92     // String id;
  93     Hashtable seqAnn = new Hashtable(); // Sequence related annotations
  94     Hashtable seqs = new Hashtable();
  95     Regex p, r, rend, s, x;
  96
  97     // Temporary line for processing RNA annotation
  98     // String RNAannot = "";
  99
 100     // ------------------ Parsing File ----------------------
 101     // First, we have to check that this file has STOCKHOLM format, i.e. the
 102     // first line must match
 103     r = new Regex("# STOCKHOLM ([\\d\\.]+)");
 104     if (!r.search(nextLine()))
 105     {
 106       throw new IOException(
 107               "This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'");
 108     }
 109     else
 110     {
 111       version = r.stringMatched(1);
 112       // logger.debug("Stockholm version: " + version);
 113     }
 114
 115     // We define some Regexes here that will be used regularily later
 116     rend = new Regex("^\\s*\\/\\/"); // Find the end of an alignment
 117     p = new Regex("(\\S+)\\/(\\d+)\\-(\\d+)"); // split sequence id in
 118     // id/from/to
 119     s = new Regex("(\\S+)\\s+(\\S*)\\s+(.*)"); // Parses annotation subtype
 120     r = new Regex("#=(G[FSRC]?)\\s+(.*)"); // Finds any annotation line
 121     x = new Regex("(\\S+)\\s+(\\S+)"); // split id from sequence
 122
 123     // Convert all bracket types to parentheses (necessary for passing to VARNA)
 124     Regex openparen = new Regex("(<|\\[)", "(");
 125     Regex closeparen = new Regex("(>|\\])", ")");
 126
 127     // Detect if file is RNA by looking for bracket types
 128     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
 129
 130     rend.optimize();
 131     p.optimize();
 132     s.optimize();
 133     r.optimize();
 134     x.optimize();
 135     openparen.optimize();
 136     closeparen.optimize();
 137
 138     while ((line = nextLine()) != null)
 139     {
 140       if (line.length() == 0)
 141       {
 142         continue;
 143       }
 144       if (rend.search(line))
 145       {
 146         // End of the alignment, pass stuff back
 147         this.noSeqs = seqs.size();
 148
 149         String seqdb,dbsource = null;
 150         Regex pf = new Regex("PF[0-9]{5}(.*)"); // Finds AC for Pfam
 151         Regex rf = new Regex("RF[0-9]{5}(.*)"); // Finds AC for Rfam
 152         if (getAlignmentProperty("AC") != null)
 153         {
 154           String dbType = getAlignmentProperty("AC").toString();
 155           if (pf.search(dbType))
 156           {
 157             // PFAM Alignment - so references are typically from Uniprot
 158             dbsource = "PFAM";
 159           }
 160           else if (rf.search(dbType))
 161           {
 162             dbsource = "RFAM";
 163           }
 164         }
 165         // logger.debug("Number of sequences: " + this.noSeqs);
 166         Enumeration accs = seqs.keys();
 167         while (accs.hasMoreElements())
 168         {
 169           String acc = (String) accs.nextElement();
 170           // logger.debug("Processing sequence " + acc);
 171           String seq = (String) seqs.remove(acc);
 172           if (maxLength < seq.length())
 173           {
 174             maxLength = seq.length();
 175           }
 176           int start = 1;
 177           int end = -1;
 178           String sid = acc;
 179           /*
 180            * Retrieve hash of annotations for this accession Associate
 181            * Annotation with accession
 182            */
 183           Hashtable accAnnotations = null;
 184
 185           if (seqAnn != null && seqAnn.containsKey(acc))
 186           {
 187             accAnnotations = (Hashtable) seqAnn.remove(acc);
 188             // TODO: add structures to sequence
 189           }
 190
 191           // Split accession in id and from/to
 192           if (p.search(acc))
 193           {
 194             sid = p.stringMatched(1);
 195             start = Integer.parseInt(p.stringMatched(2));
 196             end = Integer.parseInt(p.stringMatched(3));
 197           }
 198           // logger.debug(sid + ", " + start + ", " + end);
 199
 200           Sequence seqO = new Sequence(sid, seq, start, end);
 201           // Add Description (if any)
 202           if (accAnnotations != null && accAnnotations.containsKey("DE"))
 203           {
 204             String desc = (String) accAnnotations.get("DE");
 205             seqO.setDescription((desc == null) ? "" : desc);
 206           }
 207
 208           // Add DB References (if any)
 209           if (accAnnotations != null && accAnnotations.containsKey("DR"))
 210           {
 211             String dbr = (String) accAnnotations.get("DR");
 212             if (dbr != null && dbr.indexOf(";") > -1)
 213             {
 214               String src = dbr.substring(0, dbr.indexOf(";"));
 215               String acn = dbr.substring(dbr.indexOf(";") + 1);
 216               jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
 217             }
 218           }
 219
 220           if (accAnnotations != null && accAnnotations.containsKey("AC"))
 221           {
 222             if (dbsource != null)
 223             {
 224               String dbr = (String) accAnnotations.get("AC");
 225               if (dbr != null)
 226               {
 227                 // we could get very clever here - but for now - just try to guess accession type from source of alignment plus structure of accession
 228                 guessDatabaseFor(seqO, dbr, dbsource);
 229
 230               }
 231             }
 232             // else - do what ?  add the data anyway and prompt the user to specify what references these are ?
 233           }
 234
 235           Hashtable features = null;
 236           // We need to adjust the positions of all features to account for gaps
 237           try
 238           {
 239             features = (Hashtable) accAnnotations.remove("features");
 240           } catch (java.lang.NullPointerException e)
 241           {
 242             // loggerwarn("Getting Features for " + acc + ": " +
 243             // e.getMessage());
 244             // continue;
 245           }
 246           // if we have features
 247           if (features != null)
 248           {
 249             int posmap[] = seqO.findPositionMap();
 250             Enumeration i = features.keys();
 251             while (i.hasMoreElements())
 252             {
 253               // TODO: parse out secondary structure annotation as annotation
 254               // row
 255               // TODO: parse out scores as annotation row
 256               // TODO: map coding region to core jalview feature types
 257               String type = i.nextElement().toString();
 258               Hashtable content = (Hashtable) features.remove(type);
 259
 260               // add alignment annotation for this feature
 261               String key = type2id(type);
 262               if (key != null)
 263               {
 264                 if (accAnnotations != null
 265                         && accAnnotations.containsKey(key))
 266                 {
 267                   Vector vv = (Vector) accAnnotations.get(key);
 268                   for (int ii = 0; ii < vv.size(); ii++)
 269                   {
 270                     AlignmentAnnotation an = (AlignmentAnnotation) vv
 271                             .elementAt(ii);
 272                     seqO.addAlignmentAnnotation(an);
 273                   }
 274                 }
 275               }
 276
 277               Enumeration j = content.keys();
 278               while (j.hasMoreElements())
 279               {
 280                 String desc = j.nextElement().toString();
 281                 String ns = content.get(desc).toString();
 282                 char[] byChar = ns.toCharArray();
 283                 for (int k = 0; k < byChar.length; k++)
 284                 {
 285                   char c = byChar[k];
 286                   if (!(c == ' ' || c == '_' || c == '-' || c == '.')) // PFAM
 287                   // uses
 288                   // '.'
 289                   // for
 290                   // feature
 291                   // background
 292                   {
 293                     int new_pos = posmap[k]; // look up nearest seqeunce
 294                     // position to this column
 295                     SequenceFeature feat = new SequenceFeature(type, desc,
 296                             new_pos, new_pos, 0f, null);
 297
 298                     seqO.addSequenceFeature(feat);
 299                   }
 300                 }
 301               }
 302
 303             }
 304
 305           }
 306           // garbage collect
 307
 308           // logger.debug("Adding seq " + acc + " from " + start + " to " + end
 309           // + ": " + seq);
 310           this.seqs.addElement(seqO);
 311         }
 312         return; // finished parsing this segment of source
 313       }
 314       else if (!r.search(line))
 315       {
 316         // System.err.println("Found sequence line: " + line);
 317
 318         // Split sequence in sequence and accession parts
 319         if (!x.search(line))
 320         {
 321           // logger.error("Could not parse sequence line: " + line);
 322           throw new IOException("Could not parse sequence line: " + line);
 323         }
 324         String ns = (String) seqs.get(x.stringMatched(1));
 325         if (ns == null)
 326         {
 327           ns = "";
 328         }
 329         ns += x.stringMatched(2);
 330
 331         seqs.put(x.stringMatched(1), ns);
 332       }
 333       else
 334       {
 335         String annType = r.stringMatched(1);
 336         String annContent = r.stringMatched(2);
 337
 338         // System.err.println("type:" + annType + " content: " + annContent);
 339
 340         if (annType.equals("GF"))
 341         {
 342           /*
 343            * Generic per-File annotation, free text Magic features: #=GF NH
 344            * <tree in New Hampshire eXtended format> #=GF TN <Unique identifier
 345            * for the next tree> Pfam descriptions: 7. DESCRIPTION OF FIELDS
 346            *
 347            * Compulsory fields: ------------------
 348            *
 349            * AC Accession number: Accession number in form PFxxxxx.version or
 350            * PBxxxxxx. ID Identification: One word name for family. DE
 351            * Definition: Short description of family. AU Author: Authors of the
 352            * entry. SE Source of seed: The source suggesting the seed members
 353            * belong to one family. GA Gathering method: Search threshold to
 354            * build the full alignment. TC Trusted Cutoff: Lowest sequence score
 355            * and domain score of match in the full alignment. NC Noise Cutoff:
 356            * Highest sequence score and domain score of match not in full
 357            * alignment. TP Type: Type of family -- presently Family, Domain,
 358            * Motif or Repeat. SQ Sequence: Number of sequences in alignment. AM
 359            * Alignment Method The order ls and fs hits are aligned to the model
 360            * to build the full align. // End of alignment.
 361            *
 362            * Optional fields: ----------------
 363            *
 364            * DC Database Comment: Comment about database reference. DR Database
 365            * Reference: Reference to external database. RC Reference Comment:
 366            * Comment about literature reference. RN Reference Number: Reference
 367            * Number. RM Reference Medline: Eight digit medline UI number. RT
 368            * Reference Title: Reference Title. RA Reference Author: Reference
 369            * Author RL Reference Location: Journal location. PI Previous
 370            * identifier: Record of all previous ID lines. KW Keywords: Keywords.
 371            * CC Comment: Comments. NE Pfam accession: Indicates a nested domain.
 372            * NL Location: Location of nested domains - sequence ID, start and
 373            * end of insert.
 374            *
 375            * Obsolete fields: ----------- AL Alignment method of seed: The
 376            * method used to align the seed members.
 377            */
 378           // Let's save the annotations, maybe we'll be able to do something
 379           // with them later...
 380           Regex an = new Regex("(\\w+)\\s*(.*)");
 381           if (an.search(annContent))
 382           {
 383             if (an.stringMatched(1).equals("NH"))
 384             {
 385               treeString.append(an.stringMatched(2));
 386             }
 387             else if (an.stringMatched(1).equals("TN"))
 388             {
 389               if (treeString.length() > 0)
 390               {
 391                 if (treeName == null)
 392                 {
 393                   treeName = "Tree " + (getTreeCount() + 1);
 394                 }
 395                 addNewickTree(treeName, treeString.toString());
 396               }
 397               treeName = an.stringMatched(2);
 398               treeString = new StringBuffer();
 399             }
 400             setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
 401           }
 402         }
 403         else if (annType.equals("GS"))
 404         {
 405           // Generic per-Sequence annotation, free text
 406           /*
 407            * Pfam uses these features: Feature Description ---------------------
 408            * ----------- AC <accession> ACcession number DE <freetext>
 409            * DEscription DR <db>; <accession>; Database Reference OS <organism>
 410            * OrganiSm (species) OC <clade> Organism Classification (clade, etc.)
 411            * LO <look> Look (Color, etc.)
 412            */
 413           if (s.search(annContent))
 414           {
 415             String acc = s.stringMatched(1);
 416             String type = s.stringMatched(2);
 417             String content = s.stringMatched(3);
 418             // TODO: store DR in a vector.
 419             // TODO: store AC according to generic file db annotation.
 420             Hashtable ann;
 421             if (seqAnn.containsKey(acc))
 422             {
 423               ann = (Hashtable) seqAnn.get(acc);
 424             }
 425             else
 426             {
 427               ann = new Hashtable();
 428             }
 429             ann.put(type, content);
 430             seqAnn.put(acc, ann);
 431           }
 432           else
 433           {
 434             throw new IOException("Error parsing " + line);
 435           }
 436         }
 437         else if (annType.equals("GC"))
 438         {
 439           // Generic per-Column annotation, exactly 1 char per column
 440           // always need a label.
 441           if (x.search(annContent))
 442           {
 443             // parse out and create alignment annotation directly.
 444             parseAnnotationRow(annotations, x.stringMatched(1),
 445                     x.stringMatched(2));
 446           }
 447         }
 448         else if (annType.equals("GR"))
 449         {
 450           // Generic per-Sequence AND per-Column markup, exactly 1 char per
 451           // column
 452           /*
 453            * Feature Description Markup letters ------- -----------
 454            * -------------- SS Secondary Structure [HGIEBTSCX] SA Surface
 455            * Accessibility [0-9X] (0=0%-10%; ...; 9=90%-100%) TM TransMembrane
 456            * [Mio] PP Posterior Probability [0-9*] (0=0.00-0.05; 1=0.05-0.15;
 457            * *=0.95-1.00) LI LIgand binding [*] AS Active Site [*] IN INtron (in
 458            * or after) [0-2]
 459            */
 460           if (s.search(annContent))
 461           {
 462             String acc = s.stringMatched(1);
 463             String type = s.stringMatched(2);
 464             String seq = new String(s.stringMatched(3));
 465             String description = null;
 466             // Check for additional information about the current annotation
 467             // We use a simple string tokenizer here for speed
 468             StringTokenizer sep = new StringTokenizer(seq, " \t");
 469             description = sep.nextToken();
 470             if (sep.hasMoreTokens())
 471             {
 472               seq = sep.nextToken();
 473             }
 474             else
 475             {
 476               seq = description;
 477               description = new String();
 478             }
 479             // sequence id with from-to fields
 480
 481             Hashtable ann;
 482             // Get an object with all the annotations for this sequence
 483             if (seqAnn.containsKey(acc))
 484             {
 485               // logger.debug("Found annotations for " + acc);
 486               ann = (Hashtable) seqAnn.get(acc);
 487             }
 488             else
 489             {
 490               // logger.debug("Creating new annotations holder for " + acc);
 491               ann = new Hashtable();
 492               seqAnn.put(acc, ann);
 493             }
 494             // TODO test structure, call parseAnnotationRow with vector from
 495             // hashtable for specific sequence
 496             Hashtable features;
 497             // Get an object with all the content for an annotation
 498             if (ann.containsKey("features"))
 499             {
 500               // logger.debug("Found features for " + acc);
 501               features = (Hashtable) ann.get("features");
 502             }
 503             else
 504             {
 505               // logger.debug("Creating new features holder for " + acc);
 506               features = new Hashtable();
 507               ann.put("features", features);
 508             }
 509
 510             Hashtable content;
 511             if (features.containsKey(this.id2type(type)))
 512             {
 513               // logger.debug("Found content for " + this.id2type(type));
 514               content = (Hashtable) features.get(this.id2type(type));
 515             }
 516             else
 517             {
 518               // logger.debug("Creating new content holder for " +
 519               // this.id2type(type));
 520               content = new Hashtable();
 521               features.put(this.id2type(type), content);
 522             }
 523             String ns = (String) content.get(description);
 524             if (ns == null)
 525             {
 526               ns = "";
 527             }
 528             ns += seq;
 529             content.put(description, ns);
 530             Hashtable strucAnn;
 531             if (seqAnn.containsKey(acc))
 532             {
 533               strucAnn = (Hashtable) seqAnn.get(acc);
 534             }
 535             else
 536             {
 537               strucAnn = new Hashtable();
 538             }
 539
 540             Vector newStruc = new Vector();
 541             parseAnnotationRow(newStruc, type, ns);
 542             strucAnn.put(type, newStruc);
 543             seqAnn.put(acc, strucAnn);
 544           }
 545           else
 546           {
 547             System.err
 548                     .println("Warning - couldn't parse sequence annotation row line:\n"
 549                             + line);
 550             // throw new IOException("Error parsing " + line);
 551           }
 552         }
 553         else
 554         {
 555           throw new IOException("Unknown annotation detected: " + annType
 556                   + " " + annContent);
 557         }
 558       }
 559     }
 560     if (treeString.length() > 0)
 561     {
 562       if (treeName == null)
 563       {
 564         treeName = "Tree " + (1 + getTreeCount());
 565       }
 566       addNewickTree(treeName, treeString.toString());
 567     }
 568   }
 569
 570   /**
 571    * Demangle an accession string and guess the originating sequence database for a given sequence
 572    * @param seqO sequence to be annotated
 573    * @param dbr Accession string for sequence
 574    * @param dbsource source database for alignment (PFAM or RFAM)
 575    */
 576   private void guessDatabaseFor(Sequence seqO, String dbr, String dbsource)
 577   {
 578     DBRefEntry dbrf=null;
 579     List<DBRefEntry> dbrs=new ArrayList<DBRefEntry>();
 580     String seqdb="Unknown",sdbac=""+dbr;
 581     int st=-1,en=-1,p;
 582     if ((st=sdbac.indexOf("/"))>-1)
 583     {
 584       String num,range=sdbac.substring(st+1);
 585       sdbac = sdbac.substring(0,st);
 586       if ((p=range.indexOf("-"))>-1)
 587       {
 588         p++;
 589         if (p<range.length())
 590         {
 591         num = range.substring(p).trim();
 592         try {
 593           en = Integer.parseInt(num);
 594         } catch (NumberFormatException x)
 595         {
 596           // could warn here that index is invalid
 597           en = -1;
 598         }
 599         }
 600       } else {
 601         p=range.length();
 602       }
 603       num=range.substring(0,p).trim();
 604       try {
 605         st = Integer.parseInt(num);
 606       } catch (NumberFormatException x)
 607       {
 608         // could warn here that index is invalid
 609         st = -1;
 610       }
 611     }
 612     if (dbsource.equals("PFAM")) {
 613       seqdb = "UNIPROT";
 614       if (sdbac.indexOf(".")>-1)
 615       {
 616         // strip of last subdomain
 617         sdbac = sdbac.substring(0,sdbac.indexOf("."));
 618         dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, seqdb, dbsource, sdbac);
 619         if (dbrf!=null)
 620         {
 621           dbrs.add(dbrf);
 622         }
 623       }
 624       dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, dbsource, dbsource, dbr);
 625       if (dbr!=null)
 626       {
 627         dbrs.add(dbrf);
 628       }
 629     } else {
 630       seqdb = "EMBL"; // total guess - could be ENA, or something else these days
 631       if (sdbac.indexOf(".")>-1)
 632       {
 633         // strip off last subdomain
 634         sdbac = sdbac.substring(0,sdbac.indexOf("."));
 635         dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, seqdb, dbsource, sdbac);
 636         if (dbrf!=null)
 637         {
 638           dbrs.add(dbrf);
 639         }
 640       }
 641
 642       dbrf = jalview.util.DBRefUtils.parseToDbRef(seqO, dbsource, dbsource, dbr);
 643       if (dbrf!=null)
 644       {
 645         dbrs.add(dbrf);
 646       }
 647     }
 648     if (st!=-1 && en!=-1)
 649     {
 650       for (DBRefEntry d:dbrs)
 651       {
 652         jalview.util.MapList mp = new jalview.util.MapList(new int[] { seqO.getStart(),seqO.getEnd()},new int[] { st,en},1,1);
 653         jalview.datamodel.Mapping mping = new Mapping(mp);
 654         d.setMap(mping);
 655       }
 656     }
 657   }
 658
 659   protected static AlignmentAnnotation parseAnnotationRow(
 660           Vector annotation, String label, String annots)
 661   {
 662     String convert1, convert2 = null;
 663
 664     // Convert all bracket types to parentheses
 665     Regex openparen = new Regex("(<|\\[)", "(");
 666     Regex closeparen = new Regex("(>|\\])", ")");
 667
 668     // Detect if file is RNA by looking for bracket types
 669     Regex detectbrackets = new Regex("(<|>|\\[|\\]|\\(|\\))");
 670
 671     convert1 = openparen.replaceAll(annots);
 672     convert2 = closeparen.replaceAll(convert1);
 673     annots = convert2;
 674
 675     String type = label;
 676     if (label.contains("_cons"))
 677     {
 678       type = (label.indexOf("_cons") == label.length() - 5) ? label
 679               .substring(0, label.length() - 5) : label;
 680     }
 681     boolean ss = false;
 682     type = id2type(type);
 683     if (type.equals("secondary structure"))
 684     {
 685       ss = true;
 686     }
 687     // decide on secondary structure or not.
 688     Annotation[] els = new Annotation[annots.length()];
 689     for (int i = 0; i < annots.length(); i++)
 690     {
 691       String pos = annots.substring(i, i + 1);
 692       Annotation ann;
 693       ann = new Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
 694       // be written out
 695       if (ss)
 696       {
 697         if (detectbrackets.search(pos))
 698         {
 699           ann.secondaryStructure = jalview.schemes.ResidueProperties
 700                   .getRNASecStrucState(pos).charAt(0);
 701         }
 702         else
 703         {
 704           ann.secondaryStructure = jalview.schemes.ResidueProperties
 705                   .getDssp3state(pos).charAt(0);
 706         }
 707
 708         if (ann.secondaryStructure == pos.charAt(0) || pos.charAt(0) == 'C')
 709         {
 710           ann.displayCharacter = ""; // null; // " ";
 711         }
 712         else
 713         {
 714           ann.displayCharacter = " " + ann.displayCharacter;
 715         }
 716       }
 717
 718       els[i] = ann;
 719     }
 720     AlignmentAnnotation annot = null;
 721     Enumeration e = annotation.elements();
 722     while (e.hasMoreElements())
 723     {
 724       annot = (AlignmentAnnotation) e.nextElement();
 725       if (annot.label.equals(type))
 726         break;
 727       annot = null;
 728     }
 729     if (annot == null)
 730     {
 731       annot = new AlignmentAnnotation(type, type, els);
 732       annotation.addElement(annot);
 733     }
 734     else
 735     {
 736       Annotation[] anns = new Annotation[annot.annotations.length
 737               + els.length];
 738       System.arraycopy(annot.annotations, 0, anns, 0,
 739               annot.annotations.length);
 740       System.arraycopy(els, 0, anns, annot.annotations.length, els.length);
 741       annot.annotations = anns;
 742       // System.out.println("else: ");
 743     }
 744     return annot;
 745   }
 746
 747   public String print(SequenceI[] s)
 748   {
 749     // find max length of id
 750     int max = 0;
 751     int maxid = 0;
 752     int in = 0;
 753     Hashtable dataRef = null;
 754     while ((in < s.length) && (s[in] != null))
 755     {
 756       String tmp = printId(s[in]);
 757       if (s[in].getSequence().length > max)
 758       {
 759         max = s[in].getSequence().length;
 760       }
 761
 762       if (tmp.length() > maxid)
 763       {
 764         maxid = tmp.length();
 765       }
 766       if (s[in].getDBRef() != null)
 767       {
 768         for (int idb = 0; idb < s[in].getDBRef().length; idb++)
 769         {
 770           if (dataRef == null)
 771             dataRef = new Hashtable();
 772
 773           String datAs1 = s[in].getDBRef()[idb].getSource().toString()
 774                   + " ; "
 775                   + s[in].getDBRef()[idb].getAccessionId().toString();
 776           dataRef.put(tmp, datAs1);
 777         }
 778       }
 779       in++;
 780     }
 781     maxid += 9;
 782     int i = 0;
 783
 784     // output database type
 785     if (al.getProperties() != null)
 786     {
 787       if (!al.getProperties().isEmpty())
 788       {
 789         Enumeration key = al.getProperties().keys();
 790         Enumeration val = al.getProperties().elements();
 791         while (key.hasMoreElements())
 792         {
 793           out.append("#=GF " + key.nextElement() + " " + val.nextElement());
 794           out.append(newline);
 795         }
 796       }
 797     }
 798
 799     // output database accessions
 800     if (dataRef != null)
 801     {
 802       Enumeration en = dataRef.keys();
 803       while (en.hasMoreElements())
 804       {
 805         Object idd = en.nextElement();
 806         String type = (String) dataRef.remove(idd);
 807         out.append(new Format("%-" + (maxid - 2) + "s").form("#=GS "
 808                 + idd.toString() + " "));
 809         if (type.contains("PFAM") || type.contains("RFAM"))
 810         {
 811
 812           out.append(" AC " + type.substring(type.indexOf(";") + 1));
 813         }
 814         else
 815         {
 816           out.append(" DR " + type + " ");
 817         }
 818         out.append(newline);
 819       }
 820     }
 821
 822     // output annotations
 823     while (i < s.length && s[i] != null)
 824     {
 825       if (s[i].getDatasetSequence() != null)
 826       {
 827         SequenceI ds = s[i].getDatasetSequence();
 828         AlignmentAnnotation[] alAnot;
 829         Annotation[] ann;
 830         Annotation annot;
 831         alAnot = s[i].getAnnotation();
 832         String feature = "";
 833         if (alAnot != null)
 834         {
 835           for (int j = 0; j < alAnot.length; j++)
 836           {
 837             if (ds.getSequenceFeatures() != null)
 838             {
 839               feature = ds.getSequenceFeatures()[0].type;
 840             }
 841             String key = type2id(feature);
 842
 843             if (key == null)
 844               continue;
 845
 846             // out.append("#=GR ");
 847             out.append(new Format("%-" + maxid + "s").form("#=GR "
 848                     + printId(s[i]) + " " + key + " "));
 849             ann = alAnot[j].annotations;
 850             String seq = "";
 851             for (int k = 0; k < ann.length; k++)
 852             {
 853               annot = ann[k];
 854               String ch = (annot == null) ? Character.toString(s[i]
 855                       .getCharAt(k)) : annot.displayCharacter;
 856               if (ch.length() == 0)
 857               {
 858                 if (key.equals("SS"))
 859                 {
 860                   char ll = annot.secondaryStructure;
 861                   seq = (Character.toString(ll).equals(" ")) ? seq + "C"
 862                           : seq + ll;
 863                 }
 864                 else
 865                 {
 866                   seq += ".";
 867                 }
 868               }
 869               else if (ch.length() == 1)
 870               {
 871                 seq += ch;
 872               }
 873               else if (ch.length() > 1)
 874               {
 875                 seq += ch.charAt(1);
 876               }
 877             }
 878             out.append(seq);
 879             out.append(newline);
 880           }
 881         }
 882       }
 883
 884       out.append(new Format("%-" + maxid + "s").form(printId(s[i]) + " "));
 885       out.append(s[i].getSequenceAsString());
 886       out.append(newline);
 887       i++;
 888     }
 889
 890     // alignment annotation
 891     AlignmentAnnotation aa;
 892     if (al.getAlignmentAnnotation() != null)
 893     {
 894       for (int ia = 0; ia < al.getAlignmentAnnotation().length; ia++)
 895       {
 896         aa = al.getAlignmentAnnotation()[ia];
 897         if (aa.autoCalculated || !aa.visible)
 898         {
 899           continue;
 900         }
 901         String seq = "";
 902         String label;
 903
 904         if (aa.label.equals("seq"))
 905           label = "seq_cons";
 906         else
 907           label = type2id(aa.label.toLowerCase()) + "_cons";
 908
 909         if (label == null)
 910           label = aa.label;
 911
 912         out.append(new Format("%-" + maxid + "s").form("#=GC " + label
 913                 + " "));
 914         for (int j = 0; j < aa.annotations.length; j++)
 915         {
 916           String ch = (aa.annotations[j] == null) ? "-"
 917                   : aa.annotations[j].displayCharacter;
 918           if (ch.length() == 0)
 919           {
 920             char ll = aa.annotations[j].secondaryStructure;
 921             if (Character.toString(ll).equals(" "))
 922               seq += "C";
 923             else
 924               seq += ll;
 925           }
 926           else if (ch.length() == 1)
 927           {
 928             seq += ch;
 929           }
 930           else if (ch.length() > 1)
 931           {
 932             seq += ch.charAt(1);
 933           }
 934         }
 935         out.append(seq);
 936         out.append(newline);
 937       }
 938     }
 939     return out.toString();
 940   }
 941
 942   public String print()
 943   {
 944     out = new StringBuffer();
 945     out.append("# STOCKHOLM 1.0");
 946     out.append(newline);
 947     print(getSeqsAsArray());
 948
 949     out.append("//");
 950     out.append(newline);
 951     return out.toString();
 952   }
 953
 954   private static Hashtable typeIds = null;
 955   static
 956   {
 957     if (typeIds == null)
 958     {
 959       typeIds = new Hashtable();
 960       typeIds.put("SS", "secondary structure");
 961       typeIds.put("SA", "surface accessibility");
 962       typeIds.put("TM", "transmembrane");
 963       typeIds.put("PP", "posterior probability");
 964       typeIds.put("LI", "ligand binding");
 965       typeIds.put("AS", "active site");
 966       typeIds.put("IN", "intron");
 967       typeIds.put("IR", "interacting residue");
 968       typeIds.put("AC", "accession");
 969       typeIds.put("OS", "organism");
 970       typeIds.put("CL", "class");
 971       typeIds.put("DE", "description");
 972       typeIds.put("DR", "reference");
 973       typeIds.put("LO", "look");
 974       typeIds.put("RF", "reference positions");
 975
 976     }
 977   }
 978
 979   protected static String id2type(String id)
 980   {
 981     if (typeIds.containsKey(id))
 982     {
 983       return (String) typeIds.get(id);
 984     }
 985     System.err.println("Warning : Unknown Stockholm annotation type code "
 986             + id);
 987     return id;
 988   }
 989
 990   protected static String type2id(String type)
 991   {
 992     String key = null;
 993     Enumeration e = typeIds.keys();
 994     while (e.hasMoreElements())
 995     {
 996       Object ll = e.nextElement();
 997       if (typeIds.get(ll).toString().equals(type))
 998       {
 999         key = (String) ll;
1000         break;
1001       }
1002     }
1003     if (key != null)
1004     {
1005       return (String) key;
1006     }
1007     System.err.println("Warning : Unknown Stockholm annotation type: "
1008             + type);
1009     return key;
1010   }
1011   /**
1012    * //ssline is complete secondary structure line private AlignmentAnnotation
1013    * addHelices(Vector annotation, String label, String ssline) {
1014    *
1015    * // decide on secondary structure or not. Annotation[] els = new
1016    * Annotation[ssline.length()]; for (int i = 0; i < ssline.length(); i++) {
1017    * String pos = ssline.substring(i, i + 1); Annotation ann; ann = new
1018    * Annotation(pos, "", ' ', 0f); // 0f is 'valid' null - will not
1019    *
1020    * ann.secondaryStructure =
1021    * jalview.schemes.ResidueProperties.getRNAssState(pos).charAt(0);
1022    *
1023    * ann.displayCharacter = "x" + ann.displayCharacter;
1024    *
1025    * System.out.println(ann.displayCharacter);
1026    *
1027    * els[i] = ann; } AlignmentAnnotation helicesAnnot = null; Enumeration e =
1028    * annotation.elements(); while (e.hasMoreElements()) { helicesAnnot =
1029    * (AlignmentAnnotation) e.nextElement(); if (helicesAnnot.label.equals(type))
1030    * break; helicesAnnot = null; } if (helicesAnnot == null) { helicesAnnot =
1031    * new AlignmentAnnotation(type, type, els);
1032    * annotation.addElement(helicesAnnot); } else { Annotation[] anns = new
1033    * Annotation[helicesAnnot.annotations.length + els.length];
1034    * System.arraycopy(helicesAnnot.annotations, 0, anns, 0,
1035    * helicesAnnot.annotations.length); System.arraycopy(els, 0, anns,
1036    * helicesAnnot.annotations.length, els.length); helicesAnnot.annotations =
1037    * anns; }
1038    *
1039    * helicesAnnot.features = Rna.GetBasePairs(ssline);
1040    * Rna.HelixMap(helicesAnnot.features);
1041    *
1042    *
1043    * return helicesAnnot; }
1044    */
1045 }