src/jalview/io/FeaturesFile.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.io;
  22
  23 import jalview.analysis.AlignmentUtils;
  24 import jalview.analysis.Finder;
  25 import jalview.analysis.SequenceIdMatcher;
  26 import jalview.api.AlignViewportI;
  27 import jalview.api.FeatureColourI;
  28 import jalview.api.FeatureRenderer;
  29 import jalview.api.FeaturesSourceI;
  30 import jalview.api.FinderI;
  31 import jalview.datamodel.AlignedCodonFrame;
  32 import jalview.datamodel.Alignment;
  33 import jalview.datamodel.AlignmentI;
  34 import jalview.datamodel.MappedFeatures;
  35 import jalview.datamodel.SearchResultMatchI;
  36 import jalview.datamodel.SequenceDummy;
  37 import jalview.datamodel.SequenceFeature;
  38 import jalview.datamodel.SequenceI;
  39 import jalview.datamodel.features.FeatureMatcherSet;
  40 import jalview.datamodel.features.FeatureMatcherSetI;
  41 import jalview.gui.Desktop;
  42 import jalview.io.gff.GffHelperBase;
  43 import jalview.io.gff.GffHelperFactory;
  44 import jalview.io.gff.GffHelperI;
  45 import jalview.schemes.FeatureColour;
  46 import jalview.util.ColorUtils;
  47 import jalview.util.MapList;
  48 import jalview.util.ParseHtmlBodyAndLinks;
  49 import jalview.util.StringUtils;
  50
  51 import java.awt.Color;
  52 import java.io.IOException;
  53 import java.util.ArrayList;
  54 import java.util.Arrays;
  55 import java.util.Collections;
  56 import java.util.HashMap;
  57 import java.util.LinkedHashMap;
  58 import java.util.List;
  59 import java.util.Map;
  60 import java.util.Map.Entry;
  61 import java.util.TreeMap;
  62
  63 /**
  64  * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
  65  * format. These are tab-delimited formats but with differences in the use of
  66  * columns.
  67  *
  68  * A Jalview feature file may define feature colours and then declare that the
  69  * remainder of the file is in GFF format with the line 'GFF'.
  70  *
  71  * GFF3 files may include alignment mappings for features, which Jalview will
  72  * attempt to model, and may include sequence data following a ##FASTA line.
  73  *
  74  *
  75  * @author AMW
  76  * @author jbprocter
  77  * @author gmcarstairs
  78  */
  79 public class FeaturesFile extends AlignFile implements FeaturesSourceI
  80 {
  81   private static final String TAB_REGEX = "\\t";
  82
  83   private static final String STARTGROUP = "STARTGROUP";
  84
  85   private static final String ENDGROUP = "ENDGROUP";
  86
  87   private static final String STARTFILTERS = "STARTFILTERS";
  88
  89   private static final String ENDFILTERS = "ENDFILTERS";
  90
  91   private static final String STARTMOTIFS = "STARTMOTIFS";
  92
  93   private static final String ENDMOTIFS = "ENDMOTIFS";
  94
  95   private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
  96
  97   private static final String NOTE = "Note";
  98
  99   protected static final String GFF_VERSION = "##gff-version";
 100
 101   private AlignmentI lastmatchedAl = null;
 102
 103   private SequenceIdMatcher matcher = null;
 104
 105   protected AlignmentI dataset;
 106
 107   protected int gffVersion;
 108
 109   /**
 110    * Creates a new FeaturesFile object.
 111    */
 112   public FeaturesFile()
 113   {
 114   }
 115
 116   /**
 117    * Constructor which does not parse the file immediately
 118    *
 119    * @param file
 120    * @param paste
 121    * @throws IOException
 122    */
 123   public FeaturesFile(String file, DataSourceType paste)
 124           throws IOException
 125   {
 126     super(false, file, paste);
 127   }
 128
 129   /**
 130    * @param source
 131    * @throws IOException
 132    */
 133   public FeaturesFile(FileParse source) throws IOException
 134   {
 135     super(source);
 136   }
 137
 138   /**
 139    * Constructor that optionally parses the file immediately
 140    *
 141    * @param parseImmediately
 142    * @param file
 143    * @param type
 144    * @throws IOException
 145    */
 146   public FeaturesFile(boolean parseImmediately, String file,
 147           DataSourceType type) throws IOException
 148   {
 149     super(parseImmediately, file, type);
 150   }
 151
 152   /**
 153    * Parse GFF or sequence features file using case-independent matching,
 154    * discarding URLs
 155    *
 156    * @param align
 157    *          - alignment/dataset containing sequences that are to be annotated
 158    * @param colours
 159    *          - hashtable to store feature colour definitions
 160    * @param removeHTML
 161    *          - process html strings into plain text
 162    * @return true if features were added
 163    */
 164   public boolean parse(AlignmentI align,
 165           Map<String, FeatureColourI> colours, boolean removeHTML)
 166   {
 167     return parse(align, colours, removeHTML, false);
 168   }
 169
 170   /**
 171    * Extends the default addProperties by also adding peptide-to-cDNA mappings
 172    * (if any) derived while parsing a GFF file
 173    */
 174   @Override
 175   public void addProperties(AlignmentI al)
 176   {
 177     super.addProperties(al);
 178     if (dataset != null && dataset.getCodonFrames() != null)
 179     {
 180       AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset();
 181       for (AlignedCodonFrame codons : dataset.getCodonFrames())
 182       {
 183         ds.addCodonFrame(codons);
 184       }
 185     }
 186   }
 187
 188   /**
 189    * Parse GFF or Jalview format sequence features file
 190    *
 191    * @param align
 192    *          - alignment/dataset containing sequences that are to be annotated
 193    * @param colours
 194    *          - map to store feature colour definitions
 195    * @param removeHTML
 196    *          - process html strings into plain text
 197    * @param relaxedIdmatching
 198    *          - when true, ID matches to compound sequence IDs are allowed
 199    * @return true if features were added
 200    */
 201   public boolean parse(AlignmentI align,
 202           Map<String, FeatureColourI> colours, boolean removeHTML,
 203           boolean relaxedIdmatching)
 204   {
 205     return parse(align, colours, null, removeHTML, relaxedIdmatching);
 206   }
 207
 208   /**
 209    * Parse GFF or Jalview format sequence features file
 210    *
 211    * @param align
 212    *          - alignment/dataset containing sequences that are to be annotated
 213    * @param colours
 214    *          - map to store feature colour definitions
 215    * @param filters
 216    *          - map to store feature filter definitions
 217    * @param removeHTML
 218    *          - process html strings into plain text
 219    * @param relaxedIdmatching
 220    *          - when true, ID matches to compound sequence IDs are allowed
 221    * @return true if features were added
 222    */
 223   public boolean parse(AlignmentI align,
 224           Map<String, FeatureColourI> colours,
 225           Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
 226           boolean relaxedIdmatching)
 227   {
 228     Map<String, String> gffProps = new HashMap<>();
 229     /*
 230      * keep track of any sequences we try to create from the data
 231      */
 232     List<SequenceI> newseqs = new ArrayList<>();
 233
 234     String line = null;
 235     try
 236     {
 237       String[] gffColumns;
 238       String featureGroup = null;
 239
 240       while ((line = nextLine()) != null)
 241       {
 242         line = line.trim();
 243         // skip comments/process pragmas
 244         if (line.length() == 0 || line.startsWith("#"))
 245         {
 246           if (line.toLowerCase().startsWith("##"))
 247           {
 248             processGffPragma(line, gffProps, align, newseqs);
 249           }
 250           continue;
 251         }
 252
 253         gffColumns = line.split(TAB_REGEX);
 254         if (gffColumns.length == 1)
 255         {
 256           if (line.trim().equalsIgnoreCase("GFF"))
 257           {
 258             /*
 259              * Jalview features file with appended GFF
 260              * assume GFF2 (though it may declare ##gff-version 3)
 261              */
 262             gffVersion = 2;
 263             continue;
 264           }
 265         }
 266
 267         if (gffColumns.length > 0 && gffColumns.length < 4)
 268         {
 269           /*
 270            * if 2 or 3 tokens, we anticipate one of either
 271            * STARTGROUP ENDGROUP STARTFILTERS STARTMOTIFS
 272            * or a feature type and colour specification
 273            */
 274           String ft = gffColumns[0];
 275           if (ft.equalsIgnoreCase(STARTFILTERS))
 276           {
 277             parseFilters(filters);
 278             continue;
 279           }
 280           if (ft.equalsIgnoreCase(STARTMOTIFS))
 281           {
 282             parseMotifs(align, featureGroup);
 283             continue;
 284           }
 285           if (ft.equalsIgnoreCase(STARTGROUP))
 286           {
 287             featureGroup = gffColumns[1];
 288           }
 289           else if (ft.equalsIgnoreCase(ENDGROUP))
 290           {
 291             featureGroup = null;
 292           }
 293           else
 294           {
 295             String colscheme = gffColumns[1];
 296             FeatureColourI colour = FeatureColour
 297                     .parseJalviewFeatureColour(colscheme);
 298             if (colour != null)
 299             {
 300               colours.put(ft, colour);
 301             }
 302           }
 303           continue;
 304         }
 305
 306         /*
 307          * if not handled above, that just leaves a feature details line
 308          * in either Jalview or GFF format
 309          */
 310         if (gffVersion == 0)
 311         {
 312           parseJalviewFeature(line, gffColumns, align, colours, removeHTML,
 313                   relaxedIdmatching, featureGroup);
 314         }
 315         else
 316         {
 317           parseGff(gffColumns, align, relaxedIdmatching, newseqs);
 318         }
 319       }
 320       resetMatcher();
 321     } catch (Exception ex)
 322     {
 323       // should report somewhere useful for UI if necessary
 324       warningMessage = ((warningMessage == null) ? "" : warningMessage)
 325               + "Parsing error at\n" + line;
 326       System.out.println("Error parsing feature file: " + ex + "\n" + line);
 327       ex.printStackTrace(System.err);
 328       resetMatcher();
 329       return false;
 330     }
 331
 332     /*
 333      * experimental - add any dummy sequences with features to the alignment
 334      * - we need them for Ensembl feature extraction - though maybe not otherwise
 335      */
 336     for (SequenceI newseq : newseqs)
 337     {
 338       if (newseq.getFeatures().hasFeatures())
 339       {
 340         align.addSequence(newseq);
 341       }
 342     }
 343     return true;
 344   }
 345
 346   /**
 347    * Reads lines up to and including the next ENDMOTIFS, and processes each one
 348    * by
 349    * <ul>
 350    * <li>searching the alignment for the supplied motif (first column)</li>
 351    * <li>creating features for matches, with feature type, description and
 352    * (optionally) score given in the remaining columns</li>
 353    * </ul>
 354    *
 355    * @param alignment
 356    * @param featureGroup
 357    * @throws IOException
 358    */
 359   protected void parseMotifs(AlignmentI alignment, String featureGroup)
 360           throws IOException
 361   {
 362     FinderI finder = new Finder(alignment);
 363     String line;
 364     while ((line = nextLine()) != null)
 365     {
 366       if (line.toUpperCase().startsWith(ENDMOTIFS))
 367       {
 368         return;
 369       }
 370       String[] tokens = line.split(TAB_REGEX);
 371       if (tokens.length != 3 && tokens.length != 4)
 372       {
 373         System.err.println(String.format("Invalid token count %d for %s",
 374                 tokens.length, line));
 375       }
 376       String motif = tokens[0];
 377       String featureType = tokens[1];
 378       String description = tokens[2];
 379       float score = 0f;
 380       if (tokens.length > 3)
 381       {
 382         try
 383         {
 384           score = Float.valueOf(tokens[3]);
 385         } catch (NumberFormatException e)
 386         {
 387           System.err.println("Invalid score in " + line);
 388         }
 389       }
 390       finder.findAll(motif, true, false);
 391       List<SearchResultMatchI> matches = finder.getSearchResults()
 392               .getResults();
 393       for (SearchResultMatchI match : matches)
 394       {
 395         SequenceFeature sf = new SequenceFeature(featureType, description,
 396                 match.getStart(), match.getEnd(), score, featureGroup);
 397         match.getSequence().addSequenceFeature(sf);
 398       }
 399     }
 400   }
 401
 402   /**
 403    * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
 404    * filter to the map for each line parsed. After exit from this method,
 405    * nextLine() should return the line after ENDFILTERS (or we are already at
 406    * end of file if ENDFILTERS was missing).
 407    *
 408    * @param filters
 409    * @throws IOException
 410    */
 411   protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
 412           throws IOException
 413   {
 414     String line;
 415     while ((line = nextLine()) != null)
 416     {
 417       if (line.toUpperCase().startsWith(ENDFILTERS))
 418       {
 419         return;
 420       }
 421       String[] tokens = line.split(TAB_REGEX);
 422       if (tokens.length != 2)
 423       {
 424         System.err.println(String.format("Invalid token count %d for %s",
 425                 tokens.length, line));
 426       }
 427       else
 428       {
 429         String featureType = tokens[0];
 430         FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
 431         if (fm != null && filters != null)
 432         {
 433           filters.put(featureType, fm);
 434         }
 435       }
 436     }
 437   }
 438
 439   /**
 440    * Try to parse a Jalview format feature specification and add it as a
 441    * sequence feature to any matching sequences in the alignment. Returns true
 442    * if successful (a feature was added), or false if not.
 443    *
 444    * @param line
 445    * @param gffColumns
 446    * @param alignment
 447    * @param featureColours
 448    * @param removeHTML
 449    * @param relaxedIdmatching
 450    * @param featureGroup
 451    */
 452   protected boolean parseJalviewFeature(String line, String[] gffColumns,
 453           AlignmentI alignment, Map<String, FeatureColourI> featureColours,
 454           boolean removeHTML, boolean relaxedIdMatching,
 455           String featureGroup)
 456   {
 457     /*
 458      * tokens: description seqid seqIndex start end type [score]
 459      */
 460     if (gffColumns.length < 6)
 461     {
 462       System.err.println("Ignoring feature line '" + line
 463               + "' with too few columns (" + gffColumns.length + ")");
 464       return false;
 465     }
 466     String desc = gffColumns[0];
 467     String seqId = gffColumns[1];
 468     SequenceI seq = findSequence(seqId, alignment, null, relaxedIdMatching);
 469
 470     if (!ID_NOT_SPECIFIED.equals(seqId))
 471     {
 472       seq = findSequence(seqId, alignment, null, relaxedIdMatching);
 473     }
 474     else
 475     {
 476       seqId = null;
 477       seq = null;
 478       String seqIndex = gffColumns[2];
 479       try
 480       {
 481         int idx = Integer.parseInt(seqIndex);
 482         seq = alignment.getSequenceAt(idx);
 483       } catch (NumberFormatException ex)
 484       {
 485         System.err.println("Invalid sequence index: " + seqIndex);
 486       }
 487     }
 488
 489     if (seq == null)
 490     {
 491       System.out.println("Sequence not found: " + line);
 492       return false;
 493     }
 494
 495     int startPos = Integer.parseInt(gffColumns[3]);
 496     int endPos = Integer.parseInt(gffColumns[4]);
 497
 498     String ft = gffColumns[5];
 499
 500     if (!featureColours.containsKey(ft))
 501     {
 502       /*
 503        * Perhaps an old style groups file with no colours -
 504        * synthesize a colour from the feature type
 505        */
 506       Color colour = ColorUtils.createColourFromName(ft);
 507       featureColours.put(ft, new FeatureColour(colour));
 508     }
 509     SequenceFeature sf = null;
 510     if (gffColumns.length > 6)
 511     {
 512       float score = Float.NaN;
 513       try
 514       {
 515         score = Float.valueOf(gffColumns[6]).floatValue();
 516       } catch (NumberFormatException ex)
 517       {
 518         sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
 519       }
 520       sf = new SequenceFeature(ft, desc, startPos, endPos, score,
 521               featureGroup);
 522     }
 523     else
 524     {
 525       sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
 526     }
 527
 528     parseDescriptionHTML(sf, removeHTML);
 529
 530     seq.addSequenceFeature(sf);
 531
 532     while (seqId != null
 533             && (seq = alignment.findName(seq, seqId, false)) != null)
 534     {
 535       seq.addSequenceFeature(new SequenceFeature(sf));
 536     }
 537     return true;
 538   }
 539
 540   /**
 541    * clear any temporary handles used to speed up ID matching
 542    */
 543   protected void resetMatcher()
 544   {
 545     lastmatchedAl = null;
 546     matcher = null;
 547   }
 548
 549   /**
 550    * Returns a sequence matching the given id, as follows
 551    * <ul>
 552    * <li>strict matching is on exact sequence name</li>
 553    * <li>relaxed matching allows matching on a token within the sequence name,
 554    * or a dbxref</li>
 555    * <li>first tries to find a match in the alignment sequences</li>
 556    * <li>else tries to find a match in the new sequences already generated while
 557    * parsing the features file</li>
 558    * <li>else creates a new placeholder sequence, adds it to the new sequences
 559    * list, and returns it</li>
 560    * </ul>
 561    *
 562    * @param seqId
 563    * @param align
 564    * @param newseqs
 565    * @param relaxedIdMatching
 566    *
 567    * @return
 568    */
 569   protected SequenceI findSequence(String seqId, AlignmentI align,
 570           List<SequenceI> newseqs, boolean relaxedIdMatching)
 571   {
 572     // TODO encapsulate in SequenceIdMatcher, share the matcher
 573     // with the GffHelper (removing code duplication)
 574     SequenceI match = null;
 575     if (relaxedIdMatching)
 576     {
 577       if (lastmatchedAl != align)
 578       {
 579         lastmatchedAl = align;
 580         matcher = new SequenceIdMatcher(align.getSequencesArray());
 581         if (newseqs != null)
 582         {
 583           matcher.addAll(newseqs);
 584         }
 585       }
 586       match = matcher.findIdMatch(seqId);
 587     }
 588     else
 589     {
 590       match = align.findName(seqId, true);
 591       if (match == null && newseqs != null)
 592       {
 593         for (SequenceI m : newseqs)
 594         {
 595           if (seqId.equals(m.getName()))
 596           {
 597             return m;
 598           }
 599         }
 600       }
 601
 602     }
 603     if (match == null && newseqs != null)
 604     {
 605       match = new SequenceDummy(seqId);
 606       if (relaxedIdMatching)
 607       {
 608         matcher.addAll(Arrays.asList(new SequenceI[] { match }));
 609       }
 610       // add dummy sequence to the newseqs list
 611       newseqs.add(match);
 612     }
 613     return match;
 614   }
 615
 616   public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
 617   {
 618     if (sf.getDescription() == null)
 619     {
 620       return;
 621     }
 622     ParseHtmlBodyAndLinks parsed = new ParseHtmlBodyAndLinks(
 623             sf.getDescription(), removeHTML, newline);
 624
 625     if (removeHTML)
 626     {
 627       sf.setDescription(parsed.getNonHtmlContent());
 628     }
 629
 630     for (String link : parsed.getLinks())
 631     {
 632       sf.addLink(link);
 633     }
 634   }
 635
 636   /**
 637    * Returns contents of a Jalview format features file, for visible features, as
 638    * filtered by type and group. Features with a null group are displayed if their
 639    * feature type is visible. Non-positional features may optionally be included
 640    * (with no check on type or group).
 641    *
 642    * @param sequences
 643    * @param fr
 644    * @param includeNonPositional
 645    *                               if true, include non-positional features
 646    *                               (regardless of group or type)
 647    * @param includeComplement
 648    *                               if true, include visible complementary
 649    *                               (CDS/protein) positional features, with
 650    *                               locations converted to local sequence
 651    *                               coordinates
 652    * @return
 653    */
 654   public String printJalviewFormat(SequenceI[] sequences,
 655           FeatureRenderer fr, boolean includeNonPositional,
 656           boolean includeComplement)
 657   {
 658     Map<String, FeatureColourI> visibleColours = fr
 659             .getDisplayedFeatureCols();
 660     Map<String, FeatureMatcherSetI> featureFilters = fr.getFeatureFilters();
 661
 662     /*
 663      * write out feature colours (if we know them)
 664      */
 665     // TODO: decide if feature links should also be written here ?
 666     StringBuilder out = new StringBuilder(256);
 667     if (visibleColours != null)
 668     {
 669       for (Entry<String, FeatureColourI> featureColour : visibleColours
 670               .entrySet())
 671       {
 672         FeatureColourI colour = featureColour.getValue();
 673         out.append(colour.toJalviewFormat(featureColour.getKey())).append(
 674                 newline);
 675       }
 676     }
 677
 678     String[] types = visibleColours == null ? new String[0]
 679             : visibleColours.keySet()
 680                     .toArray(new String[visibleColours.keySet().size()]);
 681
 682     /*
 683      * feature filters if any
 684      */
 685     outputFeatureFilters(out, visibleColours, featureFilters);
 686
 687     /*
 688      * output features within groups
 689      */
 690     int count = outputFeaturesByGroup(out, fr, types, sequences,
 691             includeNonPositional);
 692
 693     if (includeComplement)
 694     {
 695       count += outputComplementFeatures(out, fr, sequences);
 696     }
 697
 698     return count > 0 ? out.toString() : "No Features Visible";
 699   }
 700
 701   /**
 702    * Outputs any visible complementary (CDS/peptide) positional features as
 703    * Jalview format, within feature group. The coordinates of the linked features
 704    * are converted to the corresponding positions of the local sequences.
 705    *
 706    * @param out
 707    * @param fr
 708    * @param sequences
 709    * @return
 710    */
 711   private int outputComplementFeatures(StringBuilder out,
 712           FeatureRenderer fr, SequenceI[] sequences)
 713   {
 714     AlignViewportI comp = fr.getViewport().getCodingComplement();
 715     FeatureRenderer fr2 = Desktop.getAlignFrameFor(comp)
 716             .getFeatureRenderer();
 717
 718     /*
 719      * bin features by feature group and sequence
 720      */
 721     Map<String, Map<String, List<SequenceFeature>>> map = new TreeMap<>(
 722             String.CASE_INSENSITIVE_ORDER);
 723     int count = 0;
 724
 725     for (SequenceI seq : sequences)
 726     {
 727       /*
 728        * find complementary features
 729        */
 730       List<SequenceFeature> complementary = findComplementaryFeatures(seq,
 731               fr2);
 732       String seqName = seq.getName();
 733
 734       for (SequenceFeature sf : complementary)
 735       {
 736         String group = sf.getFeatureGroup();
 737         if (!map.containsKey(group))
 738         {
 739           map.put(group, new LinkedHashMap<>()); // preserves sequence order
 740         }
 741         Map<String, List<SequenceFeature>> groupFeatures = map.get(group);
 742         if (!groupFeatures.containsKey(seqName))
 743         {
 744           groupFeatures.put(seqName, new ArrayList<>());
 745         }
 746         List<SequenceFeature> foundFeatures = groupFeatures.get(seqName);
 747         foundFeatures.add(sf);
 748         count++;
 749       }
 750     }
 751
 752     /*
 753      * output features by group
 754      */
 755     for (Entry<String, Map<String, List<SequenceFeature>>> groupFeatures : map.entrySet())
 756     {
 757       out.append(newline);
 758       String group = groupFeatures.getKey();
 759       if (!"".equals(group))
 760       {
 761         out.append(STARTGROUP).append(TAB).append(group).append(newline);
 762       }
 763       Map<String, List<SequenceFeature>> seqFeaturesMap = groupFeatures
 764               .getValue();
 765       for (Entry<String, List<SequenceFeature>> seqFeatures : seqFeaturesMap
 766               .entrySet())
 767       {
 768         String sequenceName = seqFeatures.getKey();
 769         for (SequenceFeature sf : seqFeatures.getValue())
 770         {
 771           formatJalviewFeature(out, sequenceName, sf);
 772         }
 773       }
 774       if (!"".equals(group))
 775       {
 776         out.append(ENDGROUP).append(TAB).append(group).append(newline);
 777       }
 778     }
 779
 780     return count;
 781   }
 782
 783   /**
 784    * Answers a list of mapped features visible in the (CDS/protein) complement,
 785    * with feature positions translated to local sequence coordinates
 786    *
 787    * @param seq
 788    * @param fr2
 789    * @return
 790    */
 791   protected List<SequenceFeature> findComplementaryFeatures(SequenceI seq,
 792           FeatureRenderer fr2)
 793   {
 794     /*
 795      * avoid duplication of features (e.g. peptide feature
 796      * at all 3 mapped codon positions)
 797      */
 798     List<SequenceFeature> found = new ArrayList<>();
 799     List<SequenceFeature> complementary = new ArrayList<>();
 800
 801     for (int pos = seq.getStart(); pos <= seq.getEnd(); pos++)
 802     {
 803       MappedFeatures mf = fr2.findComplementFeaturesAtResidue(seq, pos);
 804
 805       if (mf != null)
 806       {
 807         MapList mapping = mf.mapping.getMap();
 808         for (SequenceFeature sf : mf.features)
 809         {
 810           /*
 811            * make a virtual feature with local coordinates
 812            */
 813           if (!found.contains(sf))
 814           {
 815             String group = sf.getFeatureGroup();
 816             if (group == null)
 817             {
 818               group = "";
 819             }
 820             found.add(sf);
 821             int begin = sf.getBegin();
 822             int end = sf.getEnd();
 823             int[] range = mf.mapping.getTo() == seq.getDatasetSequence()
 824                     ? mapping.locateInTo(begin, end)
 825                     : mapping.locateInFrom(begin, end);
 826             SequenceFeature sf2 = new SequenceFeature(sf, range[0],
 827                     range[1], group, sf.getScore());
 828             complementary.add(sf2);
 829           }
 830         }
 831       }
 832     }
 833
 834     return complementary;
 835   }
 836
 837   /**
 838    * Outputs any feature filters defined for visible feature types, sandwiched by
 839    * STARTFILTERS and ENDFILTERS lines
 840    *
 841    * @param out
 842    * @param visible
 843    * @param featureFilters
 844    */
 845   void outputFeatureFilters(StringBuilder out,
 846           Map<String, FeatureColourI> visible,
 847           Map<String, FeatureMatcherSetI> featureFilters)
 848   {
 849     if (visible == null || featureFilters == null
 850             || featureFilters.isEmpty())
 851     {
 852       return;
 853     }
 854
 855     boolean first = true;
 856     for (String featureType : visible.keySet())
 857     {
 858       FeatureMatcherSetI filter = featureFilters.get(featureType);
 859       if (filter != null)
 860       {
 861         if (first)
 862         {
 863           first = false;
 864           out.append(newline).append(STARTFILTERS).append(newline);
 865         }
 866         out.append(featureType).append(TAB).append(filter.toStableString())
 867                 .append(newline);
 868       }
 869     }
 870     if (!first)
 871     {
 872       out.append(ENDFILTERS).append(newline);
 873     }
 874
 875   }
 876
 877   /**
 878    * Appends output of visible sequence features within feature groups to the
 879    * output buffer. Groups other than the null or empty group are sandwiched by
 880    * STARTGROUP and ENDGROUP lines. Answers the number of features written.
 881    *
 882    * @param out
 883    * @param fr
 884    * @param featureTypes
 885    * @param sequences
 886    * @param includeNonPositional
 887    * @return
 888    */
 889   private int outputFeaturesByGroup(StringBuilder out,
 890           FeatureRenderer fr, String[] featureTypes,
 891           SequenceI[] sequences, boolean includeNonPositional)
 892   {
 893     List<String> featureGroups = fr.getFeatureGroups();
 894
 895     /*
 896      * sort groups alphabetically, and ensure that features with a
 897      * null or empty group are output after those in named groups
 898      */
 899     List<String> sortedGroups = new ArrayList<>(featureGroups);
 900     sortedGroups.remove(null);
 901     sortedGroups.remove("");
 902     Collections.sort(sortedGroups);
 903     sortedGroups.add(null);
 904     sortedGroups.add("");
 905
 906     int count = 0;
 907     List<String> visibleGroups = fr.getDisplayedFeatureGroups();
 908
 909     /*
 910      * loop over all groups (may be visible or not);
 911      * non-positional features are output even if group is not visible
 912      */
 913     for (String group : sortedGroups)
 914     {
 915       boolean firstInGroup = true;
 916       boolean isNullGroup = group == null || "".equals(group);
 917
 918       for (int i = 0; i < sequences.length; i++)
 919       {
 920         String sequenceName = sequences[i].getName();
 921         List<SequenceFeature> features = new ArrayList<>();
 922
 923         /*
 924          * get any non-positional features in this group, if wanted
 925          * (for any feature type, whether visible or not)
 926          */
 927         if (includeNonPositional)
 928         {
 929           features.addAll(sequences[i].getFeatures()
 930                   .getFeaturesForGroup(false, group));
 931         }
 932
 933         /*
 934          * add positional features for visible feature types, but
 935          * (for named groups) only if feature group is visible
 936          */
 937         if (featureTypes.length > 0
 938                 && (isNullGroup || visibleGroups.contains(group)))
 939         {
 940           features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
 941                   true, group, featureTypes));
 942         }
 943
 944         for (SequenceFeature sf : features)
 945         {
 946           if (sf.isNonPositional() || fr.isVisible(sf))
 947           {
 948             count++;
 949             if (firstInGroup)
 950             {
 951               out.append(newline);
 952               if (!isNullGroup)
 953               {
 954                 out.append(STARTGROUP).append(TAB).append(group)
 955                         .append(newline);
 956               }
 957             }
 958             firstInGroup = false;
 959             formatJalviewFeature(out, sequenceName, sf);
 960           }
 961         }
 962       }
 963
 964       if (!isNullGroup && !firstInGroup)
 965       {
 966         out.append(ENDGROUP).append(TAB).append(group).append(newline);
 967       }
 968     }
 969     return count;
 970   }
 971
 972   /**
 973    * Formats one feature in Jalview format and appends to the string buffer
 974    *
 975    * @param out
 976    * @param sequenceName
 977    * @param sequenceFeature
 978    */
 979   protected void formatJalviewFeature(
 980           StringBuilder out, String sequenceName,
 981           SequenceFeature sequenceFeature)
 982   {
 983     if (sequenceFeature.description == null
 984             || sequenceFeature.description.equals(""))
 985     {
 986       out.append(sequenceFeature.type).append(TAB);
 987     }
 988     else
 989     {
 990       if (sequenceFeature.links != null
 991               && sequenceFeature.getDescription().indexOf("<html>") == -1)
 992       {
 993         out.append("<html>");
 994       }
 995
 996       out.append(sequenceFeature.description);
 997       if (sequenceFeature.links != null)
 998       {
 999         for (int l = 0; l < sequenceFeature.links.size(); l++)
1000         {
1001           String label = sequenceFeature.links.elementAt(l);
1002           String href = label.substring(label.indexOf("|") + 1);
1003           label = label.substring(0, label.indexOf("|"));
1004
1005           if (sequenceFeature.description.indexOf(href) == -1)
1006           {
1007             out.append(" <a href=\"").append(href).append("\">")
1008                     .append(label).append("</a>");
1009           }
1010         }
1011
1012         if (sequenceFeature.getDescription().indexOf("</html>") == -1)
1013         {
1014           out.append("</html>");
1015         }
1016       }
1017
1018       out.append(TAB);
1019     }
1020     out.append(sequenceName);
1021     out.append("\t-1\t");
1022     out.append(sequenceFeature.begin);
1023     out.append(TAB);
1024     out.append(sequenceFeature.end);
1025     out.append(TAB);
1026     out.append(sequenceFeature.type);
1027     if (!Float.isNaN(sequenceFeature.score))
1028     {
1029       out.append(TAB);
1030       out.append(sequenceFeature.score);
1031     }
1032     out.append(newline);
1033   }
1034
1035   /**
1036    * Parse method that is called when a GFF file is dragged to the desktop
1037    */
1038   @Override
1039   public void parse()
1040   {
1041     AlignViewportI av = getViewport();
1042     if (av != null)
1043     {
1044       if (av.getAlignment() != null)
1045       {
1046         dataset = av.getAlignment().getDataset();
1047       }
1048       if (dataset == null)
1049       {
1050         // working in the applet context ?
1051         dataset = av.getAlignment();
1052       }
1053     }
1054     else
1055     {
1056       dataset = new Alignment(new SequenceI[] {});
1057     }
1058
1059     Map<String, FeatureColourI> featureColours = new HashMap<>();
1060     boolean parseResult = parse(dataset, featureColours, false, true);
1061     if (!parseResult)
1062     {
1063       // pass error up somehow
1064     }
1065     if (av != null)
1066     {
1067       // update viewport with the dataset data ?
1068     }
1069     else
1070     {
1071       setSeqs(dataset.getSequencesArray());
1072     }
1073   }
1074
1075   /**
1076    * Implementation of unused abstract method
1077    *
1078    * @return error message
1079    */
1080   @Override
1081   public String print(SequenceI[] sqs, boolean jvsuffix)
1082   {
1083     System.out.println("Use printGffFormat() or printJalviewFormat()");
1084     return null;
1085   }
1086
1087   /**
1088    * Returns features output in GFF2 format
1089    *
1090    * @param sequences
1091    *                                       the sequences whose features are to be
1092    *                                       output
1093    * @param visible
1094    *                                       a map whose keys are the type names of
1095    *                                       visible features
1096    * @param visibleFeatureGroups
1097    * @param includeNonPositionalFeatures
1098    * @param includeComplement
1099    * @return
1100    */
1101   public String printGffFormat(SequenceI[] sequences,
1102           FeatureRenderer fr, boolean includeNonPositionalFeatures,
1103           boolean includeComplement)
1104   {
1105     FeatureRenderer fr2 = null;
1106     if (includeComplement)
1107     {
1108       AlignViewportI comp = fr.getViewport().getCodingComplement();
1109       fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer();
1110     }
1111
1112     Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
1113
1114     StringBuilder out = new StringBuilder(256);
1115
1116     out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
1117
1118     String[] types = visibleColours == null ? new String[0]
1119             : visibleColours.keySet()
1120                     .toArray(new String[visibleColours.keySet().size()]);
1121
1122     for (SequenceI seq : sequences)
1123     {
1124       List<SequenceFeature> seqFeatures = new ArrayList<>();
1125       List<SequenceFeature> features = new ArrayList<>();
1126       if (includeNonPositionalFeatures)
1127       {
1128         features.addAll(seq.getFeatures().getNonPositionalFeatures());
1129       }
1130       if (visibleColours != null && !visibleColours.isEmpty())
1131       {
1132         features.addAll(seq.getFeatures().getPositionalFeatures(types));
1133       }
1134       for (SequenceFeature sf : features)
1135       {
1136         if (sf.isNonPositional() || fr.isVisible(sf))
1137         {
1138           /*
1139            * drop features hidden by group visibility, colour threshold,
1140            * or feature filter condition
1141            */
1142           seqFeatures.add(sf);
1143         }
1144       }
1145
1146       if (includeComplement)
1147       {
1148         seqFeatures.addAll(findComplementaryFeatures(seq, fr2));
1149       }
1150
1151       /*
1152        * sort features here if wanted
1153        */
1154       for (SequenceFeature sf : seqFeatures)
1155       {
1156         formatGffFeature(out, seq, sf);
1157         out.append(newline);
1158       }
1159     }
1160
1161     return out.toString();
1162   }
1163
1164   /**
1165    * Formats one feature as GFF and appends to the string buffer
1166    */
1167   private void formatGffFeature(StringBuilder out, SequenceI seq,
1168           SequenceFeature sf)
1169   {
1170     String source = sf.featureGroup;
1171     if (source == null)
1172     {
1173       source = sf.getDescription();
1174     }
1175
1176     out.append(seq.getName());
1177     out.append(TAB);
1178     out.append(source);
1179     out.append(TAB);
1180     out.append(sf.type);
1181     out.append(TAB);
1182     out.append(sf.begin);
1183     out.append(TAB);
1184     out.append(sf.end);
1185     out.append(TAB);
1186     out.append(sf.score);
1187     out.append(TAB);
1188
1189     int strand = sf.getStrand();
1190     out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
1191     out.append(TAB);
1192
1193     String phase = sf.getPhase();
1194     out.append(phase == null ? "." : phase);
1195
1196     // miscellaneous key-values (GFF column 9)
1197     String attributes = sf.getAttributes();
1198     if (attributes != null)
1199     {
1200       out.append(TAB).append(attributes);
1201     }
1202   }
1203
1204   /**
1205    * Returns a mapping given list of one or more Align descriptors (exonerate
1206    * format)
1207    *
1208    * @param alignedRegions
1209    *          a list of "Align fromStart toStart fromCount"
1210    * @param mapIsFromCdna
1211    *          if true, 'from' is dna, else 'from' is protein
1212    * @param strand
1213    *          either 1 (forward) or -1 (reverse)
1214    * @return
1215    * @throws IOException
1216    */
1217   protected MapList constructCodonMappingFromAlign(
1218           List<String> alignedRegions, boolean mapIsFromCdna, int strand)
1219           throws IOException
1220   {
1221     if (strand == 0)
1222     {
1223       throw new IOException(
1224               "Invalid strand for a codon mapping (cannot be 0)");
1225     }
1226     int regions = alignedRegions.size();
1227     // arrays to hold [start, end] for each aligned region
1228     int[] fromRanges = new int[regions * 2]; // from dna
1229     int[] toRanges = new int[regions * 2]; // to protein
1230     int fromRangesIndex = 0;
1231     int toRangesIndex = 0;
1232
1233     for (String range : alignedRegions)
1234     {
1235       /*
1236        * Align mapFromStart mapToStart mapFromCount
1237        * e.g. if mapIsFromCdna
1238        *     Align 11270 143 120
1239        * means:
1240        *     120 bases from pos 11270 align to pos 143 in peptide
1241        * if !mapIsFromCdna this would instead be
1242        *     Align 143 11270 40
1243        */
1244       String[] tokens = range.split(" ");
1245       if (tokens.length != 3)
1246       {
1247         throw new IOException("Wrong number of fields for Align");
1248       }
1249       int fromStart = 0;
1250       int toStart = 0;
1251       int fromCount = 0;
1252       try
1253       {
1254         fromStart = Integer.parseInt(tokens[0]);
1255         toStart = Integer.parseInt(tokens[1]);
1256         fromCount = Integer.parseInt(tokens[2]);
1257       } catch (NumberFormatException nfe)
1258       {
1259         throw new IOException(
1260                 "Invalid number in Align field: " + nfe.getMessage());
1261       }
1262
1263       /*
1264        * Jalview always models from dna to protein, so adjust values if the
1265        * GFF mapping is from protein to dna
1266        */
1267       if (!mapIsFromCdna)
1268       {
1269         fromCount *= 3;
1270         int temp = fromStart;
1271         fromStart = toStart;
1272         toStart = temp;
1273       }
1274       fromRanges[fromRangesIndex++] = fromStart;
1275       fromRanges[fromRangesIndex++] = fromStart + strand * (fromCount - 1);
1276
1277       /*
1278        * If a codon has an intron gap, there will be contiguous 'toRanges';
1279        * this is handled for us by the MapList constructor.
1280        * (It is not clear that exonerate ever generates this case)
1281        */
1282       toRanges[toRangesIndex++] = toStart;
1283       toRanges[toRangesIndex++] = toStart + (fromCount - 1) / 3;
1284     }
1285
1286     return new MapList(fromRanges, toRanges, 3, 1);
1287   }
1288
1289   /**
1290    * Parse a GFF format feature. This may include creating a 'dummy' sequence to
1291    * hold the feature, or for its mapped sequence, or both, to be resolved
1292    * either later in the GFF file (##FASTA section), or when the user loads
1293    * additional sequences.
1294    *
1295    * @param gffColumns
1296    * @param alignment
1297    * @param relaxedIdMatching
1298    * @param newseqs
1299    * @return
1300    */
1301   protected SequenceI parseGff(String[] gffColumns, AlignmentI alignment,
1302           boolean relaxedIdMatching, List<SequenceI> newseqs)
1303   {
1304     /*
1305      * GFF: seqid source type start end score strand phase [attributes]
1306      */
1307     if (gffColumns.length < 5)
1308     {
1309       System.err.println("Ignoring GFF feature line with too few columns ("
1310               + gffColumns.length + ")");
1311       return null;
1312     }
1313
1314     /*
1315      * locate referenced sequence in alignment _or_
1316      * as a forward or external reference (SequenceDummy)
1317      */
1318     String seqId = gffColumns[0];
1319     SequenceI seq = findSequence(seqId, alignment, newseqs,
1320             relaxedIdMatching);
1321
1322     SequenceFeature sf = null;
1323     GffHelperI helper = GffHelperFactory.getHelper(gffColumns);
1324     if (helper != null)
1325     {
1326       try
1327       {
1328         sf = helper.processGff(seq, gffColumns, alignment, newseqs,
1329                 relaxedIdMatching);
1330         if (sf != null)
1331         {
1332           seq.addSequenceFeature(sf);
1333           while ((seq = alignment.findName(seq, seqId, true)) != null)
1334           {
1335             seq.addSequenceFeature(new SequenceFeature(sf));
1336           }
1337         }
1338       } catch (IOException e)
1339       {
1340         System.err.println("GFF parsing failed with: " + e.getMessage());
1341         return null;
1342       }
1343     }
1344
1345     return seq;
1346   }
1347
1348   /**
1349    * Process the 'column 9' data of the GFF file. This is less formally defined,
1350    * and its interpretation will vary depending on the tool that has generated
1351    * it.
1352    *
1353    * @param attributes
1354    * @param sf
1355    */
1356   protected void processGffColumnNine(String attributes, SequenceFeature sf)
1357   {
1358     sf.setAttributes(attributes);
1359
1360     /*
1361      * Parse attributes in column 9 and add them to the sequence feature's
1362      * 'otherData' table; use Note as a best proxy for description
1363      */
1364     char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
1365     // TODO check we don't break GFF2 values which include commas here
1366     Map<String, List<String>> nameValues = GffHelperBase
1367             .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
1368     for (Entry<String, List<String>> attr : nameValues.entrySet())
1369     {
1370       String values = StringUtils.listToDelimitedString(attr.getValue(),
1371               "; ");
1372       sf.setValue(attr.getKey(), values);
1373       if (NOTE.equals(attr.getKey()))
1374       {
1375         sf.setDescription(values);
1376       }
1377     }
1378   }
1379
1380   /**
1381    * After encountering ##fasta in a GFF3 file, process the remainder of the
1382    * file as FAST sequence data. Any placeholder sequences created during
1383    * feature parsing are updated with the actual sequences.
1384    *
1385    * @param align
1386    * @param newseqs
1387    * @throws IOException
1388    */
1389   protected void processAsFasta(AlignmentI align, List<SequenceI> newseqs)
1390           throws IOException
1391   {
1392     try
1393     {
1394       mark();
1395     } catch (IOException q)
1396     {
1397     }
1398     FastaFile parser = new FastaFile(this);
1399     List<SequenceI> includedseqs = parser.getSeqs();
1400
1401     SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
1402
1403     /*
1404      * iterate over includedseqs, and replacing matching ones with newseqs
1405      * sequences. Generic iterator not used here because we modify
1406      * includedseqs as we go
1407      */
1408     for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
1409     {
1410       // search for any dummy seqs that this sequence can be used to update
1411       SequenceI includedSeq = includedseqs.get(p);
1412       SequenceI dummyseq = smatcher.findIdMatch(includedSeq);
1413       if (dummyseq != null && dummyseq instanceof SequenceDummy)
1414       {
1415         // probably have the pattern wrong
1416         // idea is that a flyweight proxy for a sequence ID can be created for
1417         // 1. stable reference creation
1418         // 2. addition of annotation
1419         // 3. future replacement by a real sequence
1420         // current pattern is to create SequenceDummy objects - a convenience
1421         // constructor for a Sequence.
1422         // problem is that when promoted to a real sequence, all references
1423         // need to be updated somehow. We avoid that by keeping the same object.
1424         ((SequenceDummy) dummyseq).become(includedSeq);
1425         dummyseq.createDatasetSequence();
1426
1427         /*
1428          * Update mappings so they are now to the dataset sequence
1429          */
1430         for (AlignedCodonFrame mapping : align.getCodonFrames())
1431         {
1432           mapping.updateToDataset(dummyseq);
1433         }
1434
1435         /*
1436          * replace parsed sequence with the realised forward reference
1437          */
1438         includedseqs.set(p, dummyseq);
1439
1440         /*
1441          * and remove from the newseqs list
1442          */
1443         newseqs.remove(dummyseq);
1444       }
1445     }
1446
1447     /*
1448      * finally add sequences to the dataset
1449      */
1450     for (SequenceI seq : includedseqs)
1451     {
1452       // experimental: mapping-based 'alignment' to query sequence
1453       AlignmentUtils.alignSequenceAs(seq, align,
1454               String.valueOf(align.getGapCharacter()), false, true);
1455
1456       // rename sequences if GFF handler requested this
1457       // TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ?
1458       List<SequenceFeature> sfs = seq.getFeatures().getPositionalFeatures();
1459       if (!sfs.isEmpty())
1460       {
1461         String newName = (String) sfs.get(0).getValue(
1462                 GffHelperI.RENAME_TOKEN);
1463         if (newName != null)
1464         {
1465           seq.setName(newName);
1466         }
1467       }
1468       align.addSequence(seq);
1469     }
1470   }
1471
1472   /**
1473    * Process a ## directive
1474    *
1475    * @param line
1476    * @param gffProps
1477    * @param align
1478    * @param newseqs
1479    * @throws IOException
1480    */
1481   protected void processGffPragma(String line, Map<String, String> gffProps,
1482           AlignmentI align, List<SequenceI> newseqs) throws IOException
1483   {
1484     line = line.trim();
1485     if ("###".equals(line))
1486     {
1487       // close off any open 'forward references'
1488       return;
1489     }
1490
1491     String[] tokens = line.substring(2).split(" ");
1492     String pragma = tokens[0];
1493     String value = tokens.length == 1 ? null : tokens[1];
1494
1495     if ("gff-version".equalsIgnoreCase(pragma))
1496     {
1497       if (value != null)
1498       {
1499         try
1500         {
1501           // value may be e.g. "3.1.2"
1502           gffVersion = Integer.parseInt(value.split("\\.")[0]);
1503         } catch (NumberFormatException e)
1504         {
1505           // ignore
1506         }
1507       }
1508     }
1509     else if ("sequence-region".equalsIgnoreCase(pragma))
1510     {
1511       // could capture <seqid start end> if wanted here
1512     }
1513     else if ("feature-ontology".equalsIgnoreCase(pragma))
1514     {
1515       // should resolve against the specified feature ontology URI
1516     }
1517     else if ("attribute-ontology".equalsIgnoreCase(pragma))
1518     {
1519       // URI of attribute ontology - not currently used in GFF3
1520     }
1521     else if ("source-ontology".equalsIgnoreCase(pragma))
1522     {
1523       // URI of source ontology - not currently used in GFF3
1524     }
1525     else if ("species-build".equalsIgnoreCase(pragma))
1526     {
1527       // save URI of specific NCBI taxon version of annotations
1528       gffProps.put("species-build", value);
1529     }
1530     else if ("fasta".equalsIgnoreCase(pragma))
1531     {
1532       // process the rest of the file as a fasta file and replace any dummy
1533       // sequence IDs
1534       processAsFasta(align, newseqs);
1535     }
1536     else
1537     {
1538       System.err.println("Ignoring unknown pragma: " + line);
1539     }
1540   }
1541 }