src/jalview/ext/ensembl/EnsemblSeqProxy.java

   1 package jalview.ext.ensembl;
   2
   3 import jalview.datamodel.Alignment;
   4 import jalview.datamodel.AlignmentI;
   5 import jalview.datamodel.DBRefEntry;
   6 import jalview.datamodel.DBRefSource;
   7 import jalview.datamodel.Mapping;
   8 import jalview.datamodel.SequenceFeature;
   9 import jalview.datamodel.SequenceI;
  10 import jalview.exceptions.JalviewException;
  11 import jalview.io.FastaFile;
  12 import jalview.io.FileParse;
  13 import jalview.io.gff.SequenceOntology;
  14 import jalview.util.DBRefUtils;
  15 import jalview.util.MapList;
  16
  17 import java.io.IOException;
  18 import java.net.MalformedURLException;
  19 import java.net.URL;
  20 import java.util.ArrayList;
  21 import java.util.Arrays;
  22 import java.util.Collections;
  23 import java.util.Comparator;
  24 import java.util.List;
  25
  26 /**
  27  * Base class for Ensembl sequence fetchers
  28  *
  29  * @author gmcarstairs
  30  */
  31 public abstract class EnsemblSeqProxy extends EnsemblRestClient
  32 {
  /*
   * key of the feature attribute read when transferring a sequence_variant
   * feature, to build the additional "consequence" feature
   */
  protected static final String CONSEQUENCE_TYPE = "consequence_type";

  /*
   * key of the feature attribute holding the identifier of the feature's
   * parent (possibly with a prefix such as "gene:" or "transcript:")
   */
  protected static final String PARENT = "Parent";

  /*
   * key of the feature "ID" attribute; not read within this class
   * (presumably used by subclasses - confirm)
   */
  protected static final String ID = "ID";

  /*
   * this needs special handling, as it isA sequence_variant in the
   * Sequence Ontology, but behaves in Ensembl as if it isA transcript
   */
  protected static final String NMD_VARIANT = "NMD_transcript_variant";
  44
  /**
   * The variants of an Ensembl sequence that may be requested. Each constant
   * carries the value of the 'type' request parameter that selects it.
   */
  public enum EnsemblSeqType
  {
    /**
     * type=genomic for the full dna including introns
     */
    GENOMIC("genomic"),

    /**
     * type=cdna for transcribed dna including UTRs
     */
    CDNA("cdna"),

    /**
     * type=cds for coding dna excluding UTRs
     */
    CDS("cds"),

    /**
     * type=protein for the peptide product sequence
     */
    PROTEIN("protein");

    /*
     * the value of the 'type' parameter to fetch this version of
     * an Ensembl sequence; immutable, as enum constants are shared
     */
    private final String type;

    /**
     * Constructor
     * 
     * @param t
     *          the 'type' parameter value for this sequence type
     */
    EnsemblSeqType(String t)
    {
      type = t;
    }

    /**
     * Returns the value of the 'type' parameter for this sequence type
     * 
     * @return
     */
    public String getType()
    {
      return type;
    }

  }
  84
  85   /**
  86    * A comparator to sort ranges into ascending start position order
  87    */
  88   private class RangeSorter implements Comparator<int[]>
  89   {
  90     boolean forwards;
  91
  92     RangeSorter(boolean forward)
  93     {
  94       forwards = forward;
  95     }
  96
  97     @Override
  98     public int compare(int[] o1, int[] o2)
  99     {
 100       return (forwards ? 1 : -1) * Integer.compare(o1[0], o2[0]);
 101     }
 102
 103   }
 104
  /**
   * Default constructor. It performs no initialisation of its own.
   */
  public EnsemblSeqProxy()
  {
  }
 111
 112   /**
 113    * Makes the sequence queries to Ensembl's REST service and returns an
 114    * alignment consisting of the returned sequences. This overloaded method
 115    * allows the genomic sequence (with features) to be passed in if it has
 116    * already been retrieved, to avoid repeat calls to fetch it.
 117    */
 118   public AlignmentI getSequenceRecords(String query,
 119           SequenceI genomicSequence) throws Exception
 120   {
 121     long now = System.currentTimeMillis();
 122     // TODO use a String... query vararg instead?
 123
 124     // danger: accession separator used as a regex here, a string elsewhere
 125     // in this case it is ok (it is just a space), but (e.g.) '\' would not be
 126     List<String> allIds = Arrays.asList(query
 127             .split(getAccessionSeparator()));
 128     AlignmentI alignment = null;
 129     inProgress = true;
 130
 131     /*
 132      * execute queries, if necessary in batches of the
 133      * maximum allowed number of ids
 134      */
 135     int maxQueryCount = getMaximumQueryCount();
 136     for (int v = 0, vSize = allIds.size(); v < vSize; v += maxQueryCount)
 137     {
 138       int p = Math.min(vSize, v + maxQueryCount);
 139       List<String> ids = allIds.subList(v, p);
 140       try
 141       {
 142         alignment = fetchSequences(ids, alignment);
 143       } catch (Throwable r)
 144       {
 145         inProgress = false;
 146         String msg = "Aborting ID retrieval after " + v
 147                 + " chunks. Unexpected problem (" + r.getLocalizedMessage()
 148                 + ")";
 149         System.err.println(msg);
 150         if (alignment != null)
 151         {
 152           break; // return what we got
 153         }
 154         else
 155         {
 156           throw new JalviewException(msg, r);
 157         }
 158       }
 159     }
 160
 161     /*
 162      * fetch and transfer genomic sequence features
 163      */
 164     for (String accId : allIds)
 165     {
 166       addFeaturesAndProduct(accId, alignment, genomicSequence);
 167     }
 168
 169     inProgress = false;
 170     System.out.println(getClass().getName() + " took "
 171             + (System.currentTimeMillis() - now) + "ms to fetch");
 172     return alignment;
 173   }
 174
 175   /**
 176    * Fetches Ensembl features using the /overlap REST endpoint, and adds them to
 177    * the sequence in the alignment. Also fetches the protein product, maps it
 178    * from the CDS features of the sequence, and saves it as a cross-reference of
 179    * the dna sequence.
 180    *
 181    * @param accId
 182    * @param alignment
 183    */
 184   protected void addFeaturesAndProduct(String accId, AlignmentI alignment,
 185           SequenceI genomicSequence)
 186   {
 187     try
 188     {
 189       /*
 190        * get 'dummy' genomic sequence with exon, cds and variation features
 191        */
 192       if (genomicSequence == null)
 193       {
 194         EnsemblOverlap gffFetcher = new EnsemblOverlap();
 195         EnsemblFeatureType[] features = getFeaturesToFetch();
 196         AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
 197                 features);
 198         if (geneFeatures.getHeight() > 0)
 199         {
 200           genomicSequence = geneFeatures.getSequenceAt(0);
 201         }
 202       }
 203       if (genomicSequence != null)
 204       {
 205         /*
 206          * transfer features to the query sequence
 207          */
 208         SequenceI querySeq = alignment.findName(accId);
 209         if (transferFeatures(accId, genomicSequence, querySeq))
 210         {
 211
 212           /*
 213            * fetch and map protein product, and add it as a cross-reference
 214            * of the retrieved sequence
 215            */
 216           addProteinProduct(querySeq);
 217         }
 218       }
 219     } catch (IOException e)
 220     {
 221       System.err.println("Error transferring Ensembl features: "
 222               + e.getMessage());
 223     }
 224   }
 225
  /**
   * Returns those sequence feature types to fetch from Ensembl. We may want
   * features either because they are of interest to the user, or as means to
   * identify the locations of the sequence on the genomic sequence (CDS
   * features identify CDS, exon features identify cDNA etc).
   * 
   * @return the feature types to request when fetching the genomic features
   *         for an accession id
   */
  protected abstract EnsemblFeatureType[] getFeaturesToFetch();
 235
 236   /**
 237    * Fetches and maps the protein product, and adds it as a cross-reference of
 238    * the retrieved sequence
 239    */
 240   protected void addProteinProduct(SequenceI querySeq)
 241   {
 242     String accId = querySeq.getName();
 243     try
 244     {
 245       AlignmentI protein = new EnsemblProtein().getSequenceRecords(accId);
 246       if (protein == null || protein.getHeight() == 0)
 247       {
 248         System.out.println("Failed to retrieve protein for " + accId);
 249         return;
 250       }
 251       SequenceI proteinSeq = protein.getSequenceAt(0);
 252
 253       /*
 254        * need dataset sequences (to be the subject of mappings)
 255        */
 256       proteinSeq.createDatasetSequence();
 257       querySeq.createDatasetSequence();
 258
 259       MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
 260       if (mapList != null)
 261       {
 262         Mapping map = new Mapping(proteinSeq.getDatasetSequence(), mapList);
 263         DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(),
 264                 accId, map);
 265         querySeq.getDatasetSequence().addDBRef(dbr);
 266       }
 267     } catch (Exception e)
 268     {
 269       System.err
 270               .println(String.format("Error retrieving protein for %s: %s",
 271                       accId, e.getMessage()));
 272     }
 273   }
 274
 275   /**
 276    * Returns a mapping from dna to protein by inspecting sequence features of
 277    * type "CDS" on the dna.
 278    *
 279    * @param dnaSeq
 280    * @param proteinSeq
 281    * @return
 282    */
 283   protected MapList mapCdsToProtein(SequenceI dnaSeq, SequenceI proteinSeq)
 284   {
 285     SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
 286     if (sfs == null)
 287     {
 288       return null;
 289     }
 290
 291     List<int[]> ranges = new ArrayList<int[]>(50);
 292     SequenceOntology so = SequenceOntology.getInstance();
 293
 294     int mappedDnaLength = 0;
 295
 296     /*
 297      * Map CDS columns of dna to peptide. No need to worry about reverse strand
 298      * dna here since the retrieved sequence is as transcribed (reverse
 299      * complement for reverse strand), i.e in the same sense as the peptide.
 300      */
 301     boolean fivePrimeIncomplete = false;
 302     for (SequenceFeature sf : sfs)
 303     {
 304       /*
 305        * process a CDS feature (or a sub-type of CDS)
 306        */
 307       if (so.isA(sf.getType(), SequenceOntology.CDS))
 308       {
 309         int phase = 0;
 310         try {
 311           phase = Integer.parseInt(sf.getPhase());
 312         } catch (NumberFormatException e)
 313         {
 314           // ignore
 315         }
 316         /*
 317          * phase > 0 on first codon means 5' incomplete - skip to the start
 318          * of the next codon; example ENST00000496384
 319          */
 320         int begin = sf.getBegin();
 321         int end = sf.getEnd();
 322         if (ranges.isEmpty() && phase > 0)
 323         {
 324           fivePrimeIncomplete = true;
 325           begin += phase;
 326           if (begin > end)
 327           {
 328             continue; // shouldn't happen?
 329           }
 330         }
 331         ranges.add(new int[] { begin, end });
 332         mappedDnaLength += Math.abs(end - begin) + 1;
 333       }
 334     }
 335     int proteinLength = proteinSeq.getLength();
 336     List<int[]> proteinRange = new ArrayList<int[]>();
 337     int proteinStart = 1;
 338     if (fivePrimeIncomplete && proteinSeq.getCharAt(0) == 'X')
 339     {
 340       proteinStart = 2;
 341       proteinLength--;
 342     }
 343     proteinRange.add(new int[] { proteinStart, proteinLength });
 344
 345     /*
 346      * dna length should map to protein (or protein plus stop codon)
 347      */
 348     int codesForResidues = mappedDnaLength / 3;
 349     if (codesForResidues == proteinLength
 350             || codesForResidues == (proteinLength + 1))
 351     {
 352       return new MapList(ranges, proteinRange, 3, 1);
 353     }
 354     return null;
 355   }
 356
 357   /**
 358    * Fetches sequences for the list of accession ids and adds them to the
 359    * alignment. Returns the extended (or created) alignment.
 360    *
 361    * @param ids
 362    * @param alignment
 363    * @return
 364    * @throws JalviewException
 365    * @throws IOException
 366    */
 367   protected AlignmentI fetchSequences(List<String> ids, AlignmentI alignment)
 368           throws JalviewException, IOException
 369   {
 370     if (!isEnsemblAvailable())
 371     {
 372       inProgress = false;
 373       throw new JalviewException("ENSEMBL Rest API not available.");
 374     }
 375     FileParse fp = getSequenceReader(ids);
 376     FastaFile fr = new FastaFile(fp);
 377     if (fr.hasWarningMessage())
 378     {
 379       System.out.println(String.format(
 380               "Warning when retrieving %d ids %s\n%s", ids.size(),
 381               ids.toString(), fr.getWarningMessage()));
 382     }
 383     else if (fr.getSeqs().size() != ids.size())
 384     {
 385       System.out.println(String.format(
 386               "Only retrieved %d sequences for %d query strings", fr
 387                       .getSeqs().size(), ids.size()));
 388     }
 389     if (fr.getSeqs().size() > 0)
 390     {
 391       AlignmentI seqal = new Alignment(
 392               fr.getSeqsAsArray());
 393       for (SequenceI sq:seqal.getSequences())
 394       {
 395         if (sq.getDescription() == null)
 396         {
 397           sq.setDescription(getDbName());
 398         }
 399         String name = sq.getName();
 400         if (ids.contains(name)
 401                 || ids.contains(name.replace("ENSP", "ENST")))
 402         {
 403           DBRefUtils.parseToDbRef(sq, DBRefSource.ENSEMBL, "0", name);
 404         }
 405       }
 406       if (alignment == null)
 407       {
 408         alignment = seqal;
 409       }
 410       else
 411       {
 412         alignment.append(seqal);
 413       }
 414     }
 415     return alignment;
 416   }
 417
 418   /**
 419    * Returns the URL for the REST call
 420    *
 421    * @return
 422    * @throws MalformedURLException
 423    */
 424   @Override
 425   protected URL getUrl(List<String> ids) throws MalformedURLException
 426   {
 427     // ids are not used - they go in the POST body instead
 428     StringBuffer urlstring = new StringBuffer(128);
 429     urlstring.append(SEQUENCE_ID_URL);
 430
 431     // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
 432     urlstring.append("?type=").append(getSourceEnsemblType().getType());
 433     urlstring.append(("&Accept=text/x-fasta"));
 434
 435     URL url = new URL(urlstring.toString());
 436     return url;
 437   }
 438
  /**
   * A sequence/id POST request currently allows up to 50 queries
   * 
   * @return the maximum number of ids to send in a single request (50); used
   *         as the batch size when fetching sequences
   * @see http://rest.ensembl.org/documentation/info/sequence_id_post
   */
  @Override
  public int getMaximumQueryCount()
  {
    return 50;
  }
 449
  /**
   * Answers false, since sequence ids are sent in the body of a POST request
   * rather than in the URL
   */
  @Override
  protected boolean useGetRequest()
  {
    return false;
  }
 455
  /**
   * Returns "application/json" as the MIME type of the request (presumably of
   * the POST body holding the ids - confirm in the superclass)
   */
  @Override
  protected String getRequestMimeType()
  {
    return "application/json";
  }
 461
  /**
   * Returns "text/x-fasta" as the MIME type of the response; the response to
   * a sequence fetch is parsed as a Fasta file
   */
  @Override
  protected String getResponseMimeType()
  {
    return "text/x-fasta";
  }
 467
  /**
   * Returns the type of sequence this source retrieves. Its 'type' value is
   * used as a parameter of the request URL, and in the source description.
   * 
   * @return the configured sequence return type for this source
   */
  protected abstract EnsemblSeqType getSourceEnsemblType();
 473
 474   /**
 475    * Returns a list of [start, end] genomic ranges corresponding to the sequence
 476    * being retrieved.
 477    *
 478    * The correspondence between the frames of reference is made by locating
 479    * those features on the genomic sequence which identify the retrieved
 480    * sequence. Specifically
 481    * <ul>
 482    * <li>genomic sequence is identified by "transcript" features with
 483    * ID=transcript:transcriptId</li>
 484    * <li>cdna sequence is identified by "exon" features with
 485    * Parent=transcript:transcriptId</li>
 486    * <li>cds sequence is identified by "CDS" features with
 487    * Parent=transcript:transcriptId</li>
 488    * </ul>
 489    *
 490    * The returned ranges are sorted to run forwards (for positive strand) or
 491    * backwards (for negative strand). Aborts and returns null if both positive
 492    * and negative strand are found (this should not normally happen).
 493    *
 494    * @param sourceSequence
 495    * @param accId
 496    * @param start
 497    *          the start position of the sequence we are mapping to
 498    * @return
 499    */
 500   protected MapList getGenomicRanges(SequenceI sourceSequence,
 501           String accId, int start)
 502   {
 503     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
 504     if (sfs == null)
 505     {
 506       return null;
 507     }
 508
 509     /*
 510      * generously initial size for number of cds regions
 511      * (worst case titin Q8WZ42 has c. 313 exons)
 512      */
 513     List<int[]> regions = new ArrayList<int[]>(100);
 514     int mappedLength = 0;
 515     int direction = 1; // forward
 516     boolean directionSet = false;
 517
 518     for (SequenceFeature sf : sfs)
 519     {
 520       /*
 521        * accept the target feature type or a specialisation of it
 522        * (e.g. coding_exon for exon)
 523        */
 524       if (identifiesSequence(sf, accId))
 525       {
 526           int strand = sf.getStrand();
 527
 528           if (directionSet && strand != direction)
 529           {
 530             // abort - mix of forward and backward
 531           System.err.println("Error: forward and backward strand for "
 532                   + accId);
 533             return null;
 534           }
 535           direction = strand;
 536           directionSet = true;
 537
 538           /*
 539            * add to CDS ranges, semi-sorted forwards/backwards
 540            */
 541           if (strand < 0)
 542           {
 543             regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
 544           }
 545           else
 546           {
 547           regions.add(new int[] { sf.getBegin(), sf.getEnd() });
 548         }
 549         mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
 550
 551         if (!isSpliceable())
 552         {
 553           /*
 554            * 'gene' sequence is contiguous so we can stop as soon as its
 555            * identifying feature has been found
 556            */
 557           break;
 558         }
 559       }
 560     }
 561
 562     if (regions.isEmpty())
 563     {
 564       System.out.println("Failed to identify target sequence for " + accId
 565               + " from genomic features");
 566       return null;
 567     }
 568
 569     /*
 570      * a final sort is needed since Ensembl returns CDS sorted within source
 571      * (havana / ensembl_havana)
 572      */
 573     Collections.sort(regions, new RangeSorter(direction == 1));
 574
 575     List<int[]> to = new ArrayList<int[]>();
 576     to.add(new int[] { start, start + mappedLength - 1 });
 577
 578     return new MapList(regions, to, 1, 1);
 579   }
 580
  /**
   * Answers true if the sequence being retrieved may occupy discontiguous
   * regions on the genomic sequence. If false, the search for genomic ranges
   * stops at the first identifying feature found. This default implementation
   * answers true.
   */
  protected boolean isSpliceable()
  {
    return true;
  }
 589
  /**
   * Returns true if the sequence feature marks positions of the genomic
   * sequence feature which are within the sequence being retrieved. For
   * example, an 'exon' feature whose parent is the target transcript marks the
   * cdna positions of the transcript.
   * 
   * @param sf
   *          a feature on the genomic sequence
   * @param accId
   *          the accession id of the sequence being retrieved
   * @return true if the feature's range is to be included in the genomic
   *         ranges mapped to the retrieved sequence
   */
  protected abstract boolean identifiesSequence(SequenceFeature sf,
          String accId);
 602
 603   /**
 604    * Transfers the sequence feature to the target sequence, locating its start
 605    * and end range based on the mapping. Features which do not overlap the
 606    * target sequence are ignored.
 607    *
 608    * @param sf
 609    * @param targetSequence
 610    * @param mapping
 611    *          mapping from the sequence feature's coordinates to the target
 612    *          sequence
 613    */
 614   protected void transferFeature(SequenceFeature sf,
 615           SequenceI targetSequence, MapList mapping)
 616   {
 617     int start = sf.getBegin();
 618     int end = sf.getEnd();
 619     int[] mappedRange = mapping.locateInTo(start, end);
 620
 621     if (mappedRange != null)
 622     {
 623       SequenceFeature copy = new SequenceFeature(sf);
 624       copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
 625       copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
 626       targetSequence.addSequenceFeature(copy);
 627
 628       /*
 629        * for sequence_variant, make an additional feature with consequence
 630        */
 631       if (SequenceOntology.getInstance().isSequenceVariant(sf.getType()))
 632       {
 633         String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
 634         if (consequence != null)
 635         {
 636           SequenceFeature sf2 = new SequenceFeature("consequence",
 637                   consequence, copy.getBegin(), copy.getEnd(), 0f,
 638                   null);
 639           targetSequence.addSequenceFeature(sf2);
 640         }
 641       }
 642     }
 643   }
 644
 645   /**
 646    * Transfers features from sourceSequence to targetSequence
 647    *
 648    * @param accessionId
 649    * @param sourceSequence
 650    * @param targetSequence
 651    * @return true if any features were transferred, else false
 652    */
 653   protected boolean transferFeatures(String accessionId,
 654           SequenceI sourceSequence, SequenceI targetSequence)
 655   {
 656     if (sourceSequence == null || targetSequence == null)
 657     {
 658       return false;
 659     }
 660
 661     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
 662     MapList mapping = getGenomicRanges(sourceSequence, accessionId,
 663             targetSequence.getStart());
 664     if (mapping == null)
 665     {
 666       return false;
 667     }
 668
 669     return transferFeatures(sfs, targetSequence, mapping, accessionId);
 670   }
 671
 672   /**
 673    * Transfer features to the target sequence. The start/end positions are
 674    * converted using the mapping. Features which do not overlap are ignored.
 675    * Features whose parent is not the specified identifier are also ignored.
 676    *
 677    * @param features
 678    * @param targetSequence
 679    * @param mapping
 680    * @param parentId
 681    * @return
 682    */
 683   protected boolean transferFeatures(SequenceFeature[] features,
 684           SequenceI targetSequence, MapList mapping, String parentId)
 685   {
 686     final boolean forwardStrand = mapping.isFromForwardStrand();
 687
 688     /*
 689      * sort features by start position (descending if reverse strand)
 690      * before transferring (in forwards order) to the target sequence
 691      */
 692     Arrays.sort(features, new Comparator<SequenceFeature>()
 693     {
 694       @Override
 695       public int compare(SequenceFeature o1, SequenceFeature o2)
 696       {
 697         int c = Integer.compare(o1.getBegin(), o2.getBegin());
 698         return forwardStrand ? c : -c;
 699       }
 700     });
 701
 702     boolean transferred = false;
 703     for (SequenceFeature sf : features)
 704     {
 705       if (retainFeature(sf, parentId))
 706       {
 707         transferFeature(sf, targetSequence, mapping);
 708         transferred = true;
 709       }
 710     }
 711     return transferred;
 712   }
 713
  /**
   * Answers true if the feature type is one we want to keep for the sequence.
   * Some features are only retrieved in order to identify the sequence range,
   * and may then be discarded as redundant information (e.g. "CDS" feature for
   * a CDS sequence). This default implementation keeps every feature.
   * 
   * @param sf
   *          the feature to consider
   * @param accessionId
   *          the accession id of the sequence the feature would be added to
   * @return true to transfer the feature, false to discard it
   */
  @SuppressWarnings("unused")
  protected boolean retainFeature(SequenceFeature sf, String accessionId)
  {
    return true; // override as required
  }
 725
 726   /**
 727    * Answers true if the feature has a Parent which refers to the given
 728    * accession id, or if the feature has no parent. Answers false if the
 729    * feature's Parent is for a different accession id.
 730    *
 731    * @param sf
 732    * @param identifier
 733    * @return
 734    */
 735   protected boolean featureMayBelong(SequenceFeature sf, String identifier)
 736   {
 737     String parent = (String) sf.getValue(PARENT);
 738     // using contains to allow for prefix "gene:", "transcript:" etc
 739     if (parent != null && !parent.contains(identifier))
 740     {
 741       // this genomic feature belongs to a different transcript
 742       return false;
 743     }
 744     return true;
 745   }
 746
 747   @Override
 748   public String getDescription()
 749   {
 750     return "Ensembl " + getSourceEnsemblType().getType()
 751             + " sequence with variant features";
 752   }
 753
  /**
   * Makes the sequence queries to Ensembl's REST service and returns an
   * alignment consisting of the returned sequences. No genomic sequence is
   * supplied, so the two-argument overload fetches the genomic features for
   * each accession id.
   * 
   * @param identifier
   *          one or more accession ids, delimited by the accession separator
   */
  @Override
  public AlignmentI getSequenceRecords(String identifier) throws Exception
  {
    return getSequenceRecords(identifier, null);
  }
 759
 760   /**
 761    * Returns a (possibly empty) list of features on the sequence which have the
 762    * specified sequence ontology type (or a sub-type of it), and the given
 763    * identifier as parent
 764    *
 765    * @param sequence
 766    * @param type
 767    * @param parentId
 768    * @return
 769    */
 770   protected List<SequenceFeature> findFeatures(SequenceI sequence,
 771           String type, String parentId)
 772   {
 773     List<SequenceFeature> result = new ArrayList<SequenceFeature>();
 774
 775     SequenceFeature[] sfs = sequence.getSequenceFeatures();
 776     if (sfs != null) {
 777       SequenceOntology so = SequenceOntology.getInstance();
 778       for (SequenceFeature sf :sfs) {
 779         if (so.isA(sf.getType(), type))
 780         {
 781           String parent = (String) sf.getValue(PARENT);
 782           if (parent.equals(parentId))
 783           {
 784             result.add(sf);
 785           }
 786         }
 787       }
 788     }
 789     return result;
 790   }
 791
 792   /**
 793    * Answers true if the feature type is either 'NMD_transcript_variant' or
 794    * 'transcript' or one of its sub-types in the Sequence Ontology. This is
 795    * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl
 796    * although strictly speaking it is not (it is a sub-type of
 797    * sequence_variant).
 798    *
 799    * @param featureType
 800    * @return
 801    */
 802   public static boolean isTranscript(String featureType)
 803   {
 804     return NMD_VARIANT.equals(featureType)
 805             || SequenceOntology.getInstance().isA(featureType, SequenceOntology.TRANSCRIPT);
 806   }
 807 }