EnsemblSeqProxy.java

   1 package jalview.ext.ensembl;
   2
   3 import jalview.analysis.AlignmentUtils;
   4 import jalview.datamodel.Alignment;
   5 import jalview.datamodel.AlignmentI;
   6 import jalview.datamodel.DBRefEntry;
   7 import jalview.datamodel.DBRefSource;
   8 import jalview.datamodel.Mapping;
   9 import jalview.datamodel.SequenceFeature;
  10 import jalview.datamodel.SequenceI;
  11 import jalview.exceptions.JalviewException;
  12 import jalview.io.FastaFile;
  13 import jalview.io.FileParse;
  14 import jalview.io.gff.SequenceOntologyFactory;
  15 import jalview.io.gff.SequenceOntologyI;
  16 import jalview.util.DBRefUtils;
  17 import jalview.util.MapList;
  18
  19 import java.io.IOException;
  20 import java.net.MalformedURLException;
  21 import java.net.URL;
  22 import java.util.ArrayList;
  23 import java.util.Arrays;
  24 import java.util.Collections;
  25 import java.util.Comparator;
  26 import java.util.List;
  27
  28 /**
  29  * Base class for Ensembl sequence fetchers
  30  *
  31  * @see http://rest.ensembl.org/documentation/info/sequence_id
  32  * @author gmcarstairs
  33  */
  34 public abstract class EnsemblSeqProxy extends EnsemblRestClient
  35 {
  36   private static final List<String> CROSS_REFERENCES = Arrays
  37           .asList(new String[] { "CCDS", "Uniprot/SWISSPROT",
  38               "Uniprot/SPTREMBL" });
  39
  40   protected static final String CONSEQUENCE_TYPE = "consequence_type";
  41
  42   protected static final String PARENT = "Parent";
  43
  44   protected static final String ID = "ID";
  45
  46   protected static final String NAME = "Name";
  47
  48   protected static final String DESCRIPTION = "description";
  49
  50   /*
  51    * enum for 'type' parameter to the /sequence REST service
  52    */
  53   public enum EnsemblSeqType
  54   {
  55     /**
  56      * type=genomic to fetch full dna including introns
  57      */
  58     GENOMIC("genomic"),
  59
  60     /**
  61      * type=cdna to fetch dna including UTRs
  62      */
  63     CDNA("cdna"),
  64
  65     /**
  66      * type=cds to fetch coding dna excluding UTRs
  67      */
  68     CDS("cds"),
  69
  70     /**
  71      * type=protein to fetch peptide product sequence
  72      */
  73     PROTEIN("protein");
  74
  75     /*
  76      * the value of the 'type' parameter to fetch this version of
  77      * an Ensembl sequence
  78      */
  79     private String type;
  80
  81     EnsemblSeqType(String t)
  82     {
  83       type = t;
  84     }
  85
  86     public String getType()
  87     {
  88       return type;
  89     }
  90
  91   }
  92
  93   /**
  94    * A comparator to sort ranges into ascending start position order
  95    */
  96   private class RangeSorter implements Comparator<int[]>
  97   {
  98     boolean forwards;
  99
 100     RangeSorter(boolean forward)
 101     {
 102       forwards = forward;
 103     }
 104
 105     @Override
 106     public int compare(int[] o1, int[] o2)
 107     {
 108       return (forwards ? 1 : -1) * Integer.compare(o1[0], o2[0]);
 109     }
 110
 111   }
 112
 113   /**
 114    * Default constructor (to use rest.ensembl.org)
 115    */
 116   public EnsemblSeqProxy()
 117   {
 118     super();
 119   }
 120
 121   /**
 122    * Constructor given the target domain to fetch data from
 123    */
 124   public EnsemblSeqProxy(String d)
 125   {
 126     super(d);
 127   }
 128
 129   /**
 130    * Makes the sequence queries to Ensembl's REST service and returns an
 131    * alignment consisting of the returned sequences.
 132    */
 133   @Override
 134   public AlignmentI getSequenceRecords(String query) throws Exception
 135   {
 136     // TODO use a String... query vararg instead?
 137
 138     // danger: accession separator used as a regex here, a string elsewhere
 139     // in this case it is ok (it is just a space), but (e.g.) '\' would not be
 140     List<String> allIds = Arrays.asList(query
 141             .split(getAccessionSeparator()));
 142     AlignmentI alignment = null;
 143     inProgress = true;
 144
 145     /*
 146      * execute queries, if necessary in batches of the
 147      * maximum allowed number of ids
 148      */
 149     int maxQueryCount = getMaximumQueryCount();
 150     for (int v = 0, vSize = allIds.size(); v < vSize; v += maxQueryCount)
 151     {
 152       int p = Math.min(vSize, v + maxQueryCount);
 153       List<String> ids = allIds.subList(v, p);
 154       try
 155       {
 156         alignment = fetchSequences(ids, alignment);
 157       } catch (Throwable r)
 158       {
 159         inProgress = false;
 160         String msg = "Aborting ID retrieval after " + v
 161                 + " chunks. Unexpected problem (" + r.getLocalizedMessage()
 162                 + ")";
 163         System.err.println(msg);
 164         break;
 165       }
 166     }
 167
 168     if (alignment == null)
 169     {
 170       return null;
 171     }
 172
 173     /*
 174      * fetch and transfer genomic sequence features,
 175      * fetch protein product and add as cross-reference
 176      */
 177     for (String accId : allIds)
 178     {
 179       addFeaturesAndProduct(accId, alignment);
 180     }
 181
 182     for (SequenceI seq : alignment.getSequences())
 183     {
 184       getCrossReferences(seq);
 185     }
 186
 187     return alignment;
 188   }
 189
 190   /**
 191    * Fetches Ensembl features using the /overlap REST endpoint, and adds them to
 192    * the sequence in the alignment. Also fetches the protein product, maps it
 193    * from the CDS features of the sequence, and saves it as a cross-reference of
 194    * the dna sequence.
 195    *
 196    * @param accId
 197    * @param alignment
 198    */
 199   protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
 200   {
 201     if (alignment == null)
 202     {
 203       return;
 204     }
 205
 206     try
 207     {
 208       /*
 209        * get 'dummy' genomic sequence with exon, cds and variation features
 210        */
 211       SequenceI genomicSequence = null;
 212       EnsemblFeatures gffFetcher = new EnsemblFeatures(getDomain());
 213       EnsemblFeatureType[] features = getFeaturesToFetch();
 214       AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
 215               features);
 216       if (geneFeatures.getHeight() > 0)
 217       {
 218         genomicSequence = geneFeatures.getSequenceAt(0);
 219       }
 220       if (genomicSequence != null)
 221       {
 222         /*
 223          * transfer features to the query sequence
 224          */
 225         SequenceI querySeq = alignment.findName(accId);
 226         if (transferFeatures(accId, genomicSequence, querySeq))
 227         {
 228
 229           /*
 230            * fetch and map protein product, and add it as a cross-reference
 231            * of the retrieved sequence
 232            */
 233           addProteinProduct(querySeq);
 234         }
 235       }
 236     } catch (IOException e)
 237     {
 238       System.err.println("Error transferring Ensembl features: "
 239               + e.getMessage());
 240     }
 241   }
 242
 243   /**
 244    * Returns those sequence feature types to fetch from Ensembl. We may want
 245    * features either because they are of interest to the user, or as means to
 246    * identify the locations of the sequence on the genomic sequence (CDS
 247    * features identify CDS, exon features identify cDNA etc).
 248    *
 249    * @return
 250    */
 251   protected abstract EnsemblFeatureType[] getFeaturesToFetch();
 252
 253   /**
 254    * Fetches and maps the protein product, and adds it as a cross-reference of
 255    * the retrieved sequence
 256    */
 257   protected void addProteinProduct(SequenceI querySeq)
 258   {
 259     String accId = querySeq.getName();
 260     try
 261     {
 262       AlignmentI protein = new EnsemblProtein(getDomain())
 263               .getSequenceRecords(accId);
 264       if (protein == null || protein.getHeight() == 0)
 265       {
 266         System.out.println("No protein product found for " + accId);
 267         return;
 268       }
 269       SequenceI proteinSeq = protein.getSequenceAt(0);
 270
 271       /*
 272        * need dataset sequences (to be the subject of mappings)
 273        */
 274       proteinSeq.createDatasetSequence();
 275       querySeq.createDatasetSequence();
 276
 277       MapList mapList = AlignmentUtils.mapCdsToProtein(querySeq, proteinSeq);
 278       if (mapList != null)
 279       {
 280         // clunky: ensure Uniprot xref if we have one is on mapped sequence
 281         SequenceI ds = proteinSeq.getDatasetSequence();
 282         ds.setSourceDBRef(proteinSeq.getSourceDBRef());
 283         Mapping map = new Mapping(ds, mapList);
 284         DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(),
 285                 accId, map);
 286         querySeq.getDatasetSequence().addDBRef(dbr);
 287
 288         /*
 289          * copy exon features to protein, compute peptide variants from dna
 290          * variants and add as features on the protein sequence ta-da
 291          */
 292         AlignmentUtils.computeProteinFeatures(querySeq, proteinSeq, mapList);
 293       }
 294     } catch (Exception e)
 295     {
 296       System.err
 297               .println(String.format("Error retrieving protein for %s: %s",
 298                       accId, e.getMessage()));
 299     }
 300   }
 301
 302   /**
 303    * Get database xrefs from Ensembl, and attach them to the sequence
 304    *
 305    * @param seq
 306    */
 307   protected void getCrossReferences(SequenceI seq)
 308   {
 309     while (seq.getDatasetSequence() != null)
 310     {
 311       seq = seq.getDatasetSequence();
 312     }
 313
 314     EnsemblXref xrefFetcher = new EnsemblXref(getDomain());
 315     List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName(),
 316             getCrossReferenceDatabases());
 317     for (DBRefEntry xref : xrefs)
 318     {
 319       seq.addDBRef(xref);
 320       /*
 321        * Save any Uniprot xref to be the reference for SIFTS mapping
 322        */
 323       if (DBRefSource.UNIPROT.equals(xref.getSource()))
 324       {
 325         seq.setSourceDBRef(xref);
 326       }
 327     }
 328   }
 329
 330   /**
 331    * Returns a list of database names to be used when fetching cross-references.
 332    * Specifically, the names are used to filter data returned by the Ensembl
 333    * xrefs REST service on the value in field 'dbname'.
 334    *
 335    * @return
 336    */
 337   protected List<String> getCrossReferenceDatabases()
 338   {
 339     return CROSS_REFERENCES;
 340   }
 341
 342   /**
 343    * Fetches sequences for the list of accession ids and adds them to the
 344    * alignment. Returns the extended (or created) alignment.
 345    *
 346    * @param ids
 347    * @param alignment
 348    * @return
 349    * @throws JalviewException
 350    * @throws IOException
 351    */
 352   protected AlignmentI fetchSequences(List<String> ids, AlignmentI alignment)
 353           throws JalviewException, IOException
 354   {
 355     if (!isEnsemblAvailable())
 356     {
 357       inProgress = false;
 358       throw new JalviewException("ENSEMBL Rest API not available.");
 359     }
 360     FileParse fp = getSequenceReader(ids);
 361     FastaFile fr = new FastaFile(fp);
 362     if (fr.hasWarningMessage())
 363     {
 364       System.out.println(String.format(
 365               "Warning when retrieving %d ids %s\n%s", ids.size(),
 366               ids.toString(), fr.getWarningMessage()));
 367     }
 368     else if (fr.getSeqs().size() != ids.size())
 369     {
 370       System.out.println(String.format(
 371               "Only retrieved %d sequences for %d query strings", fr
 372                       .getSeqs().size(), ids.size()));
 373     }
 374
 375     if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
 376     {
 377       /*
 378        * POST request has returned an empty FASTA file e.g. for invalid id
 379        */
 380       throw new IOException("No data returned for " + ids);
 381     }
 382
 383     if (fr.getSeqs().size() > 0)
 384     {
 385       AlignmentI seqal = new Alignment(
 386               fr.getSeqsAsArray());
 387       for (SequenceI sq:seqal.getSequences())
 388       {
 389         if (sq.getDescription() == null)
 390         {
 391           sq.setDescription(getDbName());
 392         }
 393         String name = sq.getName();
 394         if (ids.contains(name)
 395                 || ids.contains(name.replace("ENSP", "ENST")))
 396         {
 397           DBRefUtils.parseToDbRef(sq, DBRefSource.ENSEMBL, "0", name);
 398         }
 399       }
 400       if (alignment == null)
 401       {
 402         alignment = seqal;
 403       }
 404       else
 405       {
 406         alignment.append(seqal);
 407       }
 408     }
 409     return alignment;
 410   }
 411
 412   /**
 413    * Returns the URL for the REST call
 414    *
 415    * @return
 416    * @throws MalformedURLException
 417    */
 418   @Override
 419   protected URL getUrl(List<String> ids) throws MalformedURLException
 420   {
 421     /*
 422      * a single id is included in the URL path
 423      * multiple ids go in the POST body instead
 424      */
 425     StringBuffer urlstring = new StringBuffer(128);
 426     urlstring.append(getDomain() + "/sequence/id");
 427     if (ids.size() == 1)
 428     {
 429       urlstring.append("/").append(ids.get(0));
 430     }
 431     // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
 432     urlstring.append("?type=").append(getSourceEnsemblType().getType());
 433     urlstring.append(("&Accept=text/x-fasta"));
 434
 435     URL url = new URL(urlstring.toString());
 436     return url;
 437   }
 438
 439   /**
 440    * A sequence/id POST request currently allows up to 50 queries
 441    *
 442    * @see http://rest.ensembl.org/documentation/info/sequence_id_post
 443    */
 444   @Override
 445   public int getMaximumQueryCount()
 446   {
 447     return 50;
 448   }
 449
 450   @Override
 451   protected boolean useGetRequest()
 452   {
 453     return false;
 454   }
 455
 456   @Override
 457   protected String getRequestMimeType(boolean multipleIds)
 458   {
 459     return multipleIds ? "application/json" : "text/x-fasta";
 460   }
 461
 462   @Override
 463   protected String getResponseMimeType()
 464   {
 465     return "text/x-fasta";
 466   }
 467
 468   /**
 469    *
 470    * @return the configured sequence return type for this source
 471    */
 472   protected abstract EnsemblSeqType getSourceEnsemblType();
 473
 474   /**
 475    * Returns a list of [start, end] genomic ranges corresponding to the sequence
 476    * being retrieved.
 477    *
 478    * The correspondence between the frames of reference is made by locating
 479    * those features on the genomic sequence which identify the retrieved
 480    * sequence. Specifically
 481    * <ul>
 482    * <li>genomic sequence is identified by "transcript" features with
 483    * ID=transcript:transcriptId</li>
 484    * <li>cdna sequence is identified by "exon" features with
 485    * Parent=transcript:transcriptId</li>
 486    * <li>cds sequence is identified by "CDS" features with
 487    * Parent=transcript:transcriptId</li>
 488    * </ul>
 489    *
 490    * The returned ranges are sorted to run forwards (for positive strand) or
 491    * backwards (for negative strand). Aborts and returns null if both positive
 492    * and negative strand are found (this should not normally happen).
 493    *
 494    * @param sourceSequence
 495    * @param accId
 496    * @param start
 497    *          the start position of the sequence we are mapping to
 498    * @return
 499    */
 500   protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
 501           String accId, int start)
 502   {
 503     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
 504     if (sfs == null)
 505     {
 506       return null;
 507     }
 508
 509     /*
 510      * generously initial size for number of cds regions
 511      * (worst case titin Q8WZ42 has c. 313 exons)
 512      */
 513     List<int[]> regions = new ArrayList<int[]>(100);
 514     int mappedLength = 0;
 515     int direction = 1; // forward
 516     boolean directionSet = false;
 517
 518     for (SequenceFeature sf : sfs)
 519     {
 520       /*
 521        * accept the target feature type or a specialisation of it
 522        * (e.g. coding_exon for exon)
 523        */
 524       if (identifiesSequence(sf, accId))
 525       {
 526         int strand = sf.getStrand();
 527         strand = strand == 0 ? 1 : strand; // treat unknown as forward
 528
 529         if (directionSet && strand != direction)
 530         {
 531           // abort - mix of forward and backward
 532           System.err.println("Error: forward and backward strand for "
 533                   + accId);
 534             return null;
 535           }
 536           direction = strand;
 537           directionSet = true;
 538
 539           /*
 540            * add to CDS ranges, semi-sorted forwards/backwards
 541            */
 542           if (strand < 0)
 543           {
 544             regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
 545           }
 546           else
 547           {
 548           regions.add(new int[] { sf.getBegin(), sf.getEnd() });
 549         }
 550         mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
 551
 552         if (!isSpliceable())
 553         {
 554           /*
 555            * 'gene' sequence is contiguous so we can stop as soon as its
 556            * identifying feature has been found
 557            */
 558           break;
 559         }
 560       }
 561     }
 562
 563     if (regions.isEmpty())
 564     {
 565       System.out.println("Failed to identify target sequence for " + accId
 566               + " from genomic features");
 567       return null;
 568     }
 569
 570     /*
 571      * a final sort is needed since Ensembl returns CDS sorted within source
 572      * (havana / ensembl_havana)
 573      */
 574     Collections.sort(regions, new RangeSorter(direction == 1));
 575
 576     List<int[]> to = Arrays.asList(new int[] { start,
 577         start + mappedLength - 1 });
 578
 579     return new MapList(regions, to, 1, 1);
 580   }
 581
 582   /**
 583    * Answers true if the sequence being retrieved may occupy discontiguous
 584    * regions on the genomic sequence.
 585    */
 586   protected boolean isSpliceable()
 587   {
 588     return true;
 589   }
 590
 591   /**
 592    * Returns true if the sequence feature marks positions of the genomic
 593    * sequence feature which are within the sequence being retrieved. For
 594    * example, an 'exon' feature whose parent is the target transcript marks the
 595    * cdna positions of the transcript.
 596    *
 597    * @param sf
 598    * @param accId
 599    * @return
 600    */
 601   protected abstract boolean identifiesSequence(SequenceFeature sf,
 602           String accId);
 603
 604   /**
 605    * Transfers the sequence feature to the target sequence, locating its start
 606    * and end range based on the mapping. Features which do not overlap the
 607    * target sequence are ignored.
 608    *
 609    * @param sf
 610    * @param targetSequence
 611    * @param mapping
 612    *          mapping from the sequence feature's coordinates to the target
 613    *          sequence
 614    */
 615   protected void transferFeature(SequenceFeature sf,
 616           SequenceI targetSequence, MapList mapping)
 617   {
 618     int start = sf.getBegin();
 619     int end = sf.getEnd();
 620     int[] mappedRange = mapping.locateInTo(start, end);
 621
 622     if (mappedRange != null)
 623     {
 624       SequenceFeature copy = new SequenceFeature(sf);
 625       copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
 626       copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
 627       targetSequence.addSequenceFeature(copy);
 628
 629       /*
 630        * for sequence_variant, make an additional feature with consequence
 631        */
 632       // if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
 633       // SequenceOntologyI.SEQUENCE_VARIANT))
 634       // {
 635       // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
 636       // if (consequence != null)
 637       // {
 638       // SequenceFeature sf2 = new SequenceFeature("consequence",
 639       // consequence, copy.getBegin(), copy.getEnd(), 0f,
 640       // null);
 641       // targetSequence.addSequenceFeature(sf2);
 642       // }
 643       // }
 644     }
 645   }
 646
 647   /**
 648    * Transfers features from sourceSequence to targetSequence
 649    *
 650    * @param accessionId
 651    * @param sourceSequence
 652    * @param targetSequence
 653    * @return true if any features were transferred, else false
 654    */
 655   protected boolean transferFeatures(String accessionId,
 656           SequenceI sourceSequence, SequenceI targetSequence)
 657   {
 658     if (sourceSequence == null || targetSequence == null)
 659     {
 660       return false;
 661     }
 662
 663     // long start = System.currentTimeMillis();
 664     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
 665     MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
 666             targetSequence.getStart());
 667     if (mapping == null)
 668     {
 669       return false;
 670     }
 671
 672     boolean result = transferFeatures(sfs, targetSequence, mapping,
 673             accessionId);
 674     // System.out.println("transferFeatures (" + (sfs.length) + " --> "
 675     // + targetSequence.getSequenceFeatures().length + ") to "
 676     // + targetSequence.getName()
 677     // + " took " + (System.currentTimeMillis() - start) + "ms");
 678     return result;
 679   }
 680
 681   /**
 682    * Transfer features to the target sequence. The start/end positions are
 683    * converted using the mapping. Features which do not overlap are ignored.
 684    * Features whose parent is not the specified identifier are also ignored.
 685    *
 686    * @param features
 687    * @param targetSequence
 688    * @param mapping
 689    * @param parentId
 690    * @return
 691    */
 692   protected boolean transferFeatures(SequenceFeature[] features,
 693           SequenceI targetSequence, MapList mapping, String parentId)
 694   {
 695     final boolean forwardStrand = mapping.isFromForwardStrand();
 696
 697     /*
 698      * sort features by start position (descending if reverse strand)
 699      * before transferring (in forwards order) to the target sequence
 700      */
 701     Arrays.sort(features, new Comparator<SequenceFeature>()
 702     {
 703       @Override
 704       public int compare(SequenceFeature o1, SequenceFeature o2)
 705       {
 706         int c = Integer.compare(o1.getBegin(), o2.getBegin());
 707         return forwardStrand ? c : -c;
 708       }
 709     });
 710
 711     boolean transferred = false;
 712     for (SequenceFeature sf : features)
 713     {
 714       if (retainFeature(sf, parentId))
 715       {
 716         transferFeature(sf, targetSequence, mapping);
 717         transferred = true;
 718       }
 719     }
 720     return transferred;
 721   }
 722
 723   /**
 724    * Answers true if the feature type is one we want to keep for the sequence.
 725    * Some features are only retrieved in order to identify the sequence range,
 726    * and may then be discarded as redundant information (e.g. "CDS" feature for
 727    * a CDS sequence).
 728    */
 729   @SuppressWarnings("unused")
 730   protected boolean retainFeature(SequenceFeature sf, String accessionId)
 731   {
 732     return true; // override as required
 733   }
 734
 735   /**
 736    * Answers true if the feature has a Parent which refers to the given
 737    * accession id, or if the feature has no parent. Answers false if the
 738    * feature's Parent is for a different accession id.
 739    *
 740    * @param sf
 741    * @param identifier
 742    * @return
 743    */
 744   protected boolean featureMayBelong(SequenceFeature sf, String identifier)
 745   {
 746     String parent = (String) sf.getValue(PARENT);
 747     // using contains to allow for prefix "gene:", "transcript:" etc
 748     if (parent != null && !parent.contains(identifier))
 749     {
 750       // this genomic feature belongs to a different transcript
 751       return false;
 752     }
 753     return true;
 754   }
 755
 756   @Override
 757   public String getDescription()
 758   {
 759     return "Ensembl " + getSourceEnsemblType().getType()
 760             + " sequence with variant features";
 761   }
 762
 763   /**
 764    * Returns a (possibly empty) list of features on the sequence which have the
 765    * specified sequence ontology type (or a sub-type of it), and the given
 766    * identifier as parent
 767    *
 768    * @param sequence
 769    * @param type
 770    * @param parentId
 771    * @return
 772    */
 773   protected List<SequenceFeature> findFeatures(SequenceI sequence,
 774           String type, String parentId)
 775   {
 776     List<SequenceFeature> result = new ArrayList<SequenceFeature>();
 777
 778     SequenceFeature[] sfs = sequence.getSequenceFeatures();
 779     if (sfs != null) {
 780       SequenceOntologyI so = SequenceOntologyFactory.getInstance();
 781       for (SequenceFeature sf :sfs) {
 782         if (so.isA(sf.getType(), type))
 783         {
 784           String parent = (String) sf.getValue(PARENT);
 785           if (parent.equals(parentId))
 786           {
 787             result.add(sf);
 788           }
 789         }
 790       }
 791     }
 792     return result;
 793   }
 794
 795   /**
 796    * Answers true if the feature type is either 'NMD_transcript_variant' or
 797    * 'transcript' or one of its sub-types in the Sequence Ontology. This is
 798    * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl
 799    * although strictly speaking it is not (it is a sub-type of
 800    * sequence_variant).
 801    *
 802    * @param featureType
 803    * @return
 804    */
 805   public static boolean isTranscript(String featureType)
 806   {
 807     return SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(featureType)
 808             || SequenceOntologyFactory.getInstance().isA(featureType,
 809                     SequenceOntologyI.TRANSCRIPT);
 810   }
 811 }