package jalview.ext.ensembl;

import jalview.analysis.AlignmentUtils;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.exceptions.JalviewException;
import jalview.io.FastaFile;
import jalview.io.FileParse;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;

/**
 * Base class for Ensembl sequence fetchers
 *
 * @author gmcarstairs
 */
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
  private static final List<String> CROSS_REFERENCES = Arrays.asList(
          "CCDS", "Uniprot/SWISSPROT");

  protected static final String CONSEQUENCE_TYPE = "consequence_type";

  protected static final String PARENT = "Parent";

  protected static final String ID = "ID";

  protected static final String NAME = "Name";

  /*
   * enum for 'type' parameter to the /sequence REST service
   */
  public enum EnsemblSeqType
  {
    /**
     * type=genomic to fetch full dna including introns
     */
    GENOMIC("genomic"),

    /**
     * type=cdna to fetch dna including UTRs
     */
    CDNA("cdna"),

    /**
     * type=cds to fetch coding dna excluding UTRs
     */
    CDS("cds"),

    /**
     * type=protein to fetch peptide product sequence
     */
    PROTEIN("protein");

    /*
     * the value of the 'type' parameter to fetch this version of
     * an Ensembl sequence
     */
    private final String type;

    EnsemblSeqType(String t)
    {
      type = t;
    }

    public String getType()
    {
      return type;
    }
  }

  /**
   * A comparator to sort [start, end] ranges by start position: ascending
   * when constructed with forward == true, otherwise descending
   */
  private static final class RangeSorter implements Comparator<int[]>
  {
    private final boolean forwards;

    RangeSorter(boolean forward)
    {
      forwards = forward;
    }

    @Override
    public int compare(int[] o1, int[] o2)
    {
      return (forwards ? 1 : -1) * Integer.compare(o1[0], o2[0]);
    }
  }

  /**
   * Constructor
   */
  public EnsemblSeqProxy()
  {
  }

  /**
   * Makes the sequence queries to Ensembl's REST service and returns an
   * alignment consisting of the returned sequences. Ids are fetched in
   * batches of at most {@link #getMaximumQueryCount()}; if a batch fails,
   * retrieval stops and whatever was fetched so far is used.
   *
   * @param query
   *          accession ids, delimited by the accession separator
   * @return the alignment, or null if no sequences were retrieved
   */
  @Override
  public AlignmentI getSequenceRecords(String query) throws Exception
  {
    // TODO use a String... query vararg instead?

    // danger: accession separator used as a regex here, a string elsewhere
    // in this case it is ok (it is just a space), but (e.g.) '\' would not be
    List<String> allIds = Arrays.asList(query
            .split(getAccessionSeparator()));
    AlignmentI alignment = null;
    inProgress = true;

    /*
     * execute queries, if necessary in batches of the
     * maximum allowed number of ids
     */
    int maxQueryCount = getMaximumQueryCount();
    for (int v = 0, vSize = allIds.size(); v < vSize; v += maxQueryCount)
    {
      int p = Math.min(vSize, v + maxQueryCount);
      List<String> ids = allIds.subList(v, p);
      try
      {
        alignment = fetchSequences(ids, alignment);
      } catch (Throwable r)
      {
        inProgress = false;
        String msg = "Aborting ID retrieval after " + v
                + " chunks. Unexpected problem ("
                + r.getLocalizedMessage() + ")";
        System.err.println(msg);
        break;
      }
    }

    if (alignment == null)
    {
      return null;
    }

    /*
     * fetch and transfer genomic sequence features,
     * fetch protein product and add as cross-reference
     */
    for (String accId : allIds)
    {
      addFeaturesAndProduct(accId, alignment);
    }

    for (SequenceI seq : alignment.getSequences())
    {
      getCrossReferences(seq);
    }

    return alignment;
  }

  /**
   * Fetches Ensembl features using the /overlap REST endpoint, and adds them to
   * the sequence in the alignment. Also fetches the protein product, maps it
   * from the CDS features of the sequence, and saves it as a cross-reference of
   * the dna sequence. Does nothing if the alignment is null or if no features
   * are returned for the accession id.
   *
   * @param accId
   * @param alignment
   */
  protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
  {
    if (alignment == null)
    {
      return;
    }

    try
    {
      /*
       * get 'dummy' genomic sequence with exon, cds and variation features
       */
      SequenceI genomicSequence = null;
      EnsemblFeatures gffFetcher = new EnsemblFeatures();
      EnsemblFeatureType[] features = getFeaturesToFetch();
      AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
              features);
      // guard against a fetcher that returns null when nothing was retrieved
      if (geneFeatures != null && geneFeatures.getHeight() > 0)
      {
        genomicSequence = geneFeatures.getSequenceAt(0);
      }
      if (genomicSequence != null)
      {
        /*
         * transfer features to the query sequence
         */
        SequenceI querySeq = alignment.findName(accId);
        if (transferFeatures(accId, genomicSequence, querySeq))
        {
          /*
           * fetch and map protein product, and add it as a cross-reference
           * of the retrieved sequence
           */
          addProteinProduct(querySeq);
        }
      }
    } catch (IOException e)
    {
      System.err.println("Error transferring Ensembl features: "
              + e.getMessage());
    }
  }

  /**
   * Returns those sequence feature types to fetch from Ensembl. We may want
   * features either because they are of interest to the user, or as means to
   * identify the locations of the sequence on the genomic sequence (CDS
   * features identify CDS, exon features identify cDNA etc).
   *
   * @return
   */
  protected abstract EnsemblFeatureType[] getFeaturesToFetch();

  /**
   * Fetches and maps the protein product, and adds it as a cross-reference of
   * the retrieved sequence. Any exception is reported to stderr and otherwise
   * ignored.
   *
   * @param querySeq
   *          the retrieved dna sequence; its name is used as the accession id
   */
  protected void addProteinProduct(SequenceI querySeq)
  {
    String accId = querySeq.getName();
    try
    {
      AlignmentI protein = new EnsemblProtein().getSequenceRecords(accId);
      if (protein == null || protein.getHeight() == 0)
      {
        System.out.println("Failed to retrieve protein for " + accId);
        return;
      }
      SequenceI proteinSeq = protein.getSequenceAt(0);

      /*
       * need dataset sequences (to be the subject of mappings)
       */
      proteinSeq.createDatasetSequence();
      querySeq.createDatasetSequence();

      MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
      if (mapList != null)
      {
        // clunky: ensure Uniprot xref if we have one is on mapped sequence
        SequenceI ds = proteinSeq.getDatasetSequence();
        ds.setSourceDBRef(proteinSeq.getSourceDBRef());
        Mapping map = new Mapping(ds, mapList);
        DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(),
                accId, map);
        querySeq.getDatasetSequence().addDBRef(dbr);

        /*
         * compute peptide variants from dna variants and add as
         * sequence features on the protein sequence ta-da
         */
        computeProteinFeatures(querySeq, proteinSeq, mapList);
      }
    } catch (Exception e)
    {
      System.err
              .println(String.format("Error retrieving protein for %s: %s",
                      accId, e.getMessage()));
    }
  }

  /**
   * Get database xrefs from Ensembl, and attach them to the sequence (or to
   * its underlying dataset sequence, if it has one)
   *
   * @param seq
   */
  protected void getCrossReferences(SequenceI seq)
  {
    // walk down to the root dataset sequence
    SequenceI target = seq;
    while (target.getDatasetSequence() != null)
    {
      target = target.getDatasetSequence();
    }

    EnsemblXref xrefFetcher = new EnsemblXref();
    List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(
            target.getName(), getCrossReferenceDatabases());
    for (DBRefEntry xref : xrefs)
    {
      target.addDBRef(xref);
      /*
       * Save any Uniprot xref to be the reference for SIFTS mapping
       */
      if (DBRefSource.UNIPROT.equals(xref.getSource()))
      {
        target.setSourceDBRef(xref);
      }
    }
  }

  /**
   * Returns a list of database names to be used when fetching cross-references.
   *
   * @return
   */
  protected List<String> getCrossReferenceDatabases()
  {
    return CROSS_REFERENCES;
  }

  /**
   * Returns a mapping from dna to protein by inspecting sequence features of
   * type "CDS" on the dna. Returns null if the mapped dna length does not
   * correspond to the protein length (with or without a stop codon).
   *
   * @param dnaSeq
   * @param proteinSeq
   * @return
   */
  protected MapList mapCdsToProtein(SequenceI dnaSeq, SequenceI proteinSeq)
  {
    List<int[]> ranges = new ArrayList<int[]>(50);

    int mappedDnaLength = getCdsRanges(dnaSeq, ranges);

    int proteinLength = proteinSeq.getLength();
    int proteinEnd = proteinLength;
    int proteinStart = 1;

    /*
     * incomplete start codon may mean X at start of peptide
     * we ignore both for mapping purposes
     */
    if (proteinSeq.getCharAt(0) == 'X')
    {
      proteinStart = 2;
      proteinLength--;
    }
    List<int[]> proteinRange = new ArrayList<int[]>();

    /*
     * dna length should map to protein (or protein plus stop codon)
     */
    int codesForResidues = mappedDnaLength / 3;
    if (codesForResidues == (proteinLength + 1))
    {
      MappingUtils.unmapStopCodon(ranges, mappedDnaLength);
      codesForResidues--;
    }
    if (codesForResidues == proteinLength)
    {
      proteinRange.add(new int[] { proteinStart, proteinEnd });
      return new MapList(ranges, proteinRange, 3, 1);
    }
    return null;
  }

  /**
   * Adds CDS ranges to the ranges list, and returns the total length mapped
   * from.
   *
   * No need to worry about reverse strand dna, here since the retrieved
   * sequence is as transcribed (reverse complement for reverse strand), i.e in
   * the same sense as the peptide.
   *
   * @param dnaSeq
   * @param ranges
   *          list to which [begin, end] ranges are appended
   * @return
   */
  protected int getCdsRanges(SequenceI dnaSeq, List<int[]> ranges)
  {
    SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
    if (sfs == null)
    {
      return 0;
    }
    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
    int mappedDnaLength = 0;
    for (SequenceFeature sf : sfs)
    {
      /*
       * process a CDS feature (or a sub-type of CDS)
       */
      if (so.isA(sf.getType(), SequenceOntologyI.CDS))
      {
        int phase = 0;
        try
        {
          phase = Integer.parseInt(sf.getPhase());
        } catch (NumberFormatException e)
        {
          // missing or non-numeric phase: treat as phase 0
        }
        /*
         * phase > 0 on first codon means 5' incomplete - skip to the start
         * of the next codon; example ENST00000496384
         */
        int begin = sf.getBegin();
        int end = sf.getEnd();
        if (ranges.isEmpty())
        {
          begin += phase;
          if (begin > end)
          {
            continue; // shouldn't happen?
          }
        }
        ranges.add(new int[] { begin, end });
        mappedDnaLength += Math.abs(end - begin) + 1;
      }
    }
    return mappedDnaLength;
  }

  /**
   * Fetches sequences for the list of accession ids and adds them to the
   * alignment. Returns the extended (or created) alignment.
   *
   * @param ids
   * @param alignment
   *          the alignment to append to, or null to create a new one
   * @return
   * @throws JalviewException
   *           if the Ensembl REST service is reported as unavailable
   * @throws IOException
   *           if a single, empty sequence is returned (e.g. for an invalid id)
   */
  protected AlignmentI fetchSequences(List<String> ids,
          AlignmentI alignment) throws JalviewException, IOException
  {
    if (!isEnsemblAvailable())
    {
      inProgress = false;
      throw new JalviewException("ENSEMBL Rest API not available.");
    }
    FileParse fp = getSequenceReader(ids);
    FastaFile fr = new FastaFile(fp);
    if (fr.hasWarningMessage())
    {
      System.out.println(String.format(
              "Warning when retrieving %d ids %s\n%s", ids.size(),
              ids.toString(), fr.getWarningMessage()));
    }
    else if (fr.getSeqs().size() != ids.size())
    {
      System.out.println(String.format(
              "Only retrieved %d sequences for %d query strings", fr
                      .getSeqs().size(), ids.size()));
    }

    if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
    {
      /*
       * POST request has returned an empty FASTA file e.g. for invalid id
       */
      throw new IOException("No data returned for " + ids);
    }

    if (fr.getSeqs().size() > 0)
    {
      AlignmentI seqal = new Alignment(fr.getSeqsAsArray());
      for (SequenceI sq : seqal.getSequences())
      {
        if (sq.getDescription() == null)
        {
          sq.setDescription(getDbName());
        }
        String name = sq.getName();
        // a returned protein id (ENSP...) is also matched against a queried
        // id with ENST in place of ENSP
        if (ids.contains(name)
                || ids.contains(name.replace("ENSP", "ENST")))
        {
          DBRefUtils.parseToDbRef(sq, DBRefSource.ENSEMBL, "0", name);
        }
      }
      if (alignment == null)
      {
        alignment = seqal;
      }
      else
      {
        alignment.append(seqal);
      }
    }
    return alignment;
  }

  /**
   * Returns the URL for the REST call
   *
   * @param ids
   * @return
   * @throws MalformedURLException
   */
  @Override
  protected URL getUrl(List<String> ids) throws MalformedURLException
  {
    /*
     * a single id is included in the URL path
     * multiple ids go in the POST body instead
     */
    StringBuilder urlstring = new StringBuilder(128);
    urlstring.append(SEQUENCE_ID_URL);
    if (ids.size() == 1)
    {
      urlstring.append("/").append(ids.get(0));
    }
    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
    urlstring.append("?type=").append(getSourceEnsemblType().getType());
    urlstring.append("&Accept=text/x-fasta");

    return new URL(urlstring.toString());
  }

  /**
   * A sequence/id POST request currently allows up to 50 queries
   *
   * @see http://rest.ensembl.org/documentation/info/sequence_id_post
   */
  @Override
  public int getMaximumQueryCount()
  {
    return 50;
  }

  @Override
  protected boolean useGetRequest()
  {
    return false;
  }

  @Override
  protected String getRequestMimeType(boolean multipleIds)
  {
    return multipleIds ? "application/json" : "text/x-fasta";
  }

  @Override
  protected String getResponseMimeType()
  {
    return "text/x-fasta";
  }

  /**
   *
   * @return the configured sequence return type for this source
   */
  protected abstract EnsemblSeqType getSourceEnsemblType();

  /**
   * Returns a list of [start, end] genomic ranges corresponding to the sequence
   * being retrieved.
   *
   * The correspondence between the frames of reference is made by locating
   * those features on the genomic sequence which identify the retrieved
   * sequence. Specifically
   * <ul>
   * <li>genomic sequence is identified by "transcript" features with
   * ID=transcript:transcriptId</li>
   * <li>cdna sequence is identified by "exon" features with
   * Parent=transcript:transcriptId</li>
   * <li>cds sequence is identified by "CDS" features with
   * Parent=transcript:transcriptId</li>
   * </ul>
   *
   * The returned ranges are sorted to run forwards (for positive strand) or
   * backwards (for negative strand). Aborts and returns null if both positive
   * and negative strand are found (this should not normally happen).
   *
   * @param sourceSequence
   * @param accId
   * @param start
   *          the start position of the sequence we are mapping to
   * @return the mapping, or null if the sequence has no features, none of
   *         them identifies the target sequence, or strands are mixed
   */
  protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
          String accId, int start)
  {
    SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
    if (sfs == null)
    {
      return null;
    }

    /*
     * generously initial size for number of cds regions
     * (worst case titin Q8WZ42 has c. 313 exons)
     */
    List<int[]> regions = new ArrayList<int[]>(100);
    int mappedLength = 0;
    int direction = 1; // forward
    boolean directionSet = false;

    for (SequenceFeature sf : sfs)
    {
      /*
       * accept the target feature type or a specialisation of it
       * (e.g. coding_exon for exon)
       */
      if (identifiesSequence(sf, accId))
      {
        int strand = sf.getStrand();
        strand = strand == 0 ? 1 : strand; // treat unknown as forward

        if (directionSet && strand != direction)
        {
          // abort - mix of forward and backward
          System.err.println("Error: forward and backward strand for "
                  + accId);
          return null;
        }
        direction = strand;
        directionSet = true;

        /*
         * add to CDS ranges, semi-sorted forwards/backwards
         */
        if (strand < 0)
        {
          regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
        }
        else
        {
          regions.add(new int[] { sf.getBegin(), sf.getEnd() });
        }
        // inclusive length of the range: |end - begin| + 1
        mappedLength += Math.abs(sf.getEnd() - sf.getBegin()) + 1;

        if (!isSpliceable())
        {
          /*
           * 'gene' sequence is contiguous so we can stop as soon as its
           * identifying feature has been found
           */
          break;
        }
      }
    }

    if (regions.isEmpty())
    {
      System.out.println("Failed to identify target sequence for " + accId
              + " from genomic features");
      return null;
    }

    /*
     * a final sort is needed since Ensembl returns CDS sorted within source
     * (havana / ensembl_havana)
     */
    Collections.sort(regions, new RangeSorter(direction == 1));

    List<int[]> to = Arrays.asList(new int[] { start,
        start + mappedLength - 1 });

    return new MapList(regions, to, 1, 1);
  }

  /**
   * Answers true if the sequence being retrieved may occupy discontiguous
   * regions on the genomic sequence.
   */
  protected boolean isSpliceable()
  {
    return true;
  }

  /**
   * Returns true if the sequence feature marks positions of the genomic
   * sequence feature which are within the sequence being retrieved. For
   * example, an 'exon' feature whose parent is the target transcript marks the
   * cdna positions of the transcript.
   *
   * @param sf
   * @param accId
   * @return
   */
  protected abstract boolean identifiesSequence(SequenceFeature sf,
          String accId);

  /**
   * Transfers the sequence feature to the target sequence, locating its start
   * and end range based on the mapping. Features which do not overlap the
   * target sequence are ignored.
   *
   * @param sf
   * @param targetSequence
   * @param mapping
   *          mapping from the sequence feature's coordinates to the target
   *          sequence
   */
  protected void transferFeature(SequenceFeature sf,
          SequenceI targetSequence, MapList mapping)
  {
    int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd());
    if (mappedRange == null)
    {
      return;
    }

    /*
     * copy the feature, normalising the mapped range so begin <= end
     * NOTE: no additional 'consequence' feature is created for
     * sequence_variant features
     */
    SequenceFeature copy = new SequenceFeature(sf);
    copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
    copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
    targetSequence.addSequenceFeature(copy);
  }

  /**
   * Transfers features from sourceSequence to targetSequence
   *
   * @param accessionId
   * @param sourceSequence
   * @param targetSequence
   * @return the result of transferring the source features using the mapping
   *         derived from them; false if either sequence is null or no mapping
   *         could be derived
   */
  protected boolean transferFeatures(String accessionId,
          SequenceI sourceSequence, SequenceI targetSequence)
  {
    if (sourceSequence == null || targetSequence == null)
    {
      return false;
    }

    SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
    MapList mapping = getGenomicRangesFromFeatures(sourceSequence,
            accessionId, targetSequence.getStart());
    if (mapping == null)
    {
      return false;
    }

    return transferFeatures(sfs, targetSequence, mapping, accessionId);
  }

  /**
   * Transfer features to the target sequence. The start/end positions are
   * converted using the mapping. Features which do not overlap are ignored.
   * Features rejected by {@link #retainFeature(SequenceFeature, String)} are
   * also ignored. Note the supplied features array is sorted in place.
   *
   * @param features
   * @param targetSequence
   * @param mapping
   * @param parentId
   * @return true if at least one feature was accepted by retainFeature and
   *         passed on for transfer, else false
   */
  protected boolean transferFeatures(SequenceFeature[] features,
          SequenceI targetSequence, MapList mapping, String parentId)
  {
    final boolean forwardStrand = mapping.isFromForwardStrand();

    /*
     * sort features by start position (descending if reverse strand)
     * before transferring (in forwards order) to the target sequence
     */
    Arrays.sort(features, new Comparator<SequenceFeature>()
    {
      @Override
      public int compare(SequenceFeature o1, SequenceFeature o2)
      {
        int c = Integer.compare(o1.getBegin(), o2.getBegin());
        return forwardStrand ? c : -c;
      }
    });

    boolean transferred = false;
    for (SequenceFeature sf : features)
    {
      if (retainFeature(sf, parentId))
      {
        transferFeature(sf, targetSequence, mapping);
        transferred = true;
      }
    }
    return transferred;
  }

  /**
   * Answers true if the feature type is one we want to keep for the sequence.
   * Some features are only retrieved in order to identify the sequence range,
   * and may then be discarded as redundant information (e.g. "CDS" feature for
   * a CDS sequence).
   */
  @SuppressWarnings("unused")
  protected boolean retainFeature(SequenceFeature sf, String accessionId)
  {
    return true; // override as required
  }

  /**
   * Answers true if the feature has a Parent which refers to the given
   * accession id, or if the feature has no parent. Answers false if the
   * feature's Parent is for a different accession id.
   *
   * @param sf
   * @param identifier
   * @return
   */
  protected boolean featureMayBelong(SequenceFeature sf, String identifier)
  {
    String parent = (String) sf.getValue(PARENT);
    // using contains to allow for prefix "gene:", "transcript:" etc
    // (a Parent that does not contain the identifier means the genomic
    // feature belongs to a different transcript)
    return parent == null || parent.contains(identifier);
  }

  @Override
  public String getDescription()
  {
    return "Ensembl " + getSourceEnsemblType().getType()
            + " sequence with variant features";
  }

  /**
   * Returns a (possibly empty) list of features on the sequence which have the
   * specified sequence ontology type (or a sub-type of it), and the given
   * identifier as parent. Features with no Parent attribute are not matched.
   *
   * @param sequence
   * @param type
   * @param parentId
   * @return
   */
  protected List<SequenceFeature> findFeatures(SequenceI sequence,
          String type, String parentId)
  {
    List<SequenceFeature> result = new ArrayList<SequenceFeature>();

    SequenceFeature[] sfs = sequence.getSequenceFeatures();
    if (sfs != null)
    {
      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
      for (SequenceFeature sf : sfs)
      {
        if (so.isA(sf.getType(), type))
        {
          String parent = (String) sf.getValue(PARENT);
          // null-safe: a feature without a Parent cannot match
          if (parent != null && parent.equals(parentId))
          {
            result.add(sf);
          }
        }
      }
    }
    return result;
  }

  /**
   * Maps exon features from dna to protein, and computes variants in peptide
   * product generated by variants in dna, and adds them as sequence_variant
   * features on the protein sequence. Returns the number of variant features
   * added.
   *
   * @param dnaSeq
   * @param peptide
   * @param dnaToProtein
   */
  static int computeProteinFeatures(SequenceI dnaSeq, SequenceI peptide,
          MapList dnaToProtein)
  {
    // work on the root dataset sequences
    while (dnaSeq.getDatasetSequence() != null)
    {
      dnaSeq = dnaSeq.getDatasetSequence();
    }
    while (peptide.getDatasetSequence() != null)
    {
      peptide = peptide.getDatasetSequence();
    }

    AlignmentUtils.transferFeatures(dnaSeq, peptide, dnaToProtein,
            SequenceOntologyI.EXON);

    LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap(
            dnaSeq, dnaToProtein);

    /*
     * scan codon variations, compute peptide variants and add to peptide sequence
     */
    int count = 0;
    for (Entry<Integer, String[][]> variant : variants.entrySet())
    {
      int peptidePos = variant.getKey();
      String[][] codonVariants = variant.getValue();
      String residue = String.valueOf(peptide.getCharAt(peptidePos - 1)); // 0-based
      List<String> peptideVariants = computePeptideVariants(codonVariants,
              residue);
      if (!peptideVariants.isEmpty())
      {
        String desc = StringUtils.listToDelimitedString(peptideVariants,
                ", ");
        SequenceFeature sf = new SequenceFeature(
                SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
                peptidePos, 0f, null);
        peptide.addSequenceFeature(sf);
        count++;
      }
    }

    /*
     * ugly sort to get sequence features in start position order
     * - would be better to store in Sequence as a TreeSet instead?
     * (guarded, as a sequence with no features may report null)
     */
    SequenceFeature[] peptideFeatures = peptide.getSequenceFeatures();
    if (peptideFeatures != null)
    {
      Arrays.sort(peptideFeatures, new Comparator<SequenceFeature>()
      {
        @Override
        public int compare(SequenceFeature o1, SequenceFeature o2)
        {
          int c = Integer.compare(o1.getBegin(), o2.getBegin());
          return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) : c;
        }
      });
    }
    return count;
  }

  /**
   * Builds a map whose key is position in the protein sequence, and value is an
   * array of all variants for the coding codon positions. Only single-position
   * sequence_variant features which map to the protein and carry an "alleles"
   * value contribute to the map.
   *
   * @param dnaSeq
   * @param dnaToProtein
   * @return
   */
  static LinkedHashMap<Integer, String[][]> buildDnaVariantsMap(
          SequenceI dnaSeq, MapList dnaToProtein)
  {
    /*
     * map from peptide position to all variant features of the codon for it
     * LinkedHashMap ensures we add the peptide features in sequence order
     */
    LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
    SequenceOntologyI so = SequenceOntologyFactory.getInstance();

    SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
    if (dnaFeatures == null)
    {
      return variants;
    }

    int dnaStart = dnaSeq.getStart();
    int[] lastCodon = null;
    int lastPeptidePostion = 0;

    /*
     * build a map of codon variations for peptides
     */
    for (SequenceFeature sf : dnaFeatures)
    {
      int dnaCol = sf.getBegin();
      if (dnaCol != sf.getEnd())
      {
        // not handling multi-locus variant features
        continue;
      }
      if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
      {
        int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
        if (mapsTo == null)
        {
          // feature doesn't lie within coding region
          continue;
        }

        /*
         * extract dna variants to a string array; skip the feature before
         * touching the map if it has no alleles, so that no entry with
         * unpopulated codon positions is ever stored
         */
        String alls = (String) sf.getValue("alleles");
        if (alls == null)
        {
          continue;
        }
        String[] alleles = alls.split(",");

        int peptidePosition = mapsTo[0];
        String[][] codonVariants = variants.get(peptidePosition);
        if (codonVariants == null)
        {
          codonVariants = new String[3][];
          variants.put(peptidePosition, codonVariants);
        }

        /*
         * get this peptides codon positions e.g. [3, 4, 5] or [4, 7, 10]
         */
        int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
                : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
                        peptidePosition, peptidePosition));
        lastPeptidePostion = peptidePosition;
        lastCodon = codon;

        /*
         * save nucleotide (and this variant) for each codon position
         */
        for (int codonPos = 0; codonPos < 3; codonPos++)
        {
          String nucleotide = String.valueOf(dnaSeq
                  .getCharAt(codon[codonPos] - dnaStart));
          if (codon[codonPos] == dnaCol)
          {
            /*
             * record current dna base and its alleles
             */
            String[] dnaVariants = new String[alleles.length + 1];
            dnaVariants[0] = nucleotide;
            System.arraycopy(alleles, 0, dnaVariants, 1, alleles.length);
            codonVariants[codonPos] = dnaVariants;
          }
          else if (codonVariants[codonPos] == null)
          {
            /*
             * record current dna base only
             * (at least until we find any variation and overwrite it)
             */
            codonVariants[codonPos] = new String[] { nucleotide };
          }
        }
      }
    }
    return variants;
  }

  /**
   * Returns a sorted, non-redundant list of all peptide translations generated
   * by the given dna variants, excluding the current residue value
   *
   * @param codonVariants
   *          an array of base values (acgtACGT) for codon positions 1, 2, 3
   * @param residue
   *          the current residue translation
   * @return
   */
  static List<String> computePeptideVariants(String[][] codonVariants,
          String residue)
  {
    List<String> result = new ArrayList<String>();
    for (String base1 : codonVariants[0])
    {
      for (String base2 : codonVariants[1])
      {
        for (String base3 : codonVariants[2])
        {
          String codon = base1 + base2 + base3;
          // TODO: report frameshift/insertion/deletion
          // and multiple-base variants?!
          String peptide = codon.contains("-") ? "-" : ResidueProperties
                  .codonTranslate(codon);
          if (peptide != null && !result.contains(peptide)
                  && !peptide.equalsIgnoreCase(residue))
          {
            result.add(peptide);
          }
        }
      }
    }

    /*
     * sort alphabetically with STOP at the end
     */
    Collections.sort(result, new Comparator<String>()
    {
      @Override
      public int compare(String o1, String o2)
      {
        if ("STOP".equals(o1))
        {
          return 1;
        }
        else if ("STOP".equals(o2))
        {
          return -1;
        }
        else
        {
          return o1.compareTo(o2);
        }
      }
    });
    return result;
  }

  /**
   * Answers true if the feature type is either 'NMD_transcript_variant' or
   * 'transcript' or one of its sub-types in the Sequence Ontology. This is
   * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl
   * although strictly speaking it is not (it is a sub-type of
   * sequence_variant).
   *
   * @param featureType
   * @return
   */
  public static boolean isTranscript(String featureType)
  {
    return SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(featureType)
            || SequenceOntologyFactory.getInstance().isA(featureType,
                    SequenceOntologyI.TRANSCRIPT);
  }
}