}
Sequence transcript = new Sequence(accId, seqChars, 1, transcriptLength);
+ String geneName = (String) transcriptFeature.getValue(NAME);
+ if (geneName != null)
+ {
+ transcript.setDescription(geneName);
+ }
transcript.createDatasetSequence();
al.addSequence(transcript);
/**
* Returns a list of the transcript features on the sequence whose Parent is
- * the gene for the accession id. Also removes all transcript features from
- * the gene sequence, as we have no further need for them and they obscure
- * more useful features on the display.
+ * the gene for the accession id.
*
* @param accId
* @param geneSequence
{
List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
- List<SequenceFeature> keptFeatures = new ArrayList<SequenceFeature>();
String parentIdentifier = "gene:" + accId;
SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
transcriptFeatures.add(sf);
}
}
- else
- {
- keptFeatures.add(sf);
- }
}
}
- SequenceFeature[] featuresRetained = keptFeatures.toArray(new SequenceFeature[keptFeatures.size()]);
- geneSequence.getDatasetSequence().setSequenceFeatures(featuresRetained);
return transcriptFeatures;
}
/**
* Return the desired value for the Content-Type request header
*
+ * @param multipleIds true if more than one id is being requested
+ *
* @return
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getRequestMimeType();
+ protected abstract String getRequestMimeType(boolean multipleIds);
/**
* Return the desired value for the Accept request header
* POST method allows multiple queries in one request; it is supported for
* sequence queries, but not for overlap
*/
- connection.setRequestMethod(useGetRequest() ? HttpMethod.GET
- : HttpMethod.POST);
- connection.setRequestProperty("Content-Type", getRequestMimeType());
+ boolean multipleIds = ids.size() > 1; // formerly decided by !useGetRequest()
+ connection.setRequestMethod(multipleIds ? HttpMethod.POST
+ : HttpMethod.GET);
+ connection.setRequestProperty("Content-Type",
+ getRequestMimeType(multipleIds));
connection.setRequestProperty("Accept", getResponseMimeType());
connection.setUseCaches(false);
connection.setDoInput(true);
- connection.setDoOutput(true);
+ connection.setDoOutput(multipleIds);
- if (!useGetRequest())
+ if (multipleIds)
{
writePostBody(connection, ids);
}
if (responseCode != 200)
{
+ /*
+ * note: a GET request for an invalid id returns an error code, e.g. 415,
+ * but a POST request returns 200 and an empty FASTA response
+ */
throw new RuntimeException(
"Response code was not 200. Detected response was "
+ responseCode);
*/
protected static final String NMD_VARIANT = "NMD_transcript_variant";
+ protected static final String NAME = "Name";
+
public enum EnsemblSeqType
{
/**
/**
* Makes the sequence queries to Ensembl's REST service and returns an
- * alignment consisting of the returned sequences. This overloaded method
- * allows the genomic sequence (with features) to be passed in if it has
- * already been retrieved, to avoid repeat calls to fetch it.
+ * alignment consisting of the returned sequences.
*/
- public AlignmentI getSequenceRecords(String query,
- SequenceI genomicSequence) throws Exception
+ @Override
+ public AlignmentI getSequenceRecords(String query) throws Exception
{
long now = System.currentTimeMillis();
// TODO use a String... query vararg instead?
}
/*
- * fetch and transfer genomic sequence features
+ * fetch and transfer genomic sequence features,
+ * fetch protein product and add as cross-reference
*/
for (String accId : allIds)
{
- addFeaturesAndProduct(accId, alignment, genomicSequence);
+ addFeaturesAndProduct(accId, alignment);
}
inProgress = false;
* @param accId
* @param alignment
*/
- protected void addFeaturesAndProduct(String accId, AlignmentI alignment,
- SequenceI genomicSequence)
+ protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
{
+ if (alignment == null)
+ {
+ return;
+ }
+
try
{
/*
* get 'dummy' genomic sequence with exon, cds and variation features
*/
- if (genomicSequence == null)
+ SequenceI genomicSequence = null;
+ EnsemblOverlap gffFetcher = new EnsemblOverlap();
+ EnsemblFeatureType[] features = getFeaturesToFetch();
+ AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
+ features);
+ if (geneFeatures.getHeight() > 0)
{
- EnsemblOverlap gffFetcher = new EnsemblOverlap();
- EnsemblFeatureType[] features = getFeaturesToFetch();
- AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
- features);
- if (geneFeatures.getHeight() > 0)
- {
- genomicSequence = geneFeatures.getSequenceAt(0);
- }
+ genomicSequence = geneFeatures.getSequenceAt(0);
}
if (genomicSequence != null)
{
"Only retrieved %d sequences for %d query strings", fr
.getSeqs().size(), ids.size()));
}
+
+ if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
+ {
+ /*
+ * POST request has returned an empty FASTA file e.g. for invalid id
+ */
+ throw new IOException("No data returned for " + ids);
+ }
+
if (fr.getSeqs().size() > 0)
{
AlignmentI seqal = new Alignment(
@Override
protected URL getUrl(List<String> ids) throws MalformedURLException
{
- // ids are not used - they go in the POST body instead
+ /*
+ * a single id is included in the URL path
+ * multiple ids go in the POST body instead
+ */
StringBuffer urlstring = new StringBuffer(128);
urlstring.append(SEQUENCE_ID_URL);
-
+ if (ids.size() == 1)
+ {
+ urlstring.append("/").append(ids.get(0));
+ }
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
urlstring.append("?type=").append(getSourceEnsemblType().getType());
urlstring.append(("&Accept=text/x-fasta"));
}
@Override
- protected String getRequestMimeType()
+ protected String getRequestMimeType(boolean multipleIds)
{
- return "application/json";
+ return multipleIds ? "application/json" : "text/x-fasta"; // JSON for a multi-id POST, FASTA for a single-id GET
}
@Override
+ " sequence with variant features";
}
- @Override
- public AlignmentI getSequenceRecords(String identifier) throws Exception
- {
- return getSequenceRecords(identifier, null);
- }
-
/**
* Returns a (possibly empty) list of features on the sequence which have the
* specified sequence ontology type (or a sub-type of it), and the given