import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.io.DataSourceType;
import jalview.io.FeaturesFile;
import jalview.io.FileParse;
+import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
// TODO: use a vararg String... for getSequenceRecords instead?
List<String> queries = new ArrayList<>();
queries.add(query);
- FileParse fp = getSequenceReader(queries);
- if (fp == null || !fp.isValid())
+ BufferedReader fp = getSequenceReader(queries);
+ if (fp == null)
{
return null;
}
- FeaturesFile fr = new FeaturesFile(fp);
+ FeaturesFile fr = new FeaturesFile(
+ new FileParse(fp, null, DataSourceType.URL));
return new Alignment(fr.getSeqsAsArray());
}
* describes the required encoding of the response.
*/
@Override
- protected String getRequestMimeType(boolean multipleIds)
+ protected String getRequestMimeType()
{
return "text/x-gff3";
}
/**
- * Returns the MIME type for GFF3.
+ * Returns the MIME type for GFF3
*/
@Override
protected String getResponseMimeType()
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Answers the domain (http://rest.ensembl.org or
* http://rest.ensemblgenomes.org) for the given division, or null if not
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Returns the gene id related to the given identifier (which may be for a
* gene, transcript or protein), or null if none is found
}
@Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
- @Override
protected URL getUrl(List<String> ids) throws MalformedURLException
{
return null; // not used
*/
package jalview.ext.ensembl;
-import jalview.io.DataSourceType;
-import jalview.io.FileParse;
import jalview.util.StringUtils;
import java.io.BufferedReader;
protected abstract boolean useGetRequest();
/**
- * Return the desired value for the Content-Type request header
- *
- * @param multipleIds
+ * Returns the desired value for the Content-Type request header. Default is
+ * application/json, override if required to vary this.
*
* @return the MIME type to send as the Content-Type request header;
*         "application/json" unless a subclass overrides this method
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getRequestMimeType(boolean multipleIds);
+ protected String getRequestMimeType()
+ {
+ return "application/json";
+ }
/**
- * Return the desired value for the Accept request header
+ * Returns the desired value for the Accept request header. Default is
+ * application/json, override if required to vary this.
*
* @return the MIME type to send as the Accept request header;
*         "application/json" unless a subclass overrides this method
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getResponseMimeType();
+ protected String getResponseMimeType()
+ {
+ return "application/json";
+ }
/**
* Checks Ensembl's REST 'ping' endpoint, and returns true if response
}
/**
- * returns a reader to a Fasta response from the Ensembl sequence endpoint
+ * Returns a reader to a (JSON) response from the Ensembl sequence endpoint.
+ * If the request failed the return value may be null.
*
* @param ids
*          the identifiers to query; used both to build the URL (getUrl) and
*          to make the request (getHttpResponse)
* @return the reader supplied by getHttpResponse, passed back unwrapped; may
*         be null, so callers must check before reading
* @throws IOException
*           propagated from building the URL or making the HTTP request
*/
- protected FileParse getSequenceReader(List<String> ids) throws IOException
+ protected BufferedReader getSequenceReader(List<String> ids)
+ throws IOException
{
URL url = getUrl(ids);
BufferedReader reader = getHttpResponse(url, ids);
- if (reader == null)
- {
- // request failed
- return null;
- }
- FileParse fp = new FileParse(reader, url.toString(),
- DataSourceType.URL);
- return fp;
+ return reader;
}
/**
boolean multipleIds = ids != null && ids.size() > 1;
connection.setRequestMethod(
multipleIds ? HttpMethod.POST : HttpMethod.GET);
- connection.setRequestProperty("Content-Type",
- getRequestMimeType(multipleIds));
+ connection.setRequestProperty("Content-Type", getRequestMimeType());
connection.setRequestProperty("Accept", getResponseMimeType());
connection.setUseCaches(false);
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.exceptions.JalviewException;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.IntRangeComparator;
import jalview.util.MapList;
+import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
/**
* Base class for Ensembl sequence fetchers
*
inProgress = false;
throw new JalviewException("ENSEMBL Rest API not available.");
}
- FileParse fp = getSequenceReader(ids);
- if (fp == null)
+ BufferedReader br = getSequenceReader(ids);
+ if (br == null)
{
return alignment;
}
- FastaFile fr = new FastaFile(fp);
- if (fr.hasWarningMessage())
+ List<SequenceI> seqs = parseSequenceJson(br);
+
+ if (seqs.isEmpty())
{
- System.out.println(
- String.format("Warning when retrieving %d ids %s\n%s",
- ids.size(), ids.toString(), fr.getWarningMessage()));
+ throw new IOException("No data returned for " + ids);
}
- else if (fr.getSeqs().size() != ids.size())
+
+ if (seqs.size() != ids.size())
{
System.out.println(String.format(
"Only retrieved %d sequences for %d query strings",
- fr.getSeqs().size(), ids.size()));
+ seqs.size(), ids.size()));
}
- if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
+ if (!seqs.isEmpty())
{
- /*
- * POST request has returned an empty FASTA file e.g. for invalid id
- */
- throw new IOException("No data returned for " + ids);
- }
-
- if (fr.getSeqs().size() > 0)
- {
- AlignmentI seqal = new Alignment(fr.getSeqsAsArray());
- for (SequenceI sq : seqal.getSequences())
+ AlignmentI seqal = new Alignment(
+ seqs.toArray(new SequenceI[seqs.size()]));
+ for (SequenceI seq : seqs)
{
- if (sq.getDescription() == null)
+ if (seq.getDescription() == null)
{
- sq.setDescription(getDbName());
+ seq.setDescription(getDbName());
}
- String name = sq.getName();
+ String name = seq.getName();
if (ids.contains(name)
|| ids.contains(name.replace("ENSP", "ENST")))
{
- DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
+ // TODO JAL-3077 use true accession version in dbref
+ DBRefEntry dbref = DBRefUtils.parseToDbRef(seq, getDbSource(),
getEnsemblDataVersion(), name);
- sq.addDBRef(dbref);
+ seq.addDBRef(dbref);
}
}
if (alignment == null)
}
/**
+ * Parses a JSON response from the Ensembl sequence endpoint into a list of
+ * sequences. The response may be a single JSON object (one sequence) or a
+ * JSON array of such objects (e.g. the reply to a request for several ids).
+ * Content that is not a JSON object, or that has no "seq" value (such as an
+ * error reply), is reported on stderr and skipped rather than causing an
+ * unchecked exception.
+ *
+ * @param br
+ *          reader on the JSON response; it is read but not closed here
+ * @return the sequences parsed, in response order; empty (never null) if
+ *         the response could not be parsed or held no sequence data
+ * @see http://rest.ensembl.org/documentation/info/sequence_id
+ */
+ protected List<SequenceI> parseSequenceJson(BufferedReader br)
+ {
+   List<SequenceI> result = new ArrayList<>();
+   try
+   {
+     Object parsed = new JSONParser().parse(br);
+     if (parsed instanceof List)
+     {
+       // a JSON array is parsed to a List: one entry per sequence
+       for (Object item : (List<?>) parsed)
+       {
+         addSequenceFromJson(item, result);
+       }
+     }
+     else
+     {
+       addSequenceFromJson(parsed, result);
+     }
+   } catch (ParseException | IOException e)
+   {
+     System.err.println("Error processing JSON response: " + e.toString());
+     // ignore
+   }
+   return result;
+ }
+
+ /**
+  * Builds a sequence from one JSON record (using its "id", "seq" and "desc"
+  * values) and adds it to the result list. Does nothing, other than log to
+  * stderr, if the record is not a JSON object or has no "seq" value.
+  *
+  * @param json
+  *          one parsed JSON value, expected to be a JSONObject
+  * @param result
+  *          the list to add the sequence to
+  */
+ private static void addSequenceFromJson(Object json,
+         List<SequenceI> result)
+ {
+   if (!(json instanceof JSONObject))
+   {
+     System.err.println(
+             "Error processing JSON response: unexpected content " + json);
+     return;
+   }
+   JSONObject val = (JSONObject) json;
+   Object seq = val.get("seq");
+   if (seq == null)
+   {
+     // NOTE(review): assumes Sequence cannot be built from a null sequence
+     // string - confirm; either way there is no sequence data to return
+     System.err.println(
+             "Error processing JSON response: no sequence in " + val);
+     return;
+   }
+   Object id = val.get("id");
+   Sequence sequence = new Sequence(id == null ? null : id.toString(),
+           seq.toString());
+   Object desc = val.get("desc");
+   if (desc != null)
+   {
+     sequence.setDescription(desc.toString());
+   }
+   // TODO JAL-3077 make a DBRefEntry with true accession version
+   // s = val.get("version");
+   // String version = s == null ? "0" : s.toString();
+   // DBRefEntry dbref = new DBRefEntry(getDbSource(), version, id);
+   // sequence.addDBRef(dbref);
+   result.add(sequence);
+ }
+
+ /**
* Returns the URL for the REST call
*
* @return
}
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
urlstring.append("?type=").append(getSourceEnsemblType().getType());
- urlstring.append(("&Accept=text/x-fasta"));
+ urlstring.append(("&Accept=application/json"));
+ urlstring.append(("&Content-Type=application/json"));
String objectType = getObjectType();
if (objectType != null)
return false;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return multipleIds ? "application/json" : "text/x-fasta";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "text/x-fasta";
- }
-
/**
*
* @return the configured sequence return type for this source
String accId);
/**
+ * Answers a list of sequence features that mark positions of the genomic
+ * sequence feature which are within the sequence being retrieved. For
+ * example, an 'exon' feature whose parent is the target transcript marks the
+ * cdna positions of the transcript. For a gene sequence, this is trivially
+ * just the 'gene' feature with matching gene id.
+ *
+ * @param seq
+ *          the sequence whose features are searched (presumably the genomic
+ *          sequence - TODO confirm against implementations)
+ * @param accId
+ *          accession id of the sequence being retrieved, e.g. the target
+ *          transcript id or gene id that features are matched against
+ * @return the identifying features found for accId (whether this may be
+ *         null or empty is not specified here - check implementations)
+ */
+ protected abstract List<SequenceFeature> getIdentifyingFeatures(
+ SequenceI seq, String accId);
+
+ /**
* Transfers the sequence feature to the target sequence, locating its start
* and end range based on the mapping. Features which do not overlap the
* target sequence are ignored.
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id) and database
*/
public List<DBRefEntry> getCrossReferences(String identifier)
{
- List<DBRefEntry> result = new ArrayList<DBRefEntry>();
- List<String> ids = new ArrayList<String>();
+ List<DBRefEntry> result = new ArrayList<>();
+ List<String> ids = new ArrayList<>();
ids.add(identifier);
BufferedReader br = null;
throws IOException
{
JSONParser jp = new JSONParser();
- List<DBRefEntry> result = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> result = new ArrayList<>();
try
{
JSONArray responses = (JSONArray) jp.parse(br);
{
return false;
}
-
- @Override
- protected String getRequestMimeType(boolean b)
- {
- return null;
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return null;
- }
-
};
}
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertSame;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FastaFile;
-import jalview.io.FileParse;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyLite;
+ "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+ "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+ "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
- + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDG\n"
+ // ? insertion added in ENSP00000288602.11, not in P15056
+ + "APLNQLMRCLRKYQSRTPSPLLHSVPSEIVFDFEPGPVFR\n"
+ // end insertion
+ + "GSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+ "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+ "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
}
@Test(dataProvider = "ens_seqs", suiteName = "live")
- public void testGetOneSeqs(EnsemblRestClient proxy, String sq,
+ public void testGetSequenceRecords(EnsemblSeqProxy proxy, String sq,
String fastasq) throws Exception
{
- FileParse fp = proxy.getSequenceReader(Arrays
- .asList(new String[] { sq }));
- SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
FastaFile trueRes = new FastaFile(fastasq, DataSourceType.PASTE);
- SequenceI[] trueSqs = trueRes.getSeqsAsArray();
- Assert.assertEquals(sqs.length, trueSqs.length,
+ SequenceI[] expected = trueRes.getSeqsAsArray();
+ AlignmentI retrieved = proxy.getSequenceRecords(sq);
+
+ Assert.assertEquals(retrieved.getHeight(), expected.length,
"Different number of sequences retrieved for query " + sq);
- Alignment ral = new Alignment(sqs);
- for (SequenceI tr : trueSqs)
+
+ for (SequenceI tr : expected)
{
SequenceI[] rseq;
Assert.assertNotNull(
- rseq = ral.findSequenceMatch(tr.getName()),
+ rseq = retrieved.findSequenceMatch(tr.getName()),
"Couldn't find sequences matching expected sequence "
+ tr.getName());
Assert.assertEquals(rseq.length, 1,
"Sequences differ for " + tr.getName() + "\n" + "Exp:"
+ tr.getSequenceAsString() + "\n" + "Got:"
+ rseq[0].getSequenceAsString());
-
}
}