import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.io.DataSourceType;
import jalview.io.FeaturesFile;
import jalview.io.FileParse;
+import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
// TODO: use a vararg String... for getSequenceRecords instead?
List<String> queries = new ArrayList<>();
queries.add(query);
- FileParse fp = getSequenceReader(queries);
- if (fp == null || !fp.isValid())
+ BufferedReader fp = getSequenceReader(queries);
+ if (fp == null)
{
return null;
}
- FeaturesFile fr = new FeaturesFile(fp);
+ FeaturesFile fr = new FeaturesFile(
+ new FileParse(fp, null, DataSourceType.URL));
return new Alignment(fr.getSeqsAsArray());
}
* describes the required encoding of the response.
*/
@Override
- protected String getRequestMimeType(boolean multipleIds)
+ protected String getRequestMimeType()
{
return "text/x-gff3";
}
/**
- * Returns the MIME type for GFF3.
+ * Returns the MIME type for GFF3
*/
@Override
protected String getResponseMimeType()
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Returns the gene id related to the given identifier, which may be for a
* gene, transcript or protein
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblMap extends EnsemblRestClient
+{
+ private static final String MAPPED = "mapped";
+
+ private static final String MAPPINGS = "mappings";
+
+ /**
+ * Default constructor (to use rest.ensembl.org)
+ */
+ public EnsemblMap()
+ {
+ super();
+ }
+
+ /**
+ * Constructor given the target domain to fetch data from
+ *
+ * @param
+ */
+ public EnsemblMap(String domain)
+ {
+ super(domain);
+ }
+
+ @Override
+ public String getDbName()
+ {
+ return DBRefSource.ENSEMBL;
+ }
+
+ @Override
+ public AlignmentI getSequenceRecords(String queries) throws Exception
+ {
+ return null; // not used
+ }
+
+ /**
+ * Constructs a URL of the format <code>
+ * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
+ * </code>
+ *
+ * @param species
+ * @param chromosome
+ * @param fromRef
+ * @param toRef
+ * @param startPos
+ * @param endPos
+ * @return
+ * @throws MalformedURLException
+ */
+ protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
+ String toRef, int startPos, int endPos)
+ throws MalformedURLException
+ {
+ /*
+ * start-end might be reverse strand - present forwards to the service
+ */
+ boolean forward = startPos <= endPos;
+ int start = forward ? startPos : endPos;
+ int end = forward ? endPos : startPos;
+ String strand = forward ? "1" : "-1";
+ String url = String.format(
+ "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
+ getDomain(), species, fromRef, chromosome, start, end, strand,
+ toRef);
+ return new URL(url);
+ }
+
+ @Override
+ protected boolean useGetRequest()
+ {
+ return true;
+ }
+
+ @Override
+ protected URL getUrl(List<String> ids) throws MalformedURLException
+ {
+ return null; // not used
+ }
+
+ /**
+ * Calls the REST /map service to get the chromosomal coordinates (start/end)
+ * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef'
+ *
+ * @param species
+ * @param chromosome
+ * @param fromRef
+ * @param toRef
+ * @param queryRange
+ * @return
+ * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
+ */
+ public int[] getAssemblyMapping(String species, String chromosome,
+ String fromRef, String toRef, int[] queryRange)
+ {
+ URL url = null;
+ BufferedReader br = null;
+
+ try
+ {
+ url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0],
+ queryRange[1]);
+ br = getHttpResponse(url, null);
+ return (parseAssemblyMappingResponse(br));
+ } catch (Throwable t)
+ {
+ System.out.println("Error calling " + url + ": " + t.getMessage());
+ return null;
+ } finally
+ {
+ if (br != null)
+ {
+ try
+ {
+ br.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ }
+ }
+ }
+
+ /**
+ * Parses the JSON response from the /map/<species>/ REST service. The
+ * format is (with some fields omitted)
+ *
+ * <pre>
+ * {"mappings":
+ * [{
+ * "original": {"end":45109016,"start":45051610},
+ * "mapped" : {"end":43186384,"start":43128978}
+ * }] }
+ * </pre>
+ *
+ * @param br
+ * @return
+ */
+ protected int[] parseAssemblyMappingResponse(BufferedReader br)
+ {
+ int[] result = null;
+ JSONParser jp = new JSONParser();
+
+ try
+ {
+ JSONObject parsed = (JSONObject) jp.parse(br);
+ JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
+
+ Iterator rvals = mappings.iterator();
+ while (rvals.hasNext())
+ {
+ // todo check for "mapped"
+ JSONObject val = (JSONObject) rvals.next();
+ JSONObject mapped = (JSONObject) val.get(MAPPED);
+ int start = Integer.parseInt(mapped.get("start").toString());
+ int end = Integer.parseInt(mapped.get("end").toString());
+ String strand = mapped.get("strand").toString();
+ if ("1".equals(strand))
+ {
+ result = new int[] { start, end };
+ }
+ else
+ {
+ result = new int[] { end, start };
+ }
+ }
+ } catch (IOException | ParseException | NumberFormatException e)
+ {
+ // ignore
+ }
+ return result;
+ }
+
+ /**
+ * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
+ * REST call is to either ensembl or ensemblgenomes, as determined from the
+ * division, e.g. Ensembl or EnsemblProtists.
+ *
+ * @param domain
+ * @param accession
+ * @param start
+ * @param end
+ * @param cdsOrCdna
+ * @return
+ * @throws MalformedURLException
+ */
+ URL getIdMapUrl(String domain, String accession, int start, int end,
+ String cdsOrCdna) throws MalformedURLException
+ {
+ String url = String
+ .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
+ domain, cdsOrCdna, accession, start, end);
+ return new URL(url);
+ }
+
+}
*/
package jalview.ext.ensembl;
-import jalview.io.DataSourceType;
-import jalview.io.FileParse;
import jalview.util.StringUtils;
import java.io.BufferedReader;
protected abstract boolean useGetRequest();
/**
- * Return the desired value for the Content-Type request header
- *
- * @param multipleIds
+ * Returns the desired value for the Content-Type request header. Default is
+ * application/json, override if required to vary this.
*
* @return
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getRequestMimeType(boolean multipleIds);
+ protected String getRequestMimeType()
+ {
+ return "application/json"; // default; sent as the Content-Type request header
+ }
/**
- * Return the desired value for the Accept request header
+ * Returns the desired value for the Accept request header. Default is
+ * application/json; override if required to vary this.
*
* @return
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getResponseMimeType();
+ protected String getResponseMimeType()
+ {
+ return "application/json"; // default; sent as the Accept request header
+ }
/**
* Checks Ensembl's REST 'ping' endpoint, and returns true if response
}
/**
- * returns a reader to a Fasta response from the Ensembl sequence endpoint
+ * Returns a reader to a (JSON) response from the Ensembl sequence endpoint.
+ * If the request failed, the return value may be null.
*
* @param ids
* @return
* @throws IOException
*/
- protected FileParse getSequenceReader(List<String> ids) throws IOException
+ protected BufferedReader getSequenceReader(List<String> ids)
+ throws IOException
{
URL url = getUrl(ids);
BufferedReader reader = getHttpResponse(url, ids);
- if (reader == null)
- {
- // request failed
- return null;
- }
- FileParse fp = new FileParse(reader, url.toString(),
- DataSourceType.URL);
- return fp;
+ return reader;
}
/**
boolean multipleIds = ids != null && ids.size() > 1;
connection.setRequestMethod(
multipleIds ? HttpMethod.POST : HttpMethod.GET);
- connection.setRequestProperty("Content-Type",
- getRequestMimeType(multipleIds));
+ connection.setRequestProperty("Content-Type", getRequestMimeType());
connection.setRequestProperty("Accept", getResponseMimeType());
connection.setUseCaches(false);
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.exceptions.JalviewException;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.Comparison;
import jalview.util.IntRangeComparator;
import jalview.util.MapList;
+import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
/**
* Base class for Ensembl sequence fetchers
*
inProgress = false;
throw new JalviewException("ENSEMBL Rest API not available.");
}
- FileParse fp = getSequenceReader(ids);
- if (fp == null)
+ BufferedReader br = getSequenceReader(ids);
+ if (br == null)
{
return alignment;
}
- FastaFile fr = new FastaFile(fp);
- if (fr.hasWarningMessage())
+ List<SequenceI> seqs = parseSequenceJson(br);
+
+ if (seqs.isEmpty())
{
- System.out.println(
- String.format("Warning when retrieving %d ids %s\n%s",
- ids.size(), ids.toString(), fr.getWarningMessage()));
+ throw new IOException("No data returned for " + ids);
}
- else if (fr.getSeqs().size() != ids.size())
+
+ if (seqs.size() != ids.size())
{
System.out.println(String.format(
"Only retrieved %d sequences for %d query strings",
- fr.getSeqs().size(), ids.size()));
+ seqs.size(), ids.size()));
}
- if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
+ if (!seqs.isEmpty())
{
- /*
- * POST request has returned an empty FASTA file e.g. for invalid id
- */
- throw new IOException("No data returned for " + ids);
- }
-
- if (fr.getSeqs().size() > 0)
- {
- AlignmentI seqal = new Alignment(fr.getSeqsAsArray());
- for (SequenceI sq : seqal.getSequences())
+ AlignmentI seqal = new Alignment(
+ seqs.toArray(new SequenceI[seqs.size()]));
+ for (SequenceI seq : seqs)
{
- if (sq.getDescription() == null)
+ if (seq.getDescription() == null)
{
- sq.setDescription(getDbName());
+ seq.setDescription(getDbName());
}
- String name = sq.getName();
+ String name = seq.getName();
if (ids.contains(name)
|| ids.contains(name.replace("ENSP", "ENST")))
{
- DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
+ // TODO JAL-3077 use true accession version in dbref
+ DBRefEntry dbref = DBRefUtils.parseToDbRef(seq, getDbSource(),
getEnsemblDataVersion(), name);
- sq.addDBRef(dbref);
+ seq.addDBRef(dbref);
}
}
if (alignment == null)
}
/**
+ * Parses a JSON response into a list of sequences
+ *
+ * @param br
+ * @return
+ * @see http://rest.ensembl.org/documentation/info/sequence_id
+ */
+  protected List<SequenceI> parseSequenceJson(BufferedReader br)
+  {
+    List<SequenceI> result = new ArrayList<>();
+    if (br == null)
+    {
+      // no response to parse (e.g. the request failed)
+      return result;
+    }
+
+    JSONParser jp = new JSONParser();
+    try
+    {
+      /*
+       * for now, assumes only one sequence returned; refactor if needed
+       * in future to handle a JSONArray with more than one
+       */
+      Object parsed = jp.parse(br);
+      if (!(parsed instanceof JSONObject))
+      {
+        /*
+         * check the type instead of casting blindly, so that an unexpected
+         * reply (such as a JSON array) gives an empty result rather than a
+         * ClassCastException
+         */
+        System.err.println(
+                "Error processing JSON response: expected a single JSON object");
+        return result;
+      }
+      final JSONObject val = (JSONObject) parsed;
+
+      Object s = val.get("desc");
+      String desc = s == null ? null : s.toString();
+      s = val.get("id");
+      String id = s == null ? null : s.toString();
+      s = val.get("seq");
+      String seq = s == null ? null : s.toString();
+      if (seq == null)
+      {
+        /*
+         * a reply with no residues cannot be turned into a sequence;
+         * returning an empty list lets the caller report 'no data'
+         */
+        System.err.println(
+                "Error processing JSON response: no 'seq' value found");
+        return result;
+      }
+
+      Sequence sequence = new Sequence(id, seq);
+      if (desc != null)
+      {
+        sequence.setDescription(desc);
+      }
+      // todo JAL-3077 make a DBRefEntry with true accession version
+      // s = val.get("version");
+      // String version = s == null ? "0" : s.toString();
+      // DBRefEntry dbref = new DBRefEntry(getDbSource(), version, id);
+      // sequence.addDBRef(dbref);
+      result.add(sequence);
+    } catch (ParseException | IOException e)
+    {
+      // report the problem and return what we have (an empty list)
+      System.err.println("Error processing JSON response: " + e.toString());
+    }
+    return result;
+  }
+
+ /**
* Returns the URL for the REST call
*
* @return
}
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
urlstring.append("?type=").append(getSourceEnsemblType().getType());
- urlstring.append(("&Accept=text/x-fasta"));
+ urlstring.append(("&Accept=application/json"));
+ urlstring.append(("&Content-Type=application/json"));
String objectType = getObjectType();
if (objectType != null)
return false;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return multipleIds ? "application/json" : "text/x-fasta";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "text/x-fasta";
- }
-
/**
*
* @return the configured sequence return type for this source
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id) and database
*/
public List<DBRefEntry> getCrossReferences(String identifier)
{
- List<DBRefEntry> result = new ArrayList<DBRefEntry>();
- List<String> ids = new ArrayList<String>();
+ List<DBRefEntry> result = new ArrayList<>();
+ List<String> ids = new ArrayList<>();
ids.add(identifier);
BufferedReader br = null;
throws IOException
{
JSONParser jp = new JSONParser();
- List<DBRefEntry> result = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> result = new ArrayList<>();
try
{
JSONArray responses = (JSONArray) jp.parse(br);
{
return false;
}
-
- @Override
- protected String getRequestMimeType(boolean b)
- {
- return null;
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return null;
- }
-
};
}
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FastaFile;
-import jalview.io.FileParse;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyLite;
+ "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+ "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+ "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
- + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDG\n"
+ // ? insertion added in ENSP00000288602.11, not in P15056
+ + "APLNQLMRCLRKYQSRTPSPLLHSVPSEIVFDFEPGPVFR\n"
+ // end insertion
+ + "GSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+ "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+ "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
}
@Test(dataProvider = "ens_seqs", suiteName = "live")
- public void testGetOneSeqs(EnsemblRestClient proxy, String sq,
+ public void testGetSequenceRecords(EnsemblSeqProxy proxy, String sq,
String fastasq) throws Exception
{
- FileParse fp = proxy.getSequenceReader(Arrays
- .asList(new String[] { sq }));
- SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
FastaFile trueRes = new FastaFile(fastasq, DataSourceType.PASTE);
- SequenceI[] trueSqs = trueRes.getSeqsAsArray();
- Assert.assertEquals(sqs.length, trueSqs.length,
+ SequenceI[] expected = trueRes.getSeqsAsArray();
+ AlignmentI retrieved = proxy.getSequenceRecords(sq);
+
+ Assert.assertEquals(retrieved.getHeight(), expected.length,
"Different number of sequences retrieved for query " + sq);
- Alignment ral = new Alignment(sqs);
- for (SequenceI tr : trueSqs)
+
+ for (SequenceI tr : expected)
{
SequenceI[] rseq;
Assert.assertNotNull(
- rseq = ral.findSequenceMatch(tr.getName()),
+ rseq = retrieved.findSequenceMatch(tr.getName()),
"Couldn't find sequences matching expected sequence "
+ tr.getName());
Assert.assertEquals(rseq.length, 1,
"Sequences differ for " + tr.getName() + "\n" + "Exp:"
+ tr.getSequenceAsString() + "\n" + "Got:"
+ rseq[0].getSequenceAsString());
-
}
}