package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; import jalview.datamodel.GeneLociI; import jalview.datamodel.GeneLocus; import jalview.datamodel.Mapping; import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** * A client for the Ensembl REST service /map endpoint, to convert from * coordinates of one genome assembly to another. *
* Note that species and assembly identifiers passed to this class must be valid
* in Ensembl. They are not case sensitive.
*
* @author gmcarstairs
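* @see <a href="https://rest.ensembl.org/documentation/info/assembly_map">Ensembl REST documentation for the assembly map endpoint</a>
* @see <a href="https://rest.ensembl.org/info/assembly/human?content-type=text/xml">Ensembl REST assembly info for human (XML)</a>
* @see <a href="https://rest.ensembl.org/info/species?content-type=text/xml">Ensembl REST species info (XML)</a>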
*/
public class EnsemblMap extends EnsemblRestClient
{
// JSON key of the mapped region object within each entry of the mappings array
private static final String MAPPED = "mapped";
// JSON key of the top-level array of mappings in a /map response
private static final String MAPPINGS = "mappings";
// mapping type passed to getIdMapping by getCdsMapping
private static final String CDS = "cds";
// mapping type passed to getIdMapping by getCdnaMapping
private static final String CDNA = "cdna";
/**
 * Creates a client using the superclass default settings (documented here as
 * targeting rest.ensembl.org)
 */
public EnsemblMap()
{
  super();
}
/**
 * Creates a client that fetches data from the given target domain
 *
 * @param domain
 *          the domain to fetch data from; passed unchanged to the superclass
 *          constructor
 */
public EnsemblMap(String domain)
{
  super(domain);
}
/**
 * Answers the database name for this client
 *
 * @return the value of {@code DBRefSource.ENSEMBL}
 */
@Override
public String getDbName()
{
  final String dbName = DBRefSource.ENSEMBL;
  return dbName;
}
/**
 * Not used by this client: answers null whatever the queries
 *
 * @param queries
 *          ignored
 * @return always null
 */
@Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
  // not used
  return null;
}
/**
 * Constructs a URL for the REST /map service, of the format
 *
 * <pre>
 * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
 * </pre>
 *
 * The region is always presented to the service in ascending order: if
 * {@code startPos} is greater than {@code endPos}, the two positions are
 * swapped and the strand is given as -1, otherwise the strand is 1.
 *
 * @param species
 *          species identifier, e.g. human
 * @param chromosome
 *          chromosome name, e.g. 17
 * @param fromRef
 *          assembly that the supplied positions refer to, e.g. GRCh38
 * @param toRef
 *          assembly to map the positions to, e.g. GRCh37
 * @param startPos
 *          first position of the range
 * @param endPos
 *          last position of the range
 * @return the URL for the request
 * @throws MalformedURLException
 *           if the constructed string is not a valid URL
 */
protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
        String toRef, int startPos, int endPos)
        throws MalformedURLException
{
  /*
   * start-end might be reverse strand - present forwards to the service
   */
  boolean forward = startPos <= endPos;
  int start = Math.min(startPos, endPos);
  int end = Math.max(startPos, endPos);
  String strand = forward ? "1" : "-1";

  /*
   * format with Locale.ROOT so that %d always produces ASCII digits; with
   * the default locale the positions may be written using locale-specific
   * digits, which would give a URL the service cannot interpret
   */
  String url = String.format(java.util.Locale.ROOT,
          "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
          getDomain(), species, fromRef, chromosome, start, end, strand,
          toRef);
  return new URL(url);
}
/**
 * Always answers true.
 *
 * NOTE(review): presumably tells the superclass to make requests with HTTP
 * GET rather than POST - confirm against EnsemblRestClient
 *
 * @return true
 */
@Override
protected boolean useGetRequest()
{
  final boolean useGet = true;
  return useGet;
}
@Override
protected URL getUrl(List
* {"mappings":
* [{
* "original": {"end":45109016,"start":45051610},
* "mapped" : {"end":43186384,"start":43128978}
* }] }
*
*
* @param br
* @return
*/
/**
 * Parses the JSON response of the assembly mapping service and answers the
 * mapped range. The fields read are (others are ignored)
 *
 * <pre>
 *  {"mappings":
 *    [{
 *       "mapped" : {"end":43186384,"start":43128978,"strand":1}
 *    }] }
 * </pre>
 *
 * The range is answered as [start, end] if the mapped strand is 1, or as
 * [end, start] for any other strand value. A mapped region with no strand is
 * treated as forward. If the response holds several mappings, the last one
 * that can be read is answered. Entries with no "mapped" region, or with no
 * start or end, are skipped rather than causing an exception.
 *
 * @param br
 *          reader of the JSON response
 * @return the mapped range, or null if the response cannot be parsed or
 *         holds no usable mapping
 */
protected int[] parseAssemblyMappingResponse(BufferedReader br)
{
  int[] result = null;
  JSONParser jp = new JSONParser();

  try
  {
    Object parsed = jp.parse(br);
    if (!(parsed instanceof JSONObject))
    {
      return null;
    }
    Object mappings = ((JSONObject) parsed).get(MAPPINGS);
    if (!(mappings instanceof JSONArray))
    {
      // no mappings array in the response - nothing to report
      return null;
    }

    for (Object entry : (JSONArray) mappings)
    {
      if (!(entry instanceof JSONObject))
      {
        continue;
      }
      Object mapped = ((JSONObject) entry).get(MAPPED);
      if (!(mapped instanceof JSONObject))
      {
        continue;
      }
      JSONObject region = (JSONObject) mapped;
      Object startValue = region.get("start");
      Object endValue = region.get("end");
      if (startValue == null || endValue == null)
      {
        continue;
      }
      int start = Integer.parseInt(startValue.toString());
      int end = Integer.parseInt(endValue.toString());

      /*
       * strand "1" is forward, anything else is reverse;
       * an absent strand is taken to be forward
       */
      Object strand = region.get("strand");
      boolean forward = strand == null || "1".equals(strand.toString());
      result = forward ? new int[] { start, end }
              : new int[] { end, start };
    }
  } catch (IOException | ParseException | NumberFormatException e)
  {
    /*
     * deliberately best-effort: an unreadable or malformed response is
     * reported by returning whatever was parsed so far (possibly null)
     */
  }
  return result;
}
/**
 * Maps a range of an accession's CDS coordinates using the REST /map/cds/id
 * service, by delegating to {@code getIdMapping} with mapping type "cds"
 *
 * @param division
 *          e.g. Ensembl, EnsemblMetazoa
 * @param accession
 *          e.g. ENST00000592782, Y55B1AR.1.1
 * @param start
 *          start of the range to map
 * @param end
 *          end of the range to map
 * @return the gene loci answered by {@code getIdMapping}, or null if the
 *         call fails
 */
public GeneLociI getCdsMapping(String division, String accession,
        int start, int end)
{
  GeneLociI loci = getIdMapping(division, accession, start, end, CDS);
  return loci;
}
/**
 * Maps a range of an accession's cDNA coordinates using the REST
 * /map/cdna/id service, by delegating to {@code getIdMapping} with mapping
 * type "cdna"
 *
 * @param division
 *          e.g. Ensembl, EnsemblMetazoa
 * @param accession
 *          e.g. ENST00000592782, Y55B1AR.1.1
 * @param start
 *          start of the range to map
 * @param end
 *          end of the range to map
 * @return the gene loci answered by {@code getIdMapping}, or null if the
 *         call fails
 */
public GeneLociI getCdnaMapping(String division, String accession,
        int start, int end)
{
  GeneLociI loci = getIdMapping(division, accession, start, end, CDNA);
  return loci;
}
/**
 * Looks up the REST domain for the division, requests the mapping of the
 * given range of the accession, and answers the parsed response.
 *
 * Answers null if no domain is found for the division, if no response is
 * obtained, or if anything at all is thrown on the way (in which case a
 * message is written to standard output). The response reader, if one was
 * opened, is always closed before returning.
 *
 * @param division
 *          e.g. Ensembl, EnsemblMetazoa
 * @param accession
 *          e.g. ENST00000592782, Y55B1AR.1.1
 * @param start
 *          start of the range to map
 * @param end
 *          end of the range to map
 * @param cdsOrCdna
 *          the mapping type, "cds" or "cdna"
 * @return the parsed mapping, or null on any failure
 */
GeneLociI getIdMapping(String division, String accession, int start,
        int end, String cdsOrCdna)
{
  URL url = null;
  BufferedReader br = null;

  try
  {
    String domain = new EnsemblInfo().getDomain(division);
    if (domain == null)
    {
      return null;
    }

    url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
    br = getHttpResponse(url, null);
    return br == null ? null
            : parseIdMappingResponse(br, accession, domain);
  } catch (Throwable t)
  {
    // best-effort lookup: report the problem and answer null
    System.out.println("Error calling " + url + ": " + t.getMessage());
    return null;
  } finally
  {
    if (br != null)
    {
      try
      {
        br.close();
      } catch (IOException e)
      {
        // ignore - nothing useful can be done if close fails
      }
    }
  }
}
/**
* Constructs a URL to the /map/cds/
* {"mappings":
* [
* {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
* "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
* {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
* "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
* ]
* }
*
*
* @param br
* @param accession
* @param domain
* @return
*/
GeneLociI parseIdMappingResponse(BufferedReader br, String accession,
String domain)
{
JSONParser jp = new JSONParser();
try
{
JSONObject parsed = (JSONObject) jp.parse(br);
JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
Iterator rvals = mappings.iterator();
String assembly = null;
String chromosome = null;
int fromEnd = 0;
List