*/
// featureStore = Collections
// .synchronizedSortedMap(new TreeMap<String, FeatureStore>());
- featureStore = new TreeMap<String, FeatureStore>();
+ featureStore = new TreeMap<>();
}
/**
}
/**
- * Answers true if the given type is one of the specified sequence ontology
- * terms (or a sub-type of one), or if no terms are supplied. Answers false if
- * filter terms are specified and the given term does not match any of them.
+ * Answers true if the given type matches one of the specified terms (or is a
+ * sub-type of one in the Sequence Ontology), or if no terms are supplied.
+ * Answers false if filter terms are specified and the given term does not
+ * match any of them.
*
* @param type
* @param soTerm
SequenceOntologyI so = SequenceOntologyFactory.getInstance();
for (String term : soTerm)
{
- if (so.isA(type, term))
+ if (type.equals(term) || so.isA(type, term))
{
return true;
}
String group, String... type);
/**
- * Answers a list of all features stored, whose type either matches one of the
- * given ontology terms, or is a specialisation of a term in the Sequence
- * Ontology. Results are returned in no particular guaranteed order.
+ * Answers a list of all features stored, whose type either matches, or is a
+ * specialisation (in the Sequence Ontology) of, one of the given terms.
+ * Results are returned in no particular order.
*
* @param ontologyTerm
* @return
package jalview.ext.ensembl;
import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
+import java.util.ArrayList;
+import java.util.List;
+
import com.stevesoft.pat.Regex;
/**
}
/**
- * Answers true if the sequence feature type is 'exon' (or a subtype of exon
- * in the Sequence Ontology), and the Parent of the feature is the transcript
- * we are retrieving
+ * Answers a list of sequence features (if any) whose type is 'exon' (or a
+ * subtype of exon in the Sequence Ontology), and whose Parent is the
+ * transcript we are retrieving
*/
@Override
- protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+ String accId)
{
- if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
- SequenceOntologyI.EXON))
+ List<SequenceFeature> result = new ArrayList<>();
+ List<SequenceFeature> sfs = seq.getFeatures()
+ .getFeaturesByOntology(SequenceOntologyI.EXON);
+ for (SequenceFeature sf : sfs)
{
String parentFeature = (String) sf.getValue(PARENT);
if (("transcript:" + accId).equals(parentFeature))
{
- return true;
+ result.add(sf);
}
}
- return false;
+
+ return result;
}
/**
}
/**
- * Answers true if the sequence feature type is 'CDS' (or a subtype of CDS in
- * the Sequence Ontology), and the Parent of the feature is the transcript we
- * are retrieving
+ * Answers a list of sequence features (if any) whose type is 'CDS' (or a
+ * subtype of CDS in the Sequence Ontology), and whose Parent is the
+ * transcript we are retrieving
*/
@Override
- protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+ String accId)
{
- if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
- SequenceOntologyI.CDS))
+ List<SequenceFeature> result = new ArrayList<>();
+ List<SequenceFeature> sfs = seq.getFeatures()
+ .getFeaturesByOntology(SequenceOntologyI.CDS);
+ for (SequenceFeature sf : sfs)
{
String parentFeature = (String) sf.getValue(PARENT);
if (("transcript:" + accId).equals(parentFeature))
{
- return true;
+ result.add(sf);
}
}
- return false;
+ return result;
}
/**
protected List<int[]> getCdsRanges(SequenceI dnaSeq)
{
int len = dnaSeq.getLength();
- List<int[]> ranges = new ArrayList<int[]>();
+ List<int[]> ranges = new ArrayList<>();
ranges.add(new int[] { 1, len });
return ranges;
}
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.io.DataSourceType;
import jalview.io.FeaturesFile;
import jalview.io.FileParse;
+import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
// TODO: use a vararg String... for getSequenceRecords instead?
List<String> queries = new ArrayList<>();
queries.add(query);
- FileParse fp = getSequenceReader(queries);
- if (fp == null || !fp.isValid())
+ BufferedReader fp = getSequenceReader(queries);
+ if (fp == null)
{
return null;
}
- FeaturesFile fr = new FeaturesFile(fp);
+ FeaturesFile fr = new FeaturesFile(
+ new FileParse(fp, null, DataSourceType.URL));
return new Alignment(fr.getSeqsAsArray());
}
* describes the required encoding of the response.
*/
@Override
- protected String getRequestMimeType(boolean multipleIds)
+ protected String getRequestMimeType()
{
return "text/x-gff3";
}
/**
- * Returns the MIME type for GFF3.
+ * Returns the MIME type for GFF3
*/
@Override
protected String getResponseMimeType()
}
/**
- * Answers true for a feature of type 'gene' (or a sub-type of gene in the
- * Sequence Ontology), whose ID is the accession we are retrieving
+ * Answers a list of sequence features (if any) whose type is 'gene' (or a
+ * subtype of gene in the Sequence Ontology), and whose ID is the accession we
+ * are retrieving
*/
@Override
- protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+ String accId)
{
- if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
- SequenceOntologyI.GENE))
+ List<SequenceFeature> result = new ArrayList<>();
+ List<SequenceFeature> sfs = seq.getFeatures()
+ .getFeaturesByOntology(SequenceOntologyI.GENE);
+ for (SequenceFeature sf : sfs)
{
// NB features as gff use 'ID'; rest services return as 'id'
String id = (String) sf.getValue("ID");
if ((GENE_PREFIX + accId).equalsIgnoreCase(id))
{
- return true;
+ result.add(sf);
}
}
- return false;
+ return result;
}
/**
}
/**
- * Answers false. This allows an optimisation - a single 'gene' feature is all
- * that is needed to identify the positions of the gene on the genomic
- * sequence.
- */
- @Override
- protected boolean isSpliceable()
- {
- return false;
- }
-
- /**
* Override to do nothing as Ensembl doesn't return a protein sequence for a
* gene identifier
*/
package jalview.ext.ensembl;
import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
+
+import java.util.ArrayList;
+import java.util.List;
/**
* A client to fetch genomic sequence from Ensembl
}
/**
- * Answers true if the sequence feature type is 'transcript' (or a subtype of
- * transcript in the Sequence Ontology), and the ID of the feature is the
- * transcript we are retrieving
+ * Answers a list of sequence features (if any) whose type is 'transcript' (or
+ * a subtype of transcript in the Sequence Ontology), and whose ID is the
+ * accession we are retrieving.
+ * <p>
+ * Note we also include features of type "NMD_transcript_variant", although
+ * not strictly 'transcript' in the SO, as they are used in Ensembl as if they
+ * were.
*/
@Override
- protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+ String accId)
{
- if (isTranscript(sf.getType()))
+ List<SequenceFeature> result = new ArrayList<>();
+ List<SequenceFeature> sfs = seq.getFeatures().getFeaturesByOntology(
+ SequenceOntologyI.TRANSCRIPT,
+ SequenceOntologyI.NMD_TRANSCRIPT_VARIANT);
+ for (SequenceFeature sf : sfs)
{
+ // NB features as gff use 'ID'; rest services return as 'id'
String id = (String) sf.getValue("ID");
if (("transcript:" + accId).equals(id))
{
- return true;
+ result.add(sf);
}
}
- return false;
+ return result;
}
}
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Answers the domain (http://rest.ensembl.org or
* http://rest.ensemblgenomes.org) for the given division, or null if not
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Returns the gene id related to the given identifier (which may be for a
* gene, transcript or protein), or null if none is found
}
@Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
- @Override
protected URL getUrl(List<String> ids) throws MalformedURLException
{
return null; // not used
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.util.ArrayList;
+import java.util.List;
import com.stevesoft.pat.Regex;
}
@Override
- protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+ String accId)
{
- // not applicable - protein sequence is not a 'subset' of genomic sequence
- return false;
+ return new ArrayList<>();
}
@Override
*/
package jalview.ext.ensembl;
-import jalview.io.DataSourceType;
-import jalview.io.FileParse;
import jalview.util.StringUtils;
import java.io.BufferedReader;
* @see https://github.com/Ensembl/ensembl-rest/wiki/Change-log
* @see http://rest.ensembl.org/info/rest?content-type=application/json
*/
- private static final String LATEST_ENSEMBLGENOMES_REST_VERSION = "6.3";
+ private static final String LATEST_ENSEMBLGENOMES_REST_VERSION = "7.0";
- private static final String LATEST_ENSEMBL_REST_VERSION = "6.3";
+ private static final String LATEST_ENSEMBL_REST_VERSION = "7.0";
private static final String REST_CHANGE_LOG = "https://github.com/Ensembl/ensembl-rest/wiki/Change-log";
protected abstract boolean useGetRequest();
/**
- * Return the desired value for the Content-Type request header
- *
- * @param multipleIds
+ * Returns the desired value for the Content-Type request header. Default is
+ * application/json, override if required to vary this.
*
* @return
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getRequestMimeType(boolean multipleIds);
+ protected String getRequestMimeType()
+ {
+ return "application/json";
+ }
/**
- * Return the desired value for the Accept request header
+ * Returns the desired value for the Accept request header. Default is
+ * application/json, override if required to vary this.
*
* @return
* @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
*/
- protected abstract String getResponseMimeType();
+ protected String getResponseMimeType()
+ {
+ return "application/json";
+ }
/**
* Checks Ensembl's REST 'ping' endpoint, and returns true if response
}
/**
- * returns a reader to a Fasta response from the Ensembl sequence endpoint
+ * Returns a reader to a (Json) response from the Ensembl sequence endpoint.
+ * If the request failed the return value may be null.
*
* @param ids
* @return
* @throws IOException
*/
- protected FileParse getSequenceReader(List<String> ids) throws IOException
+ protected BufferedReader getSequenceReader(List<String> ids)
+ throws IOException
{
URL url = getUrl(ids);
BufferedReader reader = getHttpResponse(url, ids);
- if (reader == null)
- {
- // request failed
- return null;
- }
- FileParse fp = new FileParse(reader, url.toString(),
- DataSourceType.URL);
- return fp;
+ return reader;
}
/**
boolean multipleIds = ids != null && ids.size() > 1;
connection.setRequestMethod(
multipleIds ? HttpMethod.POST : HttpMethod.GET);
- connection.setRequestProperty("Content-Type",
- getRequestMimeType(multipleIds));
+ connection.setRequestProperty("Content-Type", getRequestMimeType());
connection.setRequestProperty("Accept", getResponseMimeType());
connection.setUseCaches(false);
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.exceptions.JalviewException;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.IntRangeComparator;
import jalview.util.MapList;
+import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
/**
* Base class for Ensembl sequence fetchers
*
inProgress = false;
throw new JalviewException("ENSEMBL Rest API not available.");
}
- FileParse fp = getSequenceReader(ids);
- if (fp == null)
+ BufferedReader br = getSequenceReader(ids);
+ if (br == null)
{
return alignment;
}
- FastaFile fr = new FastaFile(fp);
- if (fr.hasWarningMessage())
+ List<SequenceI> seqs = parseSequenceJson(br);
+
+ if (seqs.isEmpty())
{
- System.out.println(
- String.format("Warning when retrieving %d ids %s\n%s",
- ids.size(), ids.toString(), fr.getWarningMessage()));
+ throw new IOException("No data returned for " + ids);
}
- else if (fr.getSeqs().size() != ids.size())
+
+ if (seqs.size() != ids.size())
{
System.out.println(String.format(
"Only retrieved %d sequences for %d query strings",
- fr.getSeqs().size(), ids.size()));
+ seqs.size(), ids.size()));
}
- if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
+ if (!seqs.isEmpty())
{
- /*
- * POST request has returned an empty FASTA file e.g. for invalid id
- */
- throw new IOException("No data returned for " + ids);
- }
-
- if (fr.getSeqs().size() > 0)
- {
- AlignmentI seqal = new Alignment(fr.getSeqsAsArray());
- for (SequenceI sq : seqal.getSequences())
+ AlignmentI seqal = new Alignment(
+ seqs.toArray(new SequenceI[seqs.size()]));
+ for (SequenceI seq : seqs)
{
- if (sq.getDescription() == null)
+ if (seq.getDescription() == null)
{
- sq.setDescription(getDbName());
+ seq.setDescription(getDbName());
}
- String name = sq.getName();
+ String name = seq.getName();
if (ids.contains(name)
|| ids.contains(name.replace("ENSP", "ENST")))
{
- DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
+ // TODO JAL-3077 use true accession version in dbref
+ DBRefEntry dbref = DBRefUtils.parseToDbRef(seq, getDbSource(),
getEnsemblDataVersion(), name);
- sq.addDBRef(dbref);
+ seq.addDBRef(dbref);
}
}
if (alignment == null)
}
/**
+ * Parses a JSON response for a single sequence ID query
+ *
+ * @param br
+ * @return a list holding the one parsed sequence (empty if parsing failed)
+ * @see http://rest.ensembl.org/documentation/info/sequence_id
+ */
+ protected List<SequenceI> parseSequenceJson(BufferedReader br)
+ {
+ JSONParser jp = new JSONParser();
+ List<SequenceI> result = new ArrayList<>();
+ try
+ {
+ /*
+ * for now, assumes only one sequence returned; refactor if needed
+ * in future to handle a JSONArray with more than one
+ */
+ final JSONObject val = (JSONObject) jp.parse(br);
+ Object s = val.get("desc");
+ String desc = s == null ? null : s.toString();
+ s = val.get("id");
+ String id = s == null ? null : s.toString();
+ s = val.get("seq");
+ String seq = s == null ? null : s.toString();
+ Sequence sequence = new Sequence(id, seq);
+ if (desc != null)
+ {
+ sequence.setDescription(desc);
+ }
+ // todo JAL-3077 make a DBRefEntry with true accession version
+ // s = val.get("version");
+ // String version = s == null ? "0" : s.toString();
+ // DBRefEntry dbref = new DBRefEntry(getDbSource(), version, id);
+ // sequence.addDBRef(dbref);
+ result.add(sequence);
+ } catch (ParseException | IOException e)
+ {
+ System.err.println("Error processing JSON response: " + e.toString());
+ // ignore
+ }
+ return result;
+ }
+
+ /**
* Returns the URL for the REST call
*
* @return
}
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
urlstring.append("?type=").append(getSourceEnsemblType().getType());
- urlstring.append(("&Accept=text/x-fasta"));
+ urlstring.append(("&Accept=application/json"));
+ urlstring.append(("&Content-Type=application/json"));
String objectType = getObjectType();
if (objectType != null)
return false;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return multipleIds ? "application/json" : "text/x-fasta";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "text/x-fasta";
- }
-
/**
*
* @return the configured sequence return type for this source
protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
String accId, int start)
{
- List<SequenceFeature> sfs = sourceSequence.getFeatures()
- .getPositionalFeatures();
+ List<SequenceFeature> sfs = getIdentifyingFeatures(sourceSequence,
+ accId);
if (sfs.isEmpty())
{
return null;
for (SequenceFeature sf : sfs)
{
+ int strand = sf.getStrand();
+ strand = strand == 0 ? 1 : strand; // treat unknown as forward
+
+ if (directionSet && strand != direction)
+ {
+ // abort - mix of forward and backward
+ System.err
+ .println("Error: forward and backward strand for " + accId);
+ return null;
+ }
+ direction = strand;
+ directionSet = true;
+
/*
- * accept the target feature type or a specialisation of it
- * (e.g. coding_exon for exon)
+ * add to CDS ranges, semi-sorted forwards/backwards
*/
- if (identifiesSequence(sf, accId))
+ if (strand < 0)
{
- int strand = sf.getStrand();
- strand = strand == 0 ? 1 : strand; // treat unknown as forward
-
- if (directionSet && strand != direction)
- {
- // abort - mix of forward and backward
- System.err.println(
- "Error: forward and backward strand for " + accId);
- return null;
- }
- direction = strand;
- directionSet = true;
-
- /*
- * add to CDS ranges, semi-sorted forwards/backwards
- */
- if (strand < 0)
- {
- regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
- }
- else
- {
- regions.add(new int[] { sf.getBegin(), sf.getEnd() });
- }
- mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
-
- if (!isSpliceable())
- {
- /*
- * 'gene' sequence is contiguous so we can stop as soon as its
- * identifying feature has been found
- */
- break;
- }
+ regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
+ }
+ else
+ {
+ regions.add(new int[] { sf.getBegin(), sf.getEnd() });
}
+ mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
}
if (regions.isEmpty())
}
/**
- * Answers true if the sequence being retrieved may occupy discontiguous
- * regions on the genomic sequence.
- */
- protected boolean isSpliceable()
- {
- return true;
- }
-
- /**
- * Returns true if the sequence feature marks positions of the genomic
+ * Answers a list of sequence features that mark positions of the genomic
* sequence feature which are within the sequence being retrieved. For
* example, an 'exon' feature whose parent is the target transcript marks the
- * cdna positions of the transcript.
+ * cdna positions of the transcript. For a gene sequence, this is trivially
+ * just the 'gene' feature with matching gene id.
*
- * @param sf
+ * @param seq
* @param accId
* @return
*/
- protected abstract boolean identifiesSequence(SequenceFeature sf,
- String accId);
+ protected abstract List<SequenceFeature> getIdentifyingFeatures(
+ SequenceI seq, String accId);
/**
* Transfers the sequence feature to the target sequence, locating its start
return true;
}
- @Override
- protected String getRequestMimeType(boolean multipleIds)
- {
- return "application/json";
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return "application/json";
- }
-
/**
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id) and database
*/
public List<DBRefEntry> getCrossReferences(String identifier)
{
- List<DBRefEntry> result = new ArrayList<DBRefEntry>();
- List<String> ids = new ArrayList<String>();
+ List<DBRefEntry> result = new ArrayList<>();
+ List<String> ids = new ArrayList<>();
ids.add(identifier);
BufferedReader br = null;
throws IOException
{
JSONParser jp = new JSONParser();
- List<DBRefEntry> result = new ArrayList<DBRefEntry>();
+ List<DBRefEntry> result = new ArrayList<>();
try
{
JSONArray responses = (JSONArray) jp.parse(br);
import java.util.Map;
import java.util.Set;
-import junit.extensions.PA;
-
import org.testng.annotations.Test;
+import junit.extensions.PA;
+
public class SequenceFeaturesTest
{
@Test(groups = "Functional")
assertTrue(store.getFeaturesByOntology(new String[] {}).isEmpty());
assertTrue(store.getFeaturesByOntology((String[]) null).isEmpty());
- SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20,
+ SequenceFeature transcriptFeature = new SequenceFeature("transcript", "desc", 10, 20,
Float.NaN, null);
- store.add(sf1);
+ store.add(transcriptFeature);
- // mRNA isA transcript; added here 'as if' non-positional
- // just to show that non-positional features are included in results
- SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 0, 0,
+ /*
+ * mRNA is a sub-type of transcript; added here 'as if' non-positional
+ * just to show that non-positional features are included in results
+ */
+ SequenceFeature mrnaFeature = new SequenceFeature("mRNA", "desc", 0, 0,
Float.NaN, null);
- store.add(sf2);
+ store.add(mrnaFeature);
- SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 30, 40,
+ SequenceFeature pfamFeature = new SequenceFeature("Pfam", "desc", 30, 40,
Float.NaN, null);
- store.add(sf3);
+ store.add(pfamFeature);
+ /*
+ * "transcript" matches both itself and the sub-term "mRNA"
+ */
features = store.getFeaturesByOntology("transcript");
assertEquals(features.size(), 2);
- assertTrue(features.contains(sf1));
- assertTrue(features.contains(sf2));
+ assertTrue(features.contains(transcriptFeature));
+ assertTrue(features.contains(mrnaFeature));
+ /*
+ * "mRNA" matches itself but not parent term "transcript"
+ */
features = store.getFeaturesByOntology("mRNA");
assertEquals(features.size(), 1);
- assertTrue(features.contains(sf2));
+ assertTrue(features.contains(mrnaFeature));
+ /*
+ * "Pfam" is not an SO term but is included as an exact match
+ */
features = store.getFeaturesByOntology("mRNA", "Pfam");
assertEquals(features.size(), 2);
- assertTrue(features.contains(sf2));
- assertTrue(features.contains(sf3));
+ assertTrue(features.contains(mrnaFeature));
+ assertTrue(features.contains(pfamFeature));
features = store.getFeaturesByOntology("sequence_variant");
assertTrue(features.isEmpty());
@Test(groups = "Functional")
public void testSortFeatures()
{
- List<SequenceFeature> sfs = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> sfs = new ArrayList<>();
SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 30, 80,
Float.NaN, null);
sfs.add(sf1);
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
* accession id as parent
*/
@Test(groups = "Functional")
- public void testIdentifiesSequence()
+ public void testGetIdentifyingFeatures()
{
String accId = "ABC123";
- EnsemblCdna testee = new EnsemblCdna();
+ SequenceI seq = new Sequence(accId, "MKLNFRQIE");
- // exon with no parent not valid
- SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
- assertFalse(testee.identifiesSequence(sf, accId));
+ // exon with no parent: not valid
+ SequenceFeature sf1 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+ seq.addSequenceFeature(sf1);
- // exon with wrong parent not valid
- sf.setValue("Parent", "transcript:XYZ");
- assertFalse(testee.identifiesSequence(sf, accId));
+ // exon with wrong parent: not valid
+ SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+ sf2.setValue("Parent", "transcript:XYZ");
+ seq.addSequenceFeature(sf2);
// exon with right parent is valid
- sf.setValue("Parent", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+ sf3.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf3);
// exon sub-type with right parent is valid
- sf = new SequenceFeature("coding_exon", "", 1, 2, 0f, null);
- sf.setValue("Parent", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
+ null);
+ sf4.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf4);
// transcript not valid:
- sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
- sf.setValue("Parent", "transcript:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
+ null);
+ sf5.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf5);
// CDS not valid:
- sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
- sf.setValue("Parent", "transcript:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf6 = new SequenceFeature("CDS", "", 1, 2, 0f,
+ null);
+ sf6.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf6);
+
+ List<SequenceFeature> sfs = new EnsemblCdna()
+ .getIdentifyingFeatures(seq, accId);
+ assertFalse(sfs.contains(sf1));
+ assertFalse(sfs.contains(sf2));
+ assertTrue(sfs.contains(sf3));
+ assertTrue(sfs.contains(sf4));
+ assertFalse(sfs.contains(sf5));
+ assertFalse(sfs.contains(sf6));
}
@Test(groups = "Functional")
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertTrue;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
* accession id as parent
*/
@Test(groups = "Functional")
- public void testIdentifiesSequence()
+ public void testGetIdentifyingFeatures()
{
String accId = "ABC123";
- EnsemblCds testee = new EnsemblCds();
+ SequenceI seq = new Sequence(accId, "MKDONS");
// cds with no parent not valid
- SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf1 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+ seq.addSequenceFeature(sf1);
// cds with wrong parent not valid
- sf.setValue("Parent", "transcript:XYZ");
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf2 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+ sf2.setValue("Parent", "transcript:XYZ");
+ seq.addSequenceFeature(sf2);
// cds with right parent is valid
- sf.setValue("Parent", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf3 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+ sf3.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf3);
// cds sub-type with right parent is valid
- sf = new SequenceFeature("CDS_predicted", "", 1, 2, 0f, null);
- sf.setValue("Parent", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf4 = new SequenceFeature("CDS_predicted", "", 1, 2, 0f,
+ null);
+ sf4.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf4);
// transcript not valid:
- sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
- sf.setValue("Parent", "transcript:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
+ null);
+ sf5.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf5);
// exon not valid:
- sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf.setValue("Parent", "transcript:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+ sf6.setValue("Parent", "transcript:" + accId);
+ seq.addSequenceFeature(sf6);
+
+ List<SequenceFeature> sfs = new EnsemblCds().getIdentifyingFeatures(seq,
+ accId);
+ assertFalse(sfs.contains(sf1));
+ assertFalse(sfs.contains(sf2));
+ assertTrue(sfs.contains(sf3));
+ assertTrue(sfs.contains(sf4));
+ assertFalse(sfs.contains(sf5));
+ assertFalse(sfs.contains(sf6));
}
@Test(groups = "Functional")
import jalview.api.FeatureSettingsModelI;
import jalview.bin.Cache;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
genomic.setEnd(50000);
String geneId = "ABC123";
- // gene at (start+20000) length 501
- // should be ignored - the first 'gene' found defines the whole range
- // (note features are found in position order, not addition order)
- SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
- null);
- sf.setValue("ID", "gene:" + geneId);
- sf.setStrand("+");
- genomic.addSequenceFeature(sf);
-
// gene at (start + 10500) length 101
- sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+ SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
+ null);
sf.setValue("ID", "gene:" + geneId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
genomic.setEnd(50000);
String geneId = "ABC123";
- // gene at (start+20000) length 501
- // should be ignored - the first 'gene' found defines the whole range
- // (real data would only have one such feature)
- SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
- 20500, 0f, null);
- sf.setValue("ID", "gene:" + geneId);
- sf.setStrand("-");
- genomic.addSequenceFeature(sf);
-
// gene at (start + 10500) length 101
- sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+ SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
+ null);
sf.setValue("ID", "gene:" + geneId);
sf.setStrand("+");
genomic.addSequenceFeature(sf);
* accession id as ID
*/
@Test(groups = "Functional")
- public void testIdentifiesSequence()
+ public void testGetIdentifyingFeatures()
{
// Reviewer note: in the '+' lines each candidate feature is added to a
// single sequence, getIdentifyingFeatures is called once for that
// sequence and accession id, and the assertions at the end check which of
// the candidate features are contained in the returned list.
String accId = "ABC123";
- EnsemblGene testee = new EnsemblGene();
+ SequenceI seq = new Sequence(accId, "HIBEES");
// a 'gene' feature with no ID value set: expected to be rejected
- SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf1 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+ seq.addSequenceFeature(sf1);
// a 'gene' feature whose ID is "gene:XYZ" (not the accession id):
// expected to be rejected
- sf.setValue("ID", "gene:XYZ");
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf2 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+ sf2.setValue("ID", "gene:XYZ");
+ seq.addSequenceFeature(sf2);
// a 'gene' feature whose ID is "gene:" + accId: expected to be accepted
- sf.setValue("ID", "gene:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf3 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+ sf3.setValue("ID", "gene:" + accId);
+ seq.addSequenceFeature(sf3);
// a gene sub-type ('snRNA_gene') with the matching ID: expected to be
// accepted
- sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
- sf.setValue("ID", "gene:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
-
- // test is not case-sensitive
- assertTrue(testee.identifiesSequence(sf, accId.toLowerCase()));
+ SequenceFeature sf4 = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
+ sf4.setValue("ID", "gene:" + accId);
+ seq.addSequenceFeature(sf4);
// a 'transcript' feature, even with the matching ID: expected to be
// rejected
- sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
- sf.setValue("ID", "gene:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, null);
+ sf5.setValue("ID", "gene:" + accId);
+ seq.addSequenceFeature(sf5);
// an 'exon' feature, even with the matching ID: expected to be rejected
- sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf.setValue("ID", "gene:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+ sf6.setValue("ID", "gene:" + accId);
+ seq.addSequenceFeature(sf6);
+
+ List<SequenceFeature> sfs = new EnsemblGene()
+ .getIdentifyingFeatures(seq, accId);
+ assertFalse(sfs.contains(sf1));
+ assertFalse(sfs.contains(sf2));
+ assertTrue(sfs.contains(sf3));
+ assertTrue(sfs.contains(sf4));
+ assertFalse(sfs.contains(sf5));
+ assertFalse(sfs.contains(sf6));
}
/**
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertTrue;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
* accession id as ID
*/
@Test(groups = "Functional")
- public void testIdentifiesSequence()
+ public void testGetIdentifyingFeatures()
{
// Reviewer note: in the '+' lines each candidate feature is added to a
// single sequence, getIdentifyingFeatures is called once for that
// sequence and accession id, and the assertions at the end check which of
// the candidate features are contained in the returned list.
String accId = "ABC123";
- EnsemblGenome testee = new EnsemblGenome();
+ SequenceI seq = new Sequence(accId, "HEARTS");
// a 'transcript' feature with no ID value set: expected to be rejected
- SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f,
+ SequenceFeature sf1 = new SequenceFeature("transcript", "", 1, 2, 0f,
null);
- assertFalse(testee.identifiesSequence(sf, accId));
+ seq.addSequenceFeature(sf1);
// a 'transcript' feature whose ID is just "transcript" (no accession
// id): expected to be rejected
- sf.setValue("ID", "transcript");
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf2 = new SequenceFeature("transcript", "", 1, 2, 0f,
+ null);
+ sf2.setValue("ID", "transcript");
+ seq.addSequenceFeature(sf2);
// a 'transcript' feature whose ID is "transcript:" + accId: expected to
// be accepted
- sf.setValue("ID", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf3 = new SequenceFeature("transcript", "", 1, 2, 0f,
+ null);
+ sf3.setValue("ID", "transcript:" + accId);
+ seq.addSequenceFeature(sf3);
// a transcript sub-type ('ncRNA') with the matching ID: expected to be
// accepted
- sf = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
- sf.setValue("ID", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf4 = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
+ sf4.setValue("ID", "transcript:" + accId);
+ seq.addSequenceFeature(sf4);
// Ensembl treats NMD_transcript_variant as if it were a transcript, so
// this feature is expected to be accepted
- sf = new SequenceFeature("NMD_transcript_variant", "", 1, 2, 0f, null);
- sf.setValue("ID", "transcript:" + accId);
- assertTrue(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf5 = new SequenceFeature("NMD_transcript_variant", "",
+ 1, 2, 0f, null);
+ sf5.setValue("ID", "transcript:" + accId);
+ seq.addSequenceFeature(sf5);
// a 'gene' feature, even with the matching ID: expected to be rejected
- sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
- sf.setValue("ID", "transcript:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf6 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+ sf6.setValue("ID", "transcript:" + accId);
+ seq.addSequenceFeature(sf6);
// an 'exon' feature, even with the matching ID: expected to be rejected
- sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
- sf.setValue("ID", "transcript:" + accId);
- assertFalse(testee.identifiesSequence(sf, accId));
+ SequenceFeature sf7 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+ sf7.setValue("ID", "transcript:" + accId);
+ seq.addSequenceFeature(sf7);
+
+ List<SequenceFeature> sfs = new EnsemblGenome()
+ .getIdentifyingFeatures(seq, accId);
+ assertFalse(sfs.contains(sf1));
+ assertFalse(sfs.contains(sf2));
+ assertTrue(sfs.contains(sf3));
+ assertTrue(sfs.contains(sf4));
+ assertTrue(sfs.contains(sf5));
+ assertFalse(sfs.contains(sf6));
+ assertFalse(sfs.contains(sf7));
}
}
{
return false;
}
-
- @Override
- protected String getRequestMimeType(boolean b)
- {
- return null;
- }
-
- @Override
- protected String getResponseMimeType()
- {
- return null;
- }
-
};
}
package jalview.ext.ensembl;
import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.util.ArrayList;
+import java.util.List;
/**
* A convenience class to simplify writing unit tests (pending Mockito or
}
@Override
- protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+ String accId)
{
// Stub implementation that ignores both arguments: the '-' version always
// answered false, the '+' version always answers a new, empty list.
- return false;
+ return new ArrayList<>();
}
}
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertSame;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FastaFile;
-import jalview.io.FileParse;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyLite;
+ "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+ "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+ "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
- + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDG\n"
+ // ? insertion added in ENSP00000288602.11, not in P15056
+ + "APLNQLMRCLRKYQSRTPSPLLHSVPSEIVFDFEPGPVFR\n"
+ // end insertion
+ + "GSTTGLSATPPASLPGSLTNVKALQKSP\n"
+ "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+ "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+ "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
}
@Test(dataProvider = "ens_seqs", suiteName = "live")
- public void testGetOneSeqs(EnsemblRestClient proxy, String sq,
+ public void testGetSequenceRecords(EnsemblSeqProxy proxy, String sq,
String fastasq) throws Exception
{
- FileParse fp = proxy.getSequenceReader(Arrays
- .asList(new String[] { sq }));
- SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
// parse the expected sequences from the supplied FASTA text
FastaFile trueRes = new FastaFile(fastasq, DataSourceType.PASTE);
- SequenceI[] trueSqs = trueRes.getSeqsAsArray();
- Assert.assertEquals(sqs.length, trueSqs.length,
+ SequenceI[] expected = trueRes.getSeqsAsArray();
+ AlignmentI retrieved = proxy.getSequenceRecords(sq);
+
+ Assert.assertEquals(retrieved.getHeight(), expected.length,
"Different number of sequences retrieved for query " + sq);
- Alignment ral = new Alignment(sqs);
- for (SequenceI tr : trueSqs)
+
+ for (SequenceI tr : expected)
{
// look up each expected sequence by name among the retrieved sequences
SequenceI[] rseq;
Assert.assertNotNull(
- rseq = ral.findSequenceMatch(tr.getName()),
+ rseq = retrieved.findSequenceMatch(tr.getName()),
"Couldn't find sequences matching expected sequence "
+ tr.getName());
// NOTE(review): as shown here, the length-1 check below is paired with a
// "Sequences differ" message; lines may have been elided from this
// excerpt between the two - confirm against the full file.
Assert.assertEquals(rseq.length, 1,
"Sequences differ for " + tr.getName() + "\n" + "Exp:"
+ tr.getSequenceAsString() + "\n" + "Got:"
+ rseq[0].getSequenceAsString());
-
}
}