From: gmungoc Date: Thu, 3 Mar 2016 09:53:40 +0000 (+0000) Subject: JAL-1705 include stop codons in derived CDS; support ensemblgenomes X-Git-Tag: Release_2_10_0~296^2~15 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b8058f3f849f44740a695c83e96bdca3a197af5c;p=jalview.git JAL-1705 include stop codons in derived CDS; support ensemblgenomes --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index e624ce7..2f9fcb2 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -55,6 +55,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.NoSuchElementException; import java.util.Set; import java.util.TreeMap; @@ -1159,6 +1160,9 @@ public class AlignmentUtils } catch (IncompleteCodonException e) { // possible incomplete trailing codon - ignore + } catch (NoSuchElementException e) + { + // possibly peptide lacking STOP } } } diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index 467fc6d..028492e 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -8,6 +8,14 @@ import java.util.List; import com.stevesoft.pat.Regex; +/** + * A client to fetch CDNA sequence from Ensembl (i.e. that part of the genomic + * sequence that is transcribed to RNA, but not necessarily translated to + * protein) + * + * @author gmcarstairs + * + */ public class EnsemblCdna extends EnsemblSeqProxy { /* @@ -26,11 +34,24 @@ public class EnsemblCdna extends EnsemblSeqProxy EnsemblFeatureType.exon, EnsemblFeatureType.cds, EnsemblFeatureType.variation }; + /** + * Default constructor (to use rest.ensembl.org) + */ public EnsemblCdna() { super(); } + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblCdna(String d) + { + super(d); + } + @Override public String getDbName() { diff --git a/src/jalview/ext/ensembl/EnsemblCds.java b/src/jalview/ext/ensembl/EnsemblCds.java index ec5780f..63df7a7 100644 --- a/src/jalview/ext/ensembl/EnsemblCds.java +++ b/src/jalview/ext/ensembl/EnsemblCds.java @@ -5,8 +5,19 @@ import jalview.datamodel.SequenceI; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; +import java.util.ArrayList; import java.util.List; +/** + * A client for direct fetching of CDS sequences from Ensembl (i.e. that part of + * the genomic sequence that is translated to protein) + * + * TODO: not currently used as CDS sequences are computed from CDS features on + * transcripts - delete this class? + * + * @author gmcarstairs + * + */ public class EnsemblCds extends EnsemblSeqProxy { /* @@ -18,13 +29,23 @@ public class EnsemblCds extends EnsemblSeqProxy EnsemblFeatureType.variation }; /** - * Constructor + * Default constructor (to use rest.ensembl.org) */ public EnsemblCds() { super(); } + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblCds(String d) + { + super(d); + } + @Override public String getDbName() { @@ -87,11 +108,12 @@ public class EnsemblCds extends EnsemblSeqProxy * they are redundant information). */ @Override - protected int getCdsRanges(SequenceI dnaSeq, List ranges) + protected List getCdsRanges(SequenceI dnaSeq) { int len = dnaSeq.getLength(); + List ranges = new ArrayList(); ranges.add(new int[] { 1, len }); - return len; + return ranges; } } diff --git a/src/jalview/ext/ensembl/EnsemblFeatures.java b/src/jalview/ext/ensembl/EnsemblFeatures.java index 22faba9..0547433 100644 --- a/src/jalview/ext/ensembl/EnsemblFeatures.java +++ b/src/jalview/ext/ensembl/EnsemblFeatures.java @@ -27,6 +27,24 @@ class EnsemblFeatures extends EnsemblRestClient private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds, EnsemblFeatureType.exon, EnsemblFeatureType.variation }; + /** + * Default constructor (to use rest.ensembl.org) + */ + public EnsemblFeatures() + { + super(); + } + + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblFeatures(String d) + { + super(d); + } + @Override public String getDbName() { @@ -61,7 +79,7 @@ class EnsemblFeatures extends EnsemblRestClient protected URL getUrl(List ids) throws MalformedURLException { StringBuffer urlstring = new StringBuffer(128); - urlstring.append(ENSEMBL_REST).append("/overlap/id/") + urlstring.append(getDomain()).append("/overlap/id/") .append(ids.get(0)); // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 6507ff5..cec7a8d 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -40,10 +40,28 @@ public class EnsemblGene extends EnsemblSeqProxy EnsemblFeatureType.exon, EnsemblFeatureType.cds, EnsemblFeatureType.variation }; + /** + * Default constructor (to use rest.ensembl.org) + */ + public EnsemblGene() + { + super(); + } + + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblGene(String d) + { + super(d); + } + @Override public String getDbName() { - return "ENSEMBL (GENE)"; + return "ENSEMBL"; } @Override @@ -97,7 +115,7 @@ public class EnsemblGene extends EnsemblSeqProxy if (isTranscriptIdentifier(query)) { // we are assuming all transcripts have the same gene parent here - query = new EnsemblLookup().getParent(queries[0]); + query = new EnsemblLookup(getDomain()).getParent(queries[0]); if (query == null) { return null; @@ -110,7 +128,7 @@ public class EnsemblGene extends EnsemblSeqProxy */ if (!isGeneIdentifier(query)) { - List geneIds = new EnsemblSymbol().getIds(query); + List geneIds = new EnsemblSymbol(getDomain()).getIds(query); if (geneIds.isEmpty()) { return null; @@ -145,7 +163,7 @@ public class EnsemblGene extends EnsemblSeqProxy */ protected String getGeneIdentifiersForName(String query) { - List ids = new EnsemblSymbol().getIds(query); + List ids = new EnsemblSymbol(getDomain()).getIds(query); if (ids != null) { for (String id : ids) @@ -290,8 +308,9 @@ public class EnsemblGene extends EnsemblSeqProxy List mapTo = new ArrayList(); mapTo.add(new int[] { 1, transcriptLength }); MapList mapping = new MapList(mappedFrom, mapTo, 1, 1); - new EnsemblCdna().transferFeatures(gene.getSequenceFeatures(), - transcript.getDatasetSequence(), mapping, parentId); + new EnsemblCdna(getDomain()).transferFeatures( + gene.getSequenceFeatures(), transcript.getDatasetSequence(), + mapping, parentId); /* * fetch and save cross-references @@ -301,7 +320,7 @@ public class EnsemblGene extends EnsemblSeqProxy /* * and finally fetch the protein product and save as a cross-reference */ - new EnsemblCdna().addProteinProduct(transcript); + new EnsemblCdna(getDomain()).addProteinProduct(transcript); return transcript; } diff --git a/src/jalview/ext/ensembl/EnsemblGenome.java b/src/jalview/ext/ensembl/EnsemblGenome.java index e977e62..20987e1 100644 --- a/src/jalview/ext/ensembl/EnsemblGenome.java +++ b/src/jalview/ext/ensembl/EnsemblGenome.java @@ -2,6 +2,14 @@ package jalview.ext.ensembl; import jalview.datamodel.SequenceFeature; +/** + * A client to fetch genomic sequence from Ensembl + * + * TODO: not currently used - delete? + * + * @author gmcarstairs + * + */ public class EnsemblGenome extends EnsemblSeqProxy { /* @@ -12,11 +20,24 @@ public class EnsemblGenome extends EnsemblSeqProxy EnsemblFeatureType.transcript, EnsemblFeatureType.exon, EnsemblFeatureType.cds, EnsemblFeatureType.variation }; + /** + * Default constructor (to use rest.ensembl.org) + */ public EnsemblGenome() { super(); } + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblGenome(String d) + { + super(d); + } + @Override public String getDbName() { diff --git a/src/jalview/ext/ensembl/EnsemblGenomes.java b/src/jalview/ext/ensembl/EnsemblGenomes.java new file mode 100644 index 0000000..95acf41 --- /dev/null +++ b/src/jalview/ext/ensembl/EnsemblGenomes.java @@ -0,0 +1,40 @@ +package jalview.ext.ensembl; + + +/** + * A class to behave much like EnsemblGene but referencing the ensemblgenomes + * domain and data + * + * @author gmcarstairs + * + */ +public class EnsemblGenomes extends EnsemblGene +{ + /** + * Constructor sets domain to rest.ensemblgenomes.org instead of the 'usual' + * rest.ensembl.org + */ + public EnsemblGenomes() + { + super(ENSEMBL_GENOMES_REST); + } + + @Override + public boolean isGeneIdentifier(String query) + { + return true; + } + + @Override + public String getDbName() + { + return "EnsemblGenomes"; + } + + @Override + public String getTestQuery() + { + return "DDB_G0283883"; + } + +} diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index cd792b5..c5945ae 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -13,9 +13,34 @@ import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; +/** + * A client for the Ensembl lookup REST endpoint; used to find the Parent gene + * identifier given a transcript identifier. + * + * @author gmcarstairs + * + */ public class EnsemblLookup extends EnsemblRestClient { + /** + * Default constructor (to use rest.ensembl.org) + */ + public EnsemblLookup() + { + super(); + } + + /** + * Constructor given the target domain to fetch data from + * + * @param + */ + public EnsemblLookup(String d) + { + super(d); + } + @Override public String getDbName() { @@ -41,7 +66,7 @@ public class EnsemblLookup extends EnsemblRestClient */ protected URL getUrl(String identifier) { - String url = ENSEMBL_REST + "/lookup/id/" + identifier + String url = getDomain() + "/lookup/id/" + identifier + "?content-type=application/json"; try { diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java index fb79ccf..97796a5 100644 --- a/src/jalview/ext/ensembl/EnsemblProtein.java +++ b/src/jalview/ext/ensembl/EnsemblProtein.java @@ -8,6 +8,12 @@ import java.util.List; import com.stevesoft.pat.Regex; +/** + * A client to fetch protein translated sequence for an Ensembl identifier + * + * @author gmcarstairs + * + */ public class EnsemblProtein extends EnsemblSeqProxy { /* @@ -21,11 +27,24 @@ public class EnsemblProtein extends EnsemblSeqProxy private static final List CROSSREFS = Arrays.asList(new String[] { "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" }); + /** + * Default constructor (to use rest.ensembl.org) + */ public EnsemblProtein() { super(); } + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblProtein(String d) + { + super(d); + } + @Override public String getDbName() { diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java index 297f71b..1c47373 100644 --- a/src/jalview/ext/ensembl/EnsemblRestClient.java +++ b/src/jalview/ext/ensembl/EnsemblRestClient.java @@ -23,10 +23,9 @@ import com.stevesoft.pat.Regex; */ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { - protected final static String ENSEMBL_REST = "http://rest.ensembl.org"; + private final static String ENSEMBL_REST = "http://rest.ensembl.org"; - protected static final String SEQUENCE_ID_URL = ENSEMBL_REST - + "/sequence/id"; + protected final static String ENSEMBL_GENOMES_REST = "http://rest.ensemblgenomes.org"; // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats private static final String PING_URL = "http://rest.ensembl.org/info/ping.json"; @@ -39,18 +38,48 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher private static final Regex GENE_REGEX = new Regex( "(ENS)([A-Z]{3}|)G[0-9]{11}$"); + private String domain = ENSEMBL_REST; + private static boolean ensemblRestAvailable = false; private static long lastCheck = -1; protected volatile boolean inProgress = false; - public static boolean isTranscriptIdentifier(String query) + /** + * Default constructor to use rest.ensembl.org + */ + public EnsemblRestClient() + { + this(ENSEMBL_REST); + } + + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblRestClient(String d) + { + domain = d; + } + + String getDomain() + { + return domain; + } + + void setDomain(String d) + { + domain = d; + } + + public boolean isTranscriptIdentifier(String query) { return query == null ? false : TRANSCRIPT_REGEX.search(query); } - public static boolean isGeneIdentifier(String query) + public boolean isGeneIdentifier(String query) { return query == null ? false : GENE_REGEX.search(query); } @@ -112,7 +141,10 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { try { - URL ping = new URL(PING_URL); + // note this format works for both ensembl and ensemblgenomes + // info/ping.json works for ensembl only (March 2016) + URL ping = new URL(getDomain() + + "/info/ping?content-type=application/json"); HttpURLConnection conn = (HttpURLConnection) ping.openConnection(); int rc = conn.getResponseCode(); conn.disconnect(); diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 869a702..a878784 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -33,6 +33,7 @@ import java.util.Map.Entry; /** * Base class for Ensembl sequence fetchers * + * @see http://rest.ensembl.org/documentation/info/sequence_id * @author gmcarstairs */ public abstract class EnsemblSeqProxy extends EnsemblRestClient @@ -112,10 +113,19 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Constructor + * Default constructor (to use rest.ensembl.org) */ public EnsemblSeqProxy() { + super(); + } + + /** + * Constructor given the target domain to fetch data from + */ + public EnsemblSeqProxy(String d) + { + super(d); } /** @@ -201,7 +211,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * get 'dummy' genomic sequence with exon, cds and variation features */ SequenceI genomicSequence = null; - EnsemblFeatures gffFetcher = new EnsemblFeatures(); + EnsemblFeatures gffFetcher = new EnsemblFeatures(getDomain()); EnsemblFeatureType[] features = getFeaturesToFetch(); AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId, features); @@ -251,7 +261,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient String accId = querySeq.getName(); try { - AlignmentI protein = new EnsemblProtein().getSequenceRecords(accId); + AlignmentI protein = new EnsemblProtein(getDomain()) + .getSequenceRecords(accId); if (protein == null || protein.getHeight() == 0) { System.out.println("Failed to retrieve protein for " + accId); @@ -302,7 +313,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient seq = seq.getDatasetSequence(); } - EnsemblXref xrefFetcher = new EnsemblXref(); + EnsemblXref xrefFetcher = new EnsemblXref(getDomain()); List xrefs = xrefFetcher.getCrossReferences(seq.getName(), getCrossReferenceDatabases()); for (DBRefEntry xref : xrefs) @@ -338,9 +349,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ protected MapList mapCdsToProtein(SequenceI dnaSeq, SequenceI proteinSeq) { - List ranges = new ArrayList(50); - - int mappedDnaLength = getCdsRanges(dnaSeq, ranges); + List ranges = getCdsRanges(dnaSeq); + int mappedDnaLength = MappingUtils.getLength(ranges); int proteinLength = proteinSeq.getLength(); int proteinEnd = proteinLength; @@ -363,7 +373,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient int codesForResidues = mappedDnaLength / 3; if (codesForResidues == (proteinLength + 1)) { - MappingUtils.unmapStopCodon(ranges, mappedDnaLength); + // assuming extra codon is for STOP and not in peptide codesForResidues--; } if (codesForResidues == proteinLength) @@ -375,26 +385,24 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Adds CDS ranges to the ranges list, and returns the total length mapped - * from. + * Returns a list of CDS ranges found. * * No need to worry about reverse strand dna, here since the retrieved * sequence is as transcribed (reverse complement for reverse strand), i.e in * the same sense as the peptide. * * @param dnaSeq - * @param ranges * @return */ - protected int getCdsRanges(SequenceI dnaSeq, List ranges) + protected List getCdsRanges(SequenceI dnaSeq) { + List result = new ArrayList(); SequenceFeature[] sfs = dnaSeq.getSequenceFeatures(); if (sfs == null) { - return 0; + return result; } SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - int mappedDnaLength = 0; for (SequenceFeature sf : sfs) { /* @@ -415,7 +423,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ int begin = sf.getBegin(); int end = sf.getEnd(); - if (ranges.isEmpty()) + if (result.isEmpty()) { begin += phase; if (begin > end) @@ -423,11 +431,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient continue; // shouldn't happen? } } - ranges.add(new int[] { begin, end }); - mappedDnaLength += Math.abs(end - begin) + 1; + result.add(new int[] { begin, end }); } } - return mappedDnaLength; + return result; } /** @@ -514,7 +521,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * multiple ids go in the POST body instead */ StringBuffer urlstring = new StringBuffer(128); - urlstring.append(SEQUENCE_ID_URL); + urlstring.append(getDomain() + "/sequence/id"); if (ids.size() == 1) { urlstring.append("/").append(ids.get(0)); diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 5b3baa1..08f26c7 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -13,9 +13,26 @@ import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; +/** + * A client for the Ensembl xrefs/symbol REST service; + * + * @see http://rest.ensembl.org/documentation/info/xref_external + * @author gmcarstairs + * + */ public class EnsemblSymbol extends EnsemblXref { /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblSymbol(String d) + { + super(d); + } + + /** * Returns the first "id" value in gene identifier format from the JSON * response, or null if none found * @@ -51,7 +68,7 @@ public class EnsemblSymbol extends EnsemblXref protected URL getUrl(String id, Species species) { - String url = ENSEMBL_REST + "/xrefs/symbol/" + species.toString() + "/" + String url = getDomain() + "/xrefs/symbol/" + species.toString() + "/" + id + "?content-type=application/json"; try diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java index 514e44a..f19b4d4 100644 --- a/src/jalview/ext/ensembl/EnsemblXref.java +++ b/src/jalview/ext/ensembl/EnsemblXref.java @@ -27,6 +27,16 @@ import org.json.simple.parser.ParseException; class EnsemblXref extends EnsemblRestClient { + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblXref(String d) + { + super(d); + } + @Override public String getDbName() { @@ -164,7 +174,7 @@ class EnsemblXref extends EnsemblRestClient */ protected URL getUrl(String identifier) { - String url = ENSEMBL_REST + "/xrefs/id/" + identifier + String url = getDomain() + "/xrefs/id/" + identifier + "?content-type=application/json&all_levels=1"; try { diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 16db13a..0780b2a 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -770,57 +770,6 @@ public final class MappingUtils } /** - * Remove the last 3 mapped positions from the given ranges - * - * @param ranges - * @param mappedLength - */ - public static void unmapStopCodon(List ranges, - int mappedLength) - { - if (mappedLength < 3) - { - return; - } - boolean done = false; - int targetLength = mappedLength - 3; - int mapped = 0; - Iterator it = ranges.iterator(); - while (!done && it.hasNext()) - { - int[] range = it.next(); - int length = Math.abs(range[1] - range[0]) + 1; - if (mapped + length == targetLength) - { - done = true; - } - else if (mapped + length < targetLength) - { - mapped += length; - continue; - } - else - { - /* - * need just a bit of this range - */ - int needed = targetLength - mapped; - int sense = range[1] >= range[0] ? 1 : -1; - range[1] = range[0] + (sense * (needed - 1)); - done = true; - } - } - /* - * remove any trailing ranges - */ - while (it.hasNext()) - { - it.next(); - it.remove(); - } - } - - /** * Returns the total length of the supplied ranges * * @param ranges diff --git a/src/jalview/ws/SequenceFetcher.java b/src/jalview/ws/SequenceFetcher.java index 007c85c..902ce27 100644 --- a/src/jalview/ws/SequenceFetcher.java +++ b/src/jalview/ws/SequenceFetcher.java @@ -21,6 +21,7 @@ package jalview.ws; import jalview.ext.ensembl.EnsemblGene; +import jalview.ext.ensembl.EnsemblGenomes; import jalview.ws.dbsources.EmblCdsSource; import jalview.ws.dbsources.EmblSource; import jalview.ws.dbsources.Pdb; @@ -58,11 +59,8 @@ public class SequenceFetcher extends ASequenceFetcher public SequenceFetcher(boolean addDas) { - // addDBRefSourceImpl(EnsemblProtein.class); - // addDBRefSourceImpl(EnsemblCds.class); - // addDBRefSourceImpl(EnsemblGenome.class); addDBRefSourceImpl(EnsemblGene.class); - // addDBRefSourceImpl(EnsemblCdna.class); + addDBRefSourceImpl(EnsemblGenomes.class); addDBRefSourceImpl(EmblSource.class); addDBRefSourceImpl(EmblCdsSource.class); addDBRefSourceImpl(Uniprot.class); diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java b/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java index 7077eac..510e072 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java @@ -8,6 +8,23 @@ import jalview.datamodel.SequenceFeature; */ public class EnsemblSeqProxyAdapter extends EnsemblSeqProxy { + /** + * Default constructor (to use rest.ensembl.org) + */ + public EnsemblSeqProxyAdapter() + { + super(); + } + + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblSeqProxyAdapter(String d) + { + super(d); + } @Override public String getDbName() diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index 7ef8dd7..5d95a3c 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -14,11 +14,11 @@ import jalview.io.FastaFile; import jalview.io.FileParse; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyLite; +import jalview.util.MappingUtils; import java.lang.reflect.Method; import java.net.MalformedURLException; import java.net.URL; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -309,9 +309,8 @@ public class EnsemblSeqProxyTest sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); ds.addSequenceFeature(sf); - List ranges = new ArrayList(); - int mappedLength = testee.getCdsRanges(dnaSeq, ranges); - assertEquals(6, mappedLength); + List ranges = testee.getCdsRanges(dnaSeq); + assertEquals(6, MappingUtils.getLength(ranges)); assertEquals(2, ranges.size()); assertEquals(4, ranges.get(0)[0]); assertEquals(6, ranges.get(0)[1]); @@ -347,13 +346,12 @@ public class EnsemblSeqProxyTest sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); ds.addSequenceFeature(sf); - List ranges = new ArrayList(); - int mappedLength = testee.getCdsRanges(dnaSeq, ranges); + List ranges = testee.getCdsRanges(dnaSeq); /* * check the mapping starts with the first complete codon */ - assertEquals(6, mappedLength); + assertEquals(6, MappingUtils.getLength(ranges)); assertEquals(2, ranges.size()); assertEquals(7, ranges.get(0)[0]); assertEquals(9, ranges.get(0)[1]); @@ -364,26 +362,28 @@ public class EnsemblSeqProxyTest @Test(groups = "Functional") public void testIsTranscriptIdentifier() { - assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(null)); - assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("")); - assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345")); - assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345")); - assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENSMUST00000012345")); - assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345")); - assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456")); - assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234")); + EnsemblSeqProxy testee = new EnsemblGene(); + assertFalse(testee.isTranscriptIdentifier(null)); + assertFalse(testee.isTranscriptIdentifier("")); + assertFalse(testee.isTranscriptIdentifier("ENSG00000012345")); + assertTrue(testee.isTranscriptIdentifier("ENST00000012345")); + assertTrue(testee.isTranscriptIdentifier("ENSMUST00000012345")); + assertFalse(testee.isTranscriptIdentifier("enst00000012345")); + assertFalse(testee.isTranscriptIdentifier("ENST000000123456")); + assertFalse(testee.isTranscriptIdentifier("ENST0000001234")); } @Test(groups = "Functional") public void testIsGeneIdentifier() { - assertFalse(EnsemblSeqProxy.isGeneIdentifier(null)); - assertFalse(EnsemblSeqProxy.isGeneIdentifier("")); - assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENST00000012345")); - assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSG00000012345")); - assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSMUSG00000012345")); - assertFalse(EnsemblSeqProxy.isGeneIdentifier("ensg00000012345")); - assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG000000123456")); - assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG0000001234")); + EnsemblSeqProxy testee = new EnsemblGene(); + assertFalse(testee.isGeneIdentifier(null)); + assertFalse(testee.isGeneIdentifier("")); + assertFalse(testee.isGeneIdentifier("ENST00000012345")); + assertTrue(testee.isGeneIdentifier("ENSG00000012345")); + assertTrue(testee.isGeneIdentifier("ENSMUSG00000012345")); + assertFalse(testee.isGeneIdentifier("ensg00000012345")); + assertFalse(testee.isGeneIdentifier("ENSG000000123456")); + assertFalse(testee.isGeneIdentifier("ENSG0000001234")); } } \ No newline at end of file diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 3c4d4f8..853ebd5 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -858,61 +858,6 @@ public class MappingUtilsTest assertEquals("[5, 10]", Arrays.toString(hidden.get(1))); } - /** - * Tests for the method that removes the trailing stop codon from a mapping - * range i.e. the last 3 positions (whether split or not) - */ - @Test(groups = { "Functional" }) - public void testUnmapStopCodon() - { - List ranges = new ArrayList(); - - // simple case, forward strand: - ranges.add(new int[] { 1, 3 }); - ranges.add(new int[] { 9, 14 }); - MappingUtils.unmapStopCodon(ranges, 9); - assertEquals(2, ranges.size()); - assertArrayEquals(new int[] { 1, 3 }, ranges.get(0)); - assertArrayEquals(new int[] { 9, 11 }, ranges.get(1)); - - // split stop codon, forward strand: - ranges.clear(); - ranges.add(new int[] { 1, 8 }); - ranges.add(new int[] { 10, 10 }); - MappingUtils.unmapStopCodon(ranges, 9); - assertEquals(1, ranges.size()); - assertArrayEquals(new int[] { 1, 6 }, ranges.get(0)); - - // very split stop codon, forward strand: - ranges.clear(); - ranges.add(new int[] { 1, 1 }); - ranges.add(new int[] { 3, 4 }); - ranges.add(new int[] { 6, 6 }); - ranges.add(new int[] { 8, 8 }); - ranges.add(new int[] { 10, 10 }); - MappingUtils.unmapStopCodon(ranges, 6); - assertEquals(2, ranges.size()); - assertArrayEquals(new int[] { 1, 1 }, ranges.get(0)); - assertArrayEquals(new int[] { 3, 4 }, ranges.get(1)); - - // simple case, reverse strand: - ranges.clear(); - ranges.add(new int[] { 12, 10 }); - ranges.add(new int[] { 6, 1 }); - MappingUtils.unmapStopCodon(ranges, 9); - assertEquals(2, ranges.size()); - assertArrayEquals(new int[] { 12, 10 }, ranges.get(0)); - assertArrayEquals(new int[] { 6, 4 }, ranges.get(1)); - - // split stop codon, reverse strand: - ranges.clear(); - ranges.add(new int[] { 12, 6 }); - ranges.add(new int[] { 4, 3 }); - MappingUtils.unmapStopCodon(ranges, 9); - assertEquals(1, ranges.size()); - assertArrayEquals(new int[] { 12, 7 }, ranges.get(0)); - } - @Test(groups = { "Functional" }) public void testGetLength() {