From ef9282b464dc189faf9ce40a4b7420a204266668 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 12 Feb 2016 16:55:19 +0000 Subject: [PATCH] JAL-1705 various refactoring towards Uniprot-to-Ensembl fetching --- src/jalview/analysis/CrossRef.java | 58 +++++++++- src/jalview/ext/ensembl/EnsemblCdna.java | 3 +- src/jalview/ext/ensembl/EnsemblGene.java | 114 +++++++++++++++++--- src/jalview/ext/ensembl/EnsemblProtein.java | 26 ++++- src/jalview/ext/ensembl/EnsemblRestClient.java | 6 +- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 55 ++++++---- .../ext/ensembl/EnsemblSequenceFetcher.java | 3 +- src/jalview/gui/SequenceFetcher.java | 1 - src/jalview/ws/SequenceFetcher.java | 9 +- src/jalview/ws/seqfetcher/ASequenceFetcher.java | 8 +- src/jalview/ws/seqfetcher/DbSourceProxyImpl.java | 21 ++-- test/jalview/ext/ensembl/EnsemblGeneTest.java | 21 +++- test/jalview/ext/ensembl/EnsemblProteinTest.java | 26 +++++ test/jalview/ext/ensembl/EnsemblSeqProxyTest.java | 15 ++- test/jalview/io/gff/GffTests.java | 2 - 15 files changed, 296 insertions(+), 72 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 21fd08d..d45750e 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -27,6 +27,7 @@ import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.ws.SequenceFetcher; @@ -45,6 +46,27 @@ import java.util.Vector; */ public class CrossRef { + /* + * A sub-class that ignores Parent attribute when comparing sequence + * features. This avoids 'duplicate' CDS features that only + * differ in their parent Transcript ids. 
+ */ + class MySequenceFeature extends SequenceFeature + { + private SequenceFeature feat; + + MySequenceFeature(SequenceFeature sf) + { + this.feat = sf; + } + + @Override + public boolean equals(Object o) + { + return feat.equals(o, true); + } + } + /** * Select just the DNA or protein references for a protein or dna sequence * @@ -337,8 +359,11 @@ public class CrossRef + seq.getName()); e.printStackTrace(); } + if (retrieved != null) { + List copiedFeatures = new ArrayList(); + CrossRef me = new CrossRef(); for (int rs = 0; rs < retrieved.length; rs++) { // TODO: examine each sequence for 'redundancy' @@ -369,12 +394,41 @@ public class CrossRef && mappedrg.getSequenceAsString().equals( loc.getSequenceAsString())) { - System.err - .println("Mapping updated for retrieved crossreference"); + String msg = "Mapping updated from " + + ms.getName() + + " to retrieved crossreference " + + dss.getName(); + System.out.println(msg); // method to update all refs of existing To on // retrieved sequence with dss and merge any props // on To onto dss. map.setTo(dss); + /* + * copy sequence features as well, avoiding + * duplication (e.g. from 2 transcripts) + */ + SequenceFeature[] sfs = ms + .getSequenceFeatures(); + if (sfs != null) + { + for (SequenceFeature feat : sfs) + { + /* + * we override the equality test here (but not + * elsewhere) to ignore Parent attribute + * TODO not quite working yet! 
+ */ + if (!copiedFeatures + .contains(me.new MySequenceFeature( + feat))) + { + dss.addSequenceFeature(feat); + copiedFeatures.add(feat); + } + } + } + cf.addMap(retrieved[rs].getDatasetSequence(), + dss, map.getMap()); } } catch (Exception e) { diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index d4d1c08..f60125b 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -11,7 +11,8 @@ import com.stevesoft.pat.Regex; public class EnsemblCdna extends EnsemblSeqProxy { // TODO modify to accept other species e.g. ENSMUSPnnn - private static final Regex ACCESSION_REGEX = new Regex("((ENST|ENSG|CCDS)[0-9.]{3,})"); + private static final Regex ACCESSION_REGEX = new Regex( + "(ENST|ENSG|CCDS)[0-9.]{3,}$"); /* * fetch exon features on genomic sequence (to identify the cdna regions) diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index dc28796..73649b4 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -8,6 +8,7 @@ import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.util.MapList; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -21,9 +22,11 @@ import com.stevesoft.pat.Regex; */ public class EnsemblGene extends EnsemblSeqProxy { + private static final String GENE_PREFIX = "gene:"; + // TODO modify to accept other species e.g. ENSMUSGnnn private static final Regex ACCESSION_REGEX = new Regex( - "((ENSG)[0-9]{11})"); + "(ENSG|ENST)[0-9]{11}$"); private static final EnsemblFeatureType[] FEATURES_TO_FETCH = { EnsemblFeatureType.gene, EnsemblFeatureType.transcript, @@ -66,31 +69,97 @@ public class EnsemblGene extends EnsemblSeqProxy @Override public AlignmentI getSequenceRecords(String query) throws Exception { - // TODO ? if an ENST identifier is supplied, convert to ENSG? 
+ List transcriptsWanted = null; + + if (isTranscriptIdentifier(query)) + { + transcriptsWanted = Arrays.asList(query + .split(getAccessionSeparator())); + query = getGeneForTranscript(query); + if (query == null) + { + return null; + } + } + AlignmentI al = super.getSequenceRecords(query); if (al.getHeight() > 0) { - getTranscripts(al, query); + getTranscripts(al, query, transcriptsWanted); } return al; } /** + * Gets the parent gene identifier for a given transcript identifier, by + * retrieving 'transcript' features overlapping the transcript, and finding + * the Parent property of the feature whose id is the given identifier. + * + * @param transcriptId one or more transcript ids; only the first is used + * @return the gene id (without the 'gene:' prefix), or null if not found + */ + protected String getGeneForTranscript(String transcriptId) + { + String geneId = null; + + /* + * reduce multiple transcripts (e.g. from Uniprot x-ref) to the first + * one only as representative (they should all have the same gene) + */ + transcriptId = transcriptId.split(getAccessionSeparator())[0]; + + try + { + EnsemblFeatureType[] geneFeature = new EnsemblFeatureType[] { EnsemblFeatureType.transcript }; + AlignmentI al = new EnsemblFeatures().getSequenceRecords( + transcriptId, geneFeature); + if (al != null && al.getHeight() > 0) + { + SequenceFeature[] sfs = al.getSequenceAt(0).getSequenceFeatures(); + if (sfs != null) + { + for (SequenceFeature sf : sfs) + { + if (transcriptId.equals(getTranscriptId(sf))) + { + String parent = (String) sf.getValue(PARENT); + if (parent != null && parent.startsWith(GENE_PREFIX)) + { + geneId = parent.substring(GENE_PREFIX.length()); + } + break; + } + } + } + } + return geneId; + } catch (IOException e) + { + System.err.println("Error retrieving gene id for " + transcriptId + + ": " + e.getMessage()); + return null; + } + } + + /** * Constructs all transcripts for the gene, as identified by "transcript" * features whose Parent is the requested gene. The coding transcript * sequences (i.e. with introns omitted) are added to the alignment. 
* * @param al * @param accId + * @param transcriptsWanted + * optional list of transcript ids to filter by * @throws Exception */ - protected void getTranscripts(AlignmentI al, String accId) + protected void getTranscripts(AlignmentI al, String accId, + List transcriptsWanted) throws Exception { SequenceI gene = al.getSequenceAt(0); List transcriptFeatures = getTranscriptFeatures(accId, - gene); + gene, transcriptsWanted); for (SequenceFeature transcriptFeature : transcriptFeatures) { @@ -114,7 +183,7 @@ public class EnsemblGene extends EnsemblSeqProxy SequenceI makeTranscript(SequenceFeature transcriptFeature, AlignmentI al, SequenceI gene) { - String accId = (String) transcriptFeature.getValue("transcript_id"); + String accId = getTranscriptId(transcriptFeature); if (accId == null) { return null; @@ -189,19 +258,32 @@ public class EnsemblGene extends EnsemblSeqProxy } /** + * Returns the 'transcript_id' property of the sequence feature (or null) + * + * @param feature + * @return + */ + protected String getTranscriptId(SequenceFeature feature) + { + return (String) feature.getValue("transcript_id"); + } + + /** * Returns a list of the transcript features on the sequence whose Parent is * the gene for the accession id. 
* * @param accId * @param geneSequence + * @param transcriptsWanted + * optional list of ids to filter on * @return */ protected List getTranscriptFeatures(String accId, - SequenceI geneSequence) + SequenceI geneSequence, List transcriptsWanted) { List transcriptFeatures = new ArrayList(); - String parentIdentifier = "gene:" + accId; + String parentIdentifier = GENE_PREFIX + accId; SequenceFeature[] sfs = geneSequence.getSequenceFeatures(); if (sfs != null) @@ -210,6 +292,14 @@ public class EnsemblGene extends EnsemblSeqProxy { if (isTranscript(sf.getType())) { + if (transcriptsWanted != null) + { + String transcriptId = getTranscriptId(sf); + if (!transcriptsWanted.contains(transcriptId)) + { + continue; + } + } String parent = (String) sf.getValue(PARENT); if (parentIdentifier.equals(parent)) { @@ -225,11 +315,11 @@ public class EnsemblGene extends EnsemblSeqProxy @Override public String getDescription() { - return "Fetches all transcripts and variant features for a gene"; + return "Fetches all transcripts and variant features for a gene or transcript"; } /** - * Default test query is a transcript + * Default test query is a gene id (can also enter a transcript id) */ @Override public String getTestQuery() @@ -251,7 +341,7 @@ public class EnsemblGene extends EnsemblSeqProxy SequenceOntologyI.GENE)) { String id = (String) sf.getValue(ID); - if (("gene:" + accId).equals(id)) + if ((GENE_PREFIX + accId).equals(id)) { return true; } @@ -278,7 +368,7 @@ public class EnsemblGene extends EnsemblSeqProxy if (isTranscript(sf.getType())) { String parent = (String) sf.getValue(PARENT); - if (!("gene:" + accessionId).equals(parent)) + if (!(GENE_PREFIX + accessionId).equals(parent)) { return false; } diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java index 8f23984..29c7eda 100644 --- a/src/jalview/ext/ensembl/EnsemblProtein.java +++ b/src/jalview/ext/ensembl/EnsemblProtein.java @@ -12,7 +12,7 @@ public 
class EnsemblProtein extends EnsemblSeqProxy { // TODO modify to accept other species e.g. ENSMUSPnnn private static final Regex ACCESSION_REGEX = new Regex( - "((ENSP|CCDS)[0-9.]{3,})"); + "(ENSP|CCDS)[0-9.]{3,}$"); private static final List CROSSREFS = Arrays.asList(new String[] { "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" }); @@ -87,4 +87,28 @@ public class EnsemblProtein extends EnsemblSeqProxy return ACCESSION_REGEX; } + /** + * Returns an accession id for a query, including conversion of ENST* to + * ENSP*. This supports querying for the protein sequence for a transcript + * (ENST identifier) and returning the ENSP identifier. + */ + @Override + public String getAccessionIdFromQuery(String query) + { + String accId = super.getAccessionIdFromQuery(query); + + /* + * if the character before the last 11 characters is T, change it to P (other ids are left unchanged) + * ENST00000288602 -> ENSP00000288602 + * ENSMUST00000288602 -> ENSMUSP00000288602 + */ + if (accId != null && accId.length() >= 12 && accId.charAt(accId.length() - 12) == 'T') + { + char[] chars = accId.toCharArray(); + chars[chars.length - 12] = 'P'; + accId = new String(chars); + } + return accId; + } + } diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java index dc4cc88..215eb7a 100644 --- a/src/jalview/ext/ensembl/EnsemblRestClient.java +++ b/src/jalview/ext/ensembl/EnsemblRestClient.java @@ -140,7 +140,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher protected BufferedReader getHttpResponse(URL url, List ids) throws IOException { - long now = System.currentTimeMillis(); + // long now = System.currentTimeMillis(); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); /* @@ -176,8 +176,8 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher "Response code was not 200. 
Detected response was " + responseCode); } - System.out.println(getClass().getName() + " took " - + (System.currentTimeMillis() - now) + "ms to fetch"); + // System.out.println(getClass().getName() + " took " + // + (System.currentTimeMillis() - now) + "ms to fetch"); BufferedReader reader = null; reader = new BufferedReader(new InputStreamReader(response, "UTF-8")); diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index e77051d..a2be17b 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -30,6 +30,8 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map.Entry; +import com.stevesoft.pat.Regex; + /** * Base class for Ensembl sequence fetchers * @@ -37,6 +39,10 @@ import java.util.Map.Entry; */ public abstract class EnsemblSeqProxy extends EnsemblRestClient { + // TODO modify to accept other species e.g. ENSMUSTnnn + private static final Regex TRANSCRIPT_REGEX = new Regex( + "(ENST)[0-9]{11}$"); + private static final List CROSS_REFERENCES = Arrays .asList(new String[] { "CCDS" }); @@ -154,14 +160,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient + ")"; System.err.println(msg); break; - // if (alignment != null) - // { - // break; // return what we got - // } - // else - // { - // throw new JalviewException(msg, r); - // } } } @@ -294,8 +292,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein - * sequence + * Get database xrefs from Ensembl, and attach them to the sequence * * @param seq */ @@ -719,18 +716,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient /* * for sequence_variant, make an additional feature with consequence */ - if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - SequenceOntologyI.SEQUENCE_VARIANT)) - { - String consequence = (String) sf.getValue(CONSEQUENCE_TYPE); - 
if (consequence != null) - { - SequenceFeature sf2 = new SequenceFeature("consequence", - consequence, copy.getBegin(), copy.getEnd(), 0f, - null); - targetSequence.addSequenceFeature(sf2); - } - } + // if (SequenceOntologyFactory.getInstance().isA(sf.getType(), + // SequenceOntologyI.SEQUENCE_VARIANT)) + // { + // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE); + // if (consequence != null) + // { + // SequenceFeature sf2 = new SequenceFeature("consequence", + // consequence, copy.getBegin(), copy.getEnd(), 0f, + // null); + // targetSequence.addSequenceFeature(sf2); + // } + // } } } @@ -750,6 +747,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient return false; } + // long start = System.currentTimeMillis(); SequenceFeature[] sfs = sourceSequence.getSequenceFeatures(); MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId, targetSequence.getStart()); @@ -758,7 +756,13 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient return false; } - return transferFeatures(sfs, targetSequence, mapping, accessionId); + boolean result = transferFeatures(sfs, targetSequence, mapping, + accessionId); + // System.out.println("transferFeatures (" + (sfs.length) + " --> " + // + targetSequence.getSequenceFeatures().length + ") to " + // + targetSequence.getName() + // + " took " + (System.currentTimeMillis() - start) + "ms"); + return result; } /** @@ -1125,4 +1129,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient || SequenceOntologyFactory.getInstance().isA(featureType, SequenceOntologyI.TRANSCRIPT); } + + public static boolean isTranscriptIdentifier(String query) + { + return query == null ? 
false : TRANSCRIPT_REGEX.search(query); + } } diff --git a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java index 2e32bd2..67c5e63 100644 --- a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java +++ b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java @@ -13,7 +13,8 @@ import com.stevesoft.pat.Regex; abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl { // TODO modify to accept other species e.g. ENSMUSTnnn - private static final Regex ACCESSION_REGEX = new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})"); + private static final Regex ACCESSION_REGEX = new Regex( + "(ENSP|ENST|ENSG|CCDS)[0-9.]{3,}$"); /* * possible values for the 'feature' parameter of the /overlap REST service diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 2004761..742a109 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -649,7 +649,6 @@ public class SequenceFetcher extends JPanel implements Runnable { rfound = true; rs[r] = null; - continue; } } if (!rfound) diff --git a/src/jalview/ws/SequenceFetcher.java b/src/jalview/ws/SequenceFetcher.java index 909f515..16bbfe6 100644 --- a/src/jalview/ws/SequenceFetcher.java +++ b/src/jalview/ws/SequenceFetcher.java @@ -20,10 +20,7 @@ */ package jalview.ws; -import jalview.ext.ensembl.EnsemblCdna; -import jalview.ext.ensembl.EnsemblCds; import jalview.ext.ensembl.EnsemblGene; -import jalview.ext.ensembl.EnsemblGenome; import jalview.ext.ensembl.EnsemblProtein; import jalview.ws.dbsources.EmblCdsSource; import jalview.ws.dbsources.EmblSource; @@ -63,10 +60,10 @@ public class SequenceFetcher extends ASequenceFetcher public SequenceFetcher(boolean addDas) { addDBRefSourceImpl(EnsemblProtein.class); - addDBRefSourceImpl(EnsemblCds.class); - addDBRefSourceImpl(EnsemblGenome.class); + // addDBRefSourceImpl(EnsemblCds.class); + // addDBRefSourceImpl(EnsemblGenome.class); 
addDBRefSourceImpl(EnsemblGene.class); - addDBRefSourceImpl(EnsemblCdna.class); + // addDBRefSourceImpl(EnsemblCdna.class); addDBRefSourceImpl(EmblSource.class); addDBRefSourceImpl(EmblCdsSource.class); addDBRefSourceImpl(Uniprot.class); diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index f825608..2ed3263 100644 --- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -168,8 +168,7 @@ public class ASequenceFetcher { continue; // wrong sort of data } - boolean doMultiple = fetcher.getAccessionSeparator() != null; - // No separator - no Multiple Queries + boolean doMultiple = fetcher.getMaximumQueryCount() > 1; while (!queriesLeft.isEmpty()) { StringBuffer qsb = new StringBuffer(); @@ -188,8 +187,7 @@ public class ASequenceFetcher try { // create a fetcher and go to it - seqset = fetcher.getSequenceRecords(qsb.toString()); // , - // queriesFailed); + seqset = fetcher.getSequenceRecords(qsb.toString()); } catch (Exception ex) { System.err.println("Failed to retrieve the following from " @@ -256,7 +254,7 @@ public class ASequenceFetcher { System.out.println("# Adding " + queriesMade.size() + " ids back to queries list for searching again (" + db - + "."); + + ")"); queriesLeft.addAll(queriesMade); } } diff --git a/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java b/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java index 0a4d9a8..84072c0 100644 --- a/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java +++ b/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java @@ -24,8 +24,6 @@ import jalview.datamodel.AlignmentI; import jalview.io.FormatAdapter; import jalview.io.IdentifyFile; -import com.stevesoft.pat.Regex; - /** * common methods for implementations of the DbSourceProxy interface. 
* @@ -102,23 +100,20 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy return sequences; } + /** + * Returns the first accession id in the query (up to the first accession id + * separator), or the whole query if there is no separator or it is not found + */ @Override public String getAccessionIdFromQuery(String query) { - Regex vgr = getAccessionValidator(); - if (vgr == null) + String sep = getAccessionSeparator(); + if (sep == null) { return query; } - vgr.search(query); - if (vgr.numSubs() > 0) - { - return (vgr.stringMatched(1)); - } - else - { - return (vgr.stringMatched()); - } + int sepPos = query.indexOf(sep); + return sepPos == -1 ? query : query.substring(0, sepPos); } /** diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java index 1f1a84e..a262c1e 100644 --- a/test/jalview/ext/ensembl/EnsemblGeneTest.java +++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java @@ -12,6 +12,7 @@ import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyLite; import jalview.util.MapList; +import java.util.Arrays; import java.util.List; import org.testng.annotations.AfterClass; @@ -129,33 +130,51 @@ public class EnsemblGeneTest SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000, 20500, 0f, null); sf1.setValue("Parent", "gene:" + geneId); + sf1.setValue("transcript_id", "transcript1"); genomic.addSequenceFeature(sf1); // transcript sub-type feature SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000, 20500, 0f, null); sf2.setValue("Parent", "gene:" + geneId); + sf2.setValue("transcript_id", "transcript2"); genomic.addSequenceFeature(sf2); // NMD_transcript_variant treated like transcript in Ensembl SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500, 0f, null); sf3.setValue("Parent", "gene:" + geneId); + sf3.setValue("transcript_id", "transcript3"); genomic.addSequenceFeature(sf3); // transcript for a different gene - ignored 
SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500, 0f, null); sf4.setValue("Parent", "gene:XYZ"); + sf4.setValue("transcript_id", "transcript4"); genomic.addSequenceFeature(sf4); EnsemblGene testee = new EnsemblGene(); + + /* + * with no filter + */ List features = testee.getTranscriptFeatures(geneId, - genomic); + genomic, null); assertEquals(3, features.size()); assertSame(sf1, features.get(0)); assertSame(sf2, features.get(1)); assertSame(sf3, features.get(2)); + + /* + * with filter + */ + List ids = Arrays.asList(new String[] { "transcript2", + "transcript3" }); + features = testee.getTranscriptFeatures(geneId, genomic, ids); + assertEquals(2, features.size()); + assertSame(sf2, features.get(0)); + assertSame(sf3, features.get(1)); } /** diff --git a/test/jalview/ext/ensembl/EnsemblProteinTest.java b/test/jalview/ext/ensembl/EnsemblProteinTest.java index bd0e7b3..c5db0a8 100644 --- a/test/jalview/ext/ensembl/EnsemblProteinTest.java +++ b/test/jalview/ext/ensembl/EnsemblProteinTest.java @@ -1,5 +1,7 @@ package jalview.ext.ensembl; +import static org.testng.AssertJUnit.assertEquals; + import org.testng.Assert; import org.testng.annotations.Test; @@ -16,4 +18,28 @@ public class EnsemblProteinTest Assert.assertFalse(esq.isValidReference("ENSG00000288602")); } + @Test(groups = "Functional") + public void testGetAccesionIdFromQuery() throws Exception + { + EnsemblSequenceFetcher esq = new EnsemblProtein(); + assertEquals("ENSP00000288602", + esq.getAccessionIdFromQuery("ENSP00000288602")); + assertEquals("ENSMUSP00000288602", + esq.getAccessionIdFromQuery("ENSMUSP00000288602")); + + // ENST converted to ENSP + assertEquals("ENSP00000288602", + esq.getAccessionIdFromQuery("ENST00000288602")); + assertEquals("ENSMUSP00000288602", + esq.getAccessionIdFromQuery("ENSMUST00000288602")); + + // with valid separator: + assertEquals("ENSP00000288604", + esq.getAccessionIdFromQuery("ENSP00000288604 ENSP00000288602")); + + // with wrong separator: + 
assertEquals("ENSP00000288604,ENSP00000288602", + esq.getAccessionIdFromQuery("ENSP00000288604,ENSP00000288602")); + } + } diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index a6694eb..73d2858 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -1,6 +1,8 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; @@ -341,7 +343,6 @@ public class EnsemblSeqProxyTest SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null); sf.setPhase("2"); // skip 2 bases to start of next codon ds.addSequenceFeature(sf); - ds.addSequenceFeature(sf); // CDS for dna 13-15 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); ds.addSequenceFeature(sf); @@ -359,4 +360,16 @@ public class EnsemblSeqProxyTest assertEquals(13, ranges.get(1)[0]); assertEquals(15, ranges.get(1)[1]); } + + @Test(groups = "Functional") + public void testIsTranscriptIdentifier() + { + assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(null)); + assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("")); + assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345")); + assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345")); + assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345")); + assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456")); + assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234")); + } } \ No newline at end of file diff --git a/test/jalview/io/gff/GffTests.java b/test/jalview/io/gff/GffTests.java index 77da8fa..2ee4eac 100644 --- a/test/jalview/io/gff/GffTests.java +++ b/test/jalview/io/gff/GffTests.java @@ -69,8 +69,6 @@ public class GffTests mappedRegion = 
mapList[0].getMap().locateInFrom(15, 15); assertArrayEquals(new int[] { 12, 10 }, mappedRegion); - // so far so good; TODO: programmatically add mapped sequences - // and verify the mappings are 'realised' SequenceI dna1 = new Sequence("dna1", "AAACCCGGGTTTAAACCCGGGTTT"); AlignmentI al = new Alignment(new SequenceI[] { dna1 }); al.setDataset(null); -- 1.7.10.2