From 2779b461347e684414f9e98e607e138b1e43db84 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 17 Jan 2018 11:37:09 +0000 Subject: [PATCH] JAL-2679 case-insensitive comparison of query and retrieved accession --- src/jalview/ext/ensembl/EnsemblFeatures.java | 12 ++++++++++-- src/jalview/ext/ensembl/EnsemblGene.java | 6 +++--- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 10 ++++++---- test/jalview/ext/ensembl/EnsemblCdnaTest.java | 3 +++ test/jalview/ext/ensembl/EnsemblGeneTest.java | 6 +++++- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblFeatures.java b/src/jalview/ext/ensembl/EnsemblFeatures.java index 7570822..cb6f548 100644 --- a/src/jalview/ext/ensembl/EnsemblFeatures.java +++ b/src/jalview/ext/ensembl/EnsemblFeatures.java @@ -82,7 +82,7 @@ class EnsemblFeatures extends EnsemblRestClient public AlignmentI getSequenceRecords(String query) throws IOException { // TODO: use a vararg String... for getSequenceRecords instead? - List<String> queries = new ArrayList<String>(); + List<String> queries = new ArrayList<>(); queries.add(query); FileParse fp = getSequenceReader(queries); if (fp == null || !fp.isValid()) @@ -109,9 +109,17 @@ class EnsemblFeatures extends EnsemblRestClient urlstring.append("?content-type=text/x-gff3"); /* + * specify object_type=gene in case <id> is shared by transcript and/or protein; + * currently only fetching features for gene sequences; + * refactor in future if needed to fetch for transcripts + */ + urlstring.append("&").append(OBJECT_TYPE).append("=") + .append(OBJECT_TYPE_GENE); + + /* * specify features to retrieve * @see http://rest.ensembl.org/documentation/info/overlap_id - * could make the list a configurable entry in jalview.properties + * could make the list a configurable entry in .jalview_properties */ for (EnsemblFeatureType feature : featuresWanted) { diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 919134c..c8b59da 100644 --- 
a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -413,7 +413,7 @@ public class EnsemblGene extends EnsemblSeqProxy for (SequenceFeature sf : sfs) { String parent = (String) sf.getValue(PARENT); - if (parentIdentifier.equals(parent)) + if (parentIdentifier.equalsIgnoreCase(parent)) { transcriptFeatures.add(sf); } @@ -452,7 +452,7 @@ public class EnsemblGene extends EnsemblSeqProxy { // NB features as gff use 'ID'; rest services return as 'id' String id = (String) sf.getValue("ID"); - if ((GENE_PREFIX + accId).equals(id)) + if ((GENE_PREFIX + accId).equalsIgnoreCase(id)) { return true; } @@ -479,7 +479,7 @@ public class EnsemblGene extends EnsemblSeqProxy if (isTranscript(type)) { String parent = (String) sf.getValue(PARENT); - if (!(GENE_PREFIX + accessionId).equals(parent)) + if (!(GENE_PREFIX + accessionId).equalsIgnoreCase(parent)) { return false; } diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 24d1b95..b2ebb1a 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -203,7 +203,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient try { /* - * get 'dummy' genomic sequence with exon, cds and variation features + * get 'dummy' genomic sequence with gene, transcript, + * exon, cds and variation features */ SequenceI genomicSequence = null; EnsemblFeatures gffFetcher = new EnsemblFeatures(getDomain()); @@ -219,7 +220,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient /* * transfer features to the query sequence */ - SequenceI querySeq = alignment.findName(accId); + SequenceI querySeq = alignment.findName(accId, true); if (transferFeatures(accId, genomicSequence, querySeq)) { @@ -861,7 +862,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { String parent = (String) sf.getValue(PARENT); // using contains to allow for prefix "gene:", "transcript:" etc - if 
(parent != null && !parent.contains(identifier)) + if (parent != null + && !parent.toUpperCase().contains(identifier.toUpperCase())) { // this genomic feature belongs to a different transcript return false; @@ -896,7 +898,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient for (SequenceFeature sf : sfs) { String parent = (String) sf.getValue(PARENT); - if (parent != null && parent.equals(parentId)) + if (parent != null && parent.equalsIgnoreCase(parentId)) { result.add(sf); } diff --git a/test/jalview/ext/ensembl/EnsemblCdnaTest.java b/test/jalview/ext/ensembl/EnsemblCdnaTest.java index 6611e05..779962c 100644 --- a/test/jalview/ext/ensembl/EnsemblCdnaTest.java +++ b/test/jalview/ext/ensembl/EnsemblCdnaTest.java @@ -228,6 +228,9 @@ public class EnsemblCdnaTest sf.setValue("Parent", "transcript:" + accId); assertTrue(testee.retainFeature(sf, accId)); + // test is not case-sensitive + assertTrue(testee.retainFeature(sf, accId.toLowerCase())); + // feature with wrong parent is not retained sf.setValue("Parent", "transcript:XYZ"); assertFalse(testee.retainFeature(sf, accId)); diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java index 5920b89..1b1a2b4 100644 --- a/test/jalview/ext/ensembl/EnsemblGeneTest.java +++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java @@ -173,7 +173,8 @@ public class EnsemblGeneTest // NMD_transcript_variant treated like transcript in Ensembl SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "", 22000, 22500, 0f, null); - sf3.setValue("Parent", "gene:" + geneId); + // id matching should not be case-sensitive + sf3.setValue("Parent", "gene:" + geneId.toLowerCase()); sf3.setValue("transcript_id", "transcript3"); genomic.addSequenceFeature(sf3); @@ -259,6 +260,9 @@ public class EnsemblGeneTest sf.setValue("ID", "gene:" + accId); assertTrue(testee.identifiesSequence(sf, accId)); + // test is not case-sensitive + assertTrue(testee.identifiesSequence(sf, 
accId.toLowerCase())); + // transcript not valid: sf = new SequenceFeature("transcript", "", 1, 2, 0f, null); sf.setValue("ID", "gene:" + accId); -- 1.7.10.2