From 4fa6faf64b0d16caffa71ac38506d473952478c5 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 26 Sep 2017 10:17:30 +0100 Subject: [PATCH] JAL-2738 unit test for reverse strand gene --- src/jalview/ext/ensembl/EnsemblGene.java | 2 +- src/jalview/io/vcf/VCFLoader.java | 19 +++- test/jalview/io/vcf/VCFLoaderTest.java | 150 +++++++++++++++++------------- 3 files changed, 101 insertions(+), 70 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 45e8e34..5e970b1 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -426,7 +426,7 @@ public class EnsemblGene extends EnsemblSeqProxy * @param mapping * the mapping from gene to transcript positions */ - protected void mapTranscriptToChromosome(Sequence transcript, + protected void mapTranscriptToChromosome(SequenceI transcript, SequenceI gene, MapList mapping) { GeneLoci loci = gene.getGeneLoci(); diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 44e8e16..6b1eb0d 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -337,8 +337,7 @@ public class VCFLoader } StringBuilder sb = new StringBuilder(); - sb.append(forwardStrand ? reference : Dna - .getComplement((char) reference[0])); + sb.append(forwardStrand ? (char) reference[0] : complement(reference)); /* * inspect alleles and record SNP variants (as the variant @@ -354,8 +353,8 @@ public class VCFLoader if (alleleBase.length == 1) { sb.append(",").append( - forwardStrand ? alleleBase : Dna - .getComplement((char) alleleBase[0])); + forwardStrand ? (char) alleleBase[0] + : complement(alleleBase)); } } } @@ -377,6 +376,18 @@ public class VCFLoader } /** + * A convenience method to complement a dna base and return the string value + * of its complement + * + * @param reference + * @return + */ + protected String complement(byte[] reference) + { + return String.valueOf(Dna.getComplement((char) reference[0])); + } + + /** * Determines the location of the query range (chromosome positions) in a * different reference assembly. *

diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java index db72941..cdfe298 100644 --- a/test/jalview/io/vcf/VCFLoaderTest.java +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -19,7 +19,6 @@ import jalview.util.MapList; import java.io.File; import java.io.IOException; import java.io.PrintWriter; -import java.util.Arrays; import java.util.List; import org.testng.annotations.Test; @@ -27,10 +26,19 @@ import org.testng.annotations.Test; public class VCFLoaderTest { // columns 9717- of gene P30419 from Ensembl (modified) - private static final String FASTA = ">ENSG00000136448/1-25 chromosome:GRCh38:17:45051610:45051634:1\n" + private static final String FASTA = + // forward strand 'gene' + ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n" + "CAAGCTGGCGGACGAGAGTGTGACA\n" // and a 'made up' mini-transcript with two exons - + ">ENST00000592782/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"; + + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n" + + + // 'reverse strand' gene (reverse complement) + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n" + + "TGTCACACTCTCGTCCGCCAGCTTG\n" + // and its 'transcript' + + ">transcript2/1-18\n" + + "-GTCACACTCT----CGCCAGCT--\n"; private static final String[] VCF = { "##fileformat=VCFv4.2", "##INFO=", @@ -64,7 +72,7 @@ public class VCFLoaderTest assertEquals(sf.getEnd(), 2); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); - assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T"); sf = geneFeatures.get(1); assertEquals(sf.getFeatureGroup(), "VCF"); @@ -72,7 +80,7 @@ public class VCFLoaderTest assertEquals(sf.getEnd(), 4); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); - assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C"); /* * verify variant feature(s) added to transcript @@ -86,7 +94,7 @@ public class VCFLoaderTest assertEquals(sf.getEnd(), 2); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); - assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C"); /* * verify variant feature(s) computed and added to protein @@ -128,41 +136,65 @@ public class VCFLoaderTest DataSourceType.PASTE); /* - * map gene sequence to chromosome (normally done when the sequence is fetched + * map gene1 sequence to chromosome (normally done when the sequence is fetched * from Ensembl and transcripts computed) */ AlignmentI alignment = af.getViewport().getAlignment(); - int[][] to = new int[][] { new int[] { 45051610, 45051634 } }; - List toRanges = Arrays.asList(to); - SequenceI gene = alignment.getSequenceAt(0); - List fromRanges = Arrays.asList(new int[][] { new int[] { - gene.getStart(), gene.getEnd() } }); - ((Sequence) gene).setGeneLoci(new GeneLoci("human", "GRCh38", "17", - new MapList(fromRanges, toRanges, 1, 1))); + SequenceI gene1 = alignment.getSequenceAt(0); + int[] to = new int[] { 45051610, 45051634 }; + int[] from = new int[] { gene1.getStart(), gene1.getEnd() }; + gene1.setGeneLoci(new GeneLoci("human", "GRCh38", "17", new MapList( + from, to, 1, 1))); /* - * map 'transcript' to chromosome via 'gene' - * transcript/1-18 is gene/3-10,15-24 + * map 'transcript1' to chromosome via 'gene1' + * transcript1/1-18 is gene1/3-10,15-24 * which is chromosome 45051612-45051619,45051624-45051633 */ - to = new int[][] { new int[] { 45051612, 45051619 }, - new int[] { 45051624, 45051633 } }; - toRanges = Arrays.asList(to); - SequenceI transcript = alignment.getSequenceAt(1); - fromRanges = Arrays.asList(new int[][] { new int[] { - transcript.getStart(), transcript.getEnd() } }); - ((Sequence) transcript).setGeneLoci(new GeneLoci("human", "GRCh38", - "17", new MapList(fromRanges, toRanges, 1, 1))); + to = new int[] { 45051612, 45051619, 45051624, 45051633 }; + SequenceI transcript1 = alignment.getSequenceAt(1); + from = new int[] { transcript1.getStart(), transcript1.getEnd() }; + transcript1.setGeneLoci(new GeneLoci("human", "GRCh38", "17", + new MapList(from, to, 1, 1))); /* - * add a protein product as a DBRef on the transcript + * map gene2 to chromosome reverse strand */ - SequenceI peptide = new Sequence("ENSP001", "SWRECD"); + SequenceI gene2 = alignment.getSequenceAt(2); + to = new int[] { 45051634, 45051610 }; + from = new int[] { gene2.getStart(), gene2.getEnd() }; + gene2.setGeneLoci(new GeneLoci("human", "GRCh38", "17", new MapList( + from, to, 1, 1))); + + /* + * map 'transcript2' to chromosome via 'gene2' + * transcript2/1-18 is gene2/2-11,16-23 + * which is chromosome 45051633-45051624,45051619-45051612 + */ + to = new int[] { 45051633, 45051624, 45051619, 45051612 }; + SequenceI transcript2 = alignment.getSequenceAt(3); + from = new int[] { transcript2.getStart(), transcript2.getEnd() }; + transcript2.setGeneLoci(new GeneLoci("human", "GRCh38", "17", + new MapList(from, to, 1, 1))); + + /* + * add a protein product as a DBRef on transcript1 + */ + SequenceI peptide1 = new Sequence("ENSP001", "SWRECD"); MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1); - Mapping map = new Mapping(peptide, mapList); + Mapping map = new Mapping(peptide1, mapList); DBRefEntry product = new DBRefEntry("", "", "ENSP001", map); - transcript.addDBRef(product); + transcript1.addDBRef(product); + + /* + * add a protein product as a DBRef on transcript2 + */ + SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA"); + mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1); + map = new Mapping(peptide2, mapList); + product = new DBRefEntry("", "", "ENSP002", map); + transcript2.addDBRef(product); return alignment; } @@ -179,23 +211,6 @@ public class VCFLoaderTest { AlignmentI al = buildAlignment(); - /* - * invert forward to reverse strand mappings - */ - List to = al.getSequenceAt(0).getGeneLoci().mapping - .getToRanges(); - int temp = to.get(0)[0]; - to.get(0)[0] = to.get(0)[1]; - to.get(0)[1] = temp; - to = al.getSequenceAt(1).getGeneLoci().mapping.getToRanges(); - to.get(0)[0] = to.get(0)[1]; - to.get(0)[1] = temp; - to.get(1)[0] = to.get(1)[1]; - to.get(1)[1] = temp; - int[] tmp2 = to.get(0); - to.set(0, to.get(1)); - to.set(1, tmp2); - VCFLoader loader = new VCFLoader(al); File f = makeVcf(); @@ -203,53 +218,58 @@ public class VCFLoaderTest loader.loadVCF(f.getPath(), null); /* - * verify variant feature(s) added to gene + * verify variant feature(s) added to gene2 + * gene/1-25 maps to chromosome 45051634- reverse strand + * variants A/T at 45051611 and G/C at 45051613 map to + * T/A and C/G at gene positions 24 and 22 respectively */ - List geneFeatures = al.getSequenceAt(0) + List geneFeatures = al.getSequenceAt(2) .getSequenceFeatures(); assertEquals(geneFeatures.size(), 2); SequenceFeature sf = geneFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 2); - assertEquals(sf.getEnd(), 2); + assertEquals(sf.getBegin(), 22); + assertEquals(sf.getEnd(), 22); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); - assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); sf = geneFeatures.get(1); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 4); - assertEquals(sf.getEnd(), 4); + assertEquals(sf.getBegin(), 24); + assertEquals(sf.getEnd(), 24); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); - assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); + assertEquals("T,A", sf.getValue(Gff3Helper.ALLELES)); /* - * verify variant feature(s) added to transcript + * verify variant feature(s) added to transcript2 + * variant C/G at position 22 of gene overlaps and maps to + * position 17 of transcript */ - List transcriptFeatures = al.getSequenceAt(1) + List transcriptFeatures = al.getSequenceAt(3) .getSequenceFeatures(); assertEquals(transcriptFeatures.size(), 1); sf = transcriptFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 2); - assertEquals(sf.getEnd(), 2); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); - assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); /* * verify variant feature(s) computed and added to protein - * first codon AGC varies to ACC giving S/T + * last codon GCT varies to GGT giving A/G in the last peptide position */ - SequenceI peptide = al.getSequenceAt(1).getDBRefs()[0].getMap().getTo(); + SequenceI peptide = al.getSequenceAt(3).getDBRefs()[0].getMap().getTo(); List proteinFeatures = peptide.getSequenceFeatures(); assertEquals(proteinFeatures.size(), 1); sf = proteinFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 1); - assertEquals(sf.getEnd(), 1); + assertEquals(sf.getBegin(), 6); + assertEquals(sf.getEnd(), 6); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getDescription(), "p.Ser1Thr"); + assertEquals(sf.getDescription(), "p.Ala6Gly"); } } -- 1.7.10.2