X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fio%2Fvcf%2FVCFLoaderTest.java;h=2b418de990b35bdd70eecca833f8823d879b95ef;hb=181fd0fa4064654d94f76c3f4ff9333f0ea1834b;hp=1e8866551a57c7830a3f7b51c9aef51631aeee3f;hpb=91d83d4836ba3f8c6c395d46d607faf693946e66;p=jalview.git diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java index 1e88665..2b418de 100644 --- a/test/jalview/io/vcf/VCFLoaderTest.java +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -1,11 +1,32 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.io.vcf; import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNull; -import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; import jalview.bin.Cache; +import jalview.bin.Console; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.Mapping; @@ -13,13 +34,11 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.datamodel.features.FeatureAttributes; -import jalview.datamodel.features.FeatureAttributes.Datatype; import jalview.datamodel.features.SequenceFeatures; import jalview.gui.AlignFrame; import jalview.io.DataSourceType; import jalview.io.FileLoader; import jalview.io.gff.Gff3Helper; -import jalview.io.gff.SequenceOntologyI; import jalview.util.MapList; import java.io.File; @@ -37,11 +56,10 @@ public class VCFLoaderTest private static final float DELTA = 0.00001f; // columns 9717- of gene P30419 from Ensembl (much modified) - private static final String FASTA = "" - + - /* - * forward strand 'gene' and 'transcript' with two exons - */ + private static final String FASTA = "" + + /* + * forward strand 'gene' and 'transcript' with two exons + */ ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n" + "CAAGCTGGCGGACGAGAGTGTGACA\n" + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n" @@ -50,8 +68,8 @@ public class VCFLoaderTest * reverse strand gene and transcript (reverse complement alleles!) 
*/ + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n" - + "TGTCACACTCTCGTCCGCCAGCTTG\n" - + ">transcript2/1-18\n" + "-GTCACACTCT----CGCCAGCT--\n" + + "TGTCACACTCTCGTCCGCCAGCTTG\n" + ">transcript2/1-18\n" + + "-GTCACACTCT----CGCCAGCT--\n" /* * 'gene' on chromosome 5 with two transcripts @@ -62,7 +80,8 @@ public class VCFLoaderTest + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n"; private static final String[] VCF = { "##fileformat=VCFv4.2", - // fields other than AF are ignored when parsing as they have no INFO definition + // fields other than AF are ignored when parsing as they have no INFO + // definition "##INFO=", "##INFO= dbRefs = al.getSequenceAt(1).getDBRefs(); SequenceI peptide = null; for (DBRefEntry dbref : dbRefs) { @@ -221,24 +240,11 @@ public class VCFLoaderTest } } List proteinFeatures = peptide.getSequenceFeatures(); - assertEquals(proteinFeatures.size(), 3); - sf = proteinFeatures.get(0); - assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 1); - assertEquals(sf.getEnd(), 1); - assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT); - assertEquals(sf.getDescription(), "p.Ser1Thr"); /* - * check that sequence_variant attribute AF has been clocked as - * numeric with correct min and max values - * (i.e. 
invalid values have been ignored - JAL-3375) + * JAL-3187 don't precompute protein features, do dynamically instead */ - FeatureAttributes fa = FeatureAttributes.getInstance(); - assertSame(fa.getDatatype(SEQUENCE_VARIANT, "AF"), Datatype.Number); - float[] minmax = fa.getMinMax(SEQUENCE_VARIANT, "AF"); - assertEquals(minmax[0], 0.002f); - assertEquals(minmax[1], 0.005f); + assertTrue(proteinFeatures.isEmpty()); } private File makeVcfFile() throws IOException @@ -272,8 +278,8 @@ public class VCFLoaderTest SequenceI gene1 = alignment.findName("gene1"); int[] to = new int[] { 45051610, 45051634 }; int[] from = new int[] { gene1.getStart(), gene1.getEnd() }; - gene1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to, - 1, 1)); + gene1.setGeneLoci("homo_sapiens", "GRCh38", "17", + new MapList(from, to, 1, 1)); /* * map 'transcript1' to chromosome via 'gene1' @@ -283,9 +289,8 @@ public class VCFLoaderTest to = new int[] { 45051612, 45051619, 45051624, 45051633 }; SequenceI transcript1 = alignment.findName("transcript1"); from = new int[] { transcript1.getStart(), transcript1.getEnd() }; - transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList( - from, to, - 1, 1)); + transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17", + new MapList(from, to, 1, 1)); /* * map gene2 to chromosome reverse strand @@ -293,8 +298,8 @@ public class VCFLoaderTest SequenceI gene2 = alignment.findName("gene2"); to = new int[] { 45051634, 45051610 }; from = new int[] { gene2.getStart(), gene2.getEnd() }; - gene2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to, - 1, 1)); + gene2.setGeneLoci("homo_sapiens", "GRCh38", "17", + new MapList(from, to, 1, 1)); /* * map 'transcript2' to chromosome via 'gene2' @@ -304,9 +309,8 @@ public class VCFLoaderTest to = new int[] { 45051633, 45051624, 45051619, 45051612 }; SequenceI transcript2 = alignment.findName("transcript2"); from = new int[] { transcript2.getStart(), transcript2.getEnd() }; - 
transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList( - from, to, - 1, 1)); + transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17", + new MapList(from, to, 1, 1)); /* * add a protein product as a DBRef on transcript1 @@ -333,8 +337,8 @@ public class VCFLoaderTest SequenceI gene3 = alignment.findName("gene3"); to = new int[] { 45051610, 45051634 }; from = new int[] { gene3.getStart(), gene3.getEnd() }; - gene3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(from, to, - 1, 1)); + gene3.setGeneLoci("homo_sapiens", "GRCh38", "5", + new MapList(from, to, 1, 1)); /* * map 'transcript3' to chromosome @@ -342,9 +346,8 @@ public class VCFLoaderTest SequenceI transcript3 = alignment.findName("transcript3"); to = new int[] { 45051612, 45051619, 45051624, 45051633 }; from = new int[] { transcript3.getStart(), transcript3.getEnd() }; - transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList( - from, to, - 1, 1)); + transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5", + new MapList(from, to, 1, 1)); /* * map 'transcript4' to chromosome @@ -353,9 +356,8 @@ public class VCFLoaderTest to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634, 45051634 }; from = new int[] { transcript4.getStart(), transcript4.getEnd() }; - transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList( - from, to, - 1, 1)); + transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5", + new MapList(from, to, 1, 1)); /* * add a protein product as a DBRef on transcript3 @@ -490,7 +492,7 @@ public class VCFLoaderTest * verify variant feature(s) computed and added to protein * last codon GCT varies to GGT giving A/G in the last peptide position */ - DBRefEntry[] dbRefs = al.getSequenceAt(3).getDBRefs(); + List dbRefs = al.getSequenceAt(3).getDBRefs(); SequenceI peptide = null; for (DBRefEntry dbref : dbRefs) { @@ -500,13 +502,11 @@ public class VCFLoaderTest } } List proteinFeatures = peptide.getSequenceFeatures(); - assertEquals(proteinFeatures.size(), 3); - 
sf = proteinFeatures.get(0); - assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 6); - assertEquals(sf.getEnd(), 6); - assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT); - assertEquals(sf.getDescription(), "p.Ala6Gly"); + + /* + * JAL-3187 don't precompute protein features, do dynamically instead + */ + assertTrue(proteinFeatures.isEmpty()); } /** @@ -549,6 +549,7 @@ public class VCFLoaderTest // gene features include Consequence for all transcripts Map map = (Map) sf.getValue("CSQ"); assertEquals(map.size(), 9); + assertEquals(map.get("PolyPhen"), "Bad"); sf = geneFeatures.get(1); assertEquals(sf.getBegin(), 5); @@ -558,6 +559,7 @@ public class VCFLoaderTest assertEquals(sf.getValue("alleles"), "C,T"); map = (Map) sf.getValue("CSQ"); assertEquals(map.size(), 9); + assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded sf = geneFeatures.get(2); assertEquals(sf.getBegin(), 9); @@ -650,7 +652,7 @@ public class VCFLoaderTest * and GAG/GGG which is E/G in position 4 * the insertion variant is not transferred to the peptide */ - DBRefEntry[] dbRefs = al.findName("transcript3").getDBRefs(); + List dbRefs = al.findName("transcript3").getDBRefs(); SequenceI peptide = null; for (DBRefEntry dbref : dbRefs) { @@ -660,20 +662,24 @@ public class VCFLoaderTest } } List proteinFeatures = peptide.getSequenceFeatures(); - SequenceFeatures.sortFeatures(proteinFeatures, true); - assertEquals(proteinFeatures.size(), 2); - sf = proteinFeatures.get(0); - assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 1); - assertEquals(sf.getEnd(), 1); - assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT); - assertEquals(sf.getDescription(), "agC/agT"); - sf = proteinFeatures.get(1); - assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 4); - assertEquals(sf.getEnd(), 4); - assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT); - assertEquals(sf.getDescription(), "p.Glu4Gly"); + /* + 
* JAL-3187 don't precompute protein features, do dynamically instead + */ + assertTrue(proteinFeatures.isEmpty()); + // SequenceFeatures.sortFeatures(proteinFeatures, true); + // assertEquals(proteinFeatures.size(), 2); + // sf = proteinFeatures.get(0); + // assertEquals(sf.getFeatureGroup(), "VCF"); + // assertEquals(sf.getBegin(), 1); + // assertEquals(sf.getEnd(), 1); + // assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT); + // assertEquals(sf.getDescription(), "agC/agT"); + // sf = proteinFeatures.get(1); + // assertEquals(sf.getFeatureGroup(), "VCF"); + // assertEquals(sf.getBegin(), 4); + // assertEquals(sf.getEnd(), 4); + // assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT); + // assertEquals(sf.getDescription(), "p.Glu4Gly"); /* * verify variant feature(s) added to transcript4 @@ -728,8 +734,7 @@ public class VCFLoaderTest @Test(groups = "Functional") public void testLoadVCFContig() throws IOException { - VCFLoader loader = new VCFLoader( - "test/jalview/io/vcf/testVcf2.vcf"); + VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf2.vcf"); SequenceI seq = loader.loadVCFContig("contig123"); assertEquals(seq.getLength(), 15);