X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=b4628b882c618e1611e4c39a1adf031270b56485;hb=73fd64da56448bcb2a7771cc174d9d9c62da87ad;hp=a0c9ace8e2fd1e882ae656fe32b5d2892cb3ba52;hpb=fbc1dd3e5d38efcee395a49e1774226bf45d6687;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index a0c9ace..b4628b8 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -977,7 +977,7 @@ public class AlignmentUtilsTests /* * scenario: * dna1 --> [4, 6] [10,12] --> pep1 - * dna2 --> [1, 3] [7, 9] [13,15] --> pep1 + * dna2 --> [1, 3] [7, 9] [13,15] --> pep2 */ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); @@ -993,30 +993,52 @@ public class AlignmentUtilsTests dna.setDataset(null); /* + * put a variant feature on dna2 base 8 + * - should transfer to cds2 base 5 + */ + dna2.addSequenceFeature(new SequenceFeature("variant", "hgmd", 8, 8, + 0f, null)); + + /* * need a sourceDbRef if we are to construct dbrefs to the CDS - * sequence + * sequence from the dna contig sequences */ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1"); - dna1.getDatasetSequence().setSourceDBRef(dbref); + dna1.getDatasetSequence().addDBRef(dbref); + org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0)); dbref = new DBRefEntry("ENSEMBL", "0", "dna2"); - dna2.getDatasetSequence().setSourceDBRef(dbref); + dna2.getDatasetSequence().addDBRef(dbref); + org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0)); /* * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment * dataset (e.g. 
added from dbrefs by CrossRef.findXrefSequences) */ - MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapfordna1); dna.addCodonFrame(acf); - map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, + MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, + new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); - acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), + mapfordna2); dna.addCodonFrame(acf); /* + * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation + */ + DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1", + new Mapping(mapfordna1)); + dna1.getDatasetSequence().addDBRef(dna1xref); + DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2", + new Mapping(mapfordna2)); + dna2.getDatasetSequence().addDBRef(dna2xref); + + /* * execute method under test: */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { @@ -1042,11 +1064,12 @@ public class AlignmentUtilsTests * verify CDS has a dbref with mapping to peptide */ assertNotNull(cds1Dss.getDBRefs()); - assertEquals(1, cds1Dss.getDBRefs().length); + assertEquals(2, cds1Dss.getDBRefs().length); dbref = cds1Dss.getDBRefs()[0]; - assertEquals("UNIPROT", dbref.getSource()); - assertEquals("0", dbref.getVersion()); - assertEquals("pep1", dbref.getAccessionId()); + assertEquals(dna1xref.getSource(), dbref.getSource()); + // version is via ensembl's primary ref + assertEquals(dna1xref.getVersion(), dbref.getVersion()); + assertEquals(dna1xref.getAccessionId(), 
dbref.getAccessionId()); assertNotNull(dbref.getMap()); assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo()); MapList cdsMapping = new MapList(new int[] { 1, 6 }, @@ -1057,6 +1080,7 @@ public class AlignmentUtilsTests * verify peptide has added a dbref with reverse mapping to CDS */ assertNotNull(pep1.getDBRefs()); + // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ? assertEquals(2, pep1.getDBRefs().length); dbref = pep1.getDBRefs()[1]; assertEquals("ENSEMBL", dbref.getSource()); @@ -1137,6 +1161,16 @@ public class AlignmentUtilsTests assertSame(cds2Dss, m.getSequence()); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); + + /* + * check cds2 acquired a variant feature in position 5 + */ + SequenceFeature[] sfs = cds2Dss.getSequenceFeatures(); + assertNotNull(sfs); + assertEquals(1, sfs.length); + assertEquals("variant", sfs[0].type); + assertEquals(5, sfs[0].begin); + assertEquals(5, sfs[0].end); } /** @@ -1937,13 +1971,15 @@ public class AlignmentUtilsTests public void testComputePeptideVariants() { /* - * scenario: AAATTTCCC codes for KFP, with variants - * GAA -> E - * CAA -> Q - * AAG synonymous - * AAT -> N - * TTC synonymous - * CAC,CGC -> H,R (as one variant) + * scenario: AAATTTCCC codes for KFP + * variants: + * GAA -> E source: Ensembl + * CAA -> Q source: dbSNP + * AAG synonymous source: COSMIC + * AAT -> N source: Ensembl + * ...TTC synonymous source: dbSNP + * ......CAC,CGC -> H,R source: COSMIC + * (one variant with two alleles) */ SequenceI peptide = new Sequence("pep/10-12", "KFP"); @@ -1951,32 +1987,35 @@ public class AlignmentUtilsTests * two distinct variants for codon 1 position 1 * second one has clinical significance */ + String ensembl = "Ensembl"; + String dbSnp = "dbSNP"; + String cosmic = "COSMIC"; SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, - 0f, null); + 0f, ensembl); sf1.setValue("alleles", "A,G"); // GAA -> E sf1.setValue("ID", "var1.125A>G"); SequenceFeature sf2 = new 
SequenceFeature("sequence_variant", "", 1, 1, - 0f, null); + 0f, dbSnp); sf2.setValue("alleles", "A,C"); // CAA -> Q sf2.setValue("ID", "var2"); sf2.setValue("clinical_significance", "Dodgy"); SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3, - 0f, null); + 0f, cosmic); sf3.setValue("alleles", "A,G"); // synonymous sf3.setValue("ID", "var3"); sf3.setValue("clinical_significance", "None"); SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, - 0f, null); + 0f, ensembl); sf4.setValue("alleles", "A,T"); // AAT -> N sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off sf4.setValue("clinical_significance", "Benign"); SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6, - 0f, null); + 0f, dbSnp); sf5.setValue("alleles", "T,C"); // synonymous sf5.setValue("ID", "var5"); sf5.setValue("clinical_significance", "Bad"); SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8, - 0f, null); + 0f, cosmic); sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R sf6.setValue("ID", "var6"); sf6.setValue("clinical_significance", "Good"); @@ -2024,14 +2063,15 @@ public class AlignmentUtilsTests /* * verify added sequence features for - * var1 K -> E - * var2 K -> Q - * var4 K -> N - * var6 P -> H - * var6 P -> R + * var1 K -> E Ensembl + * var2 K -> Q dbSNP + * var4 K -> N Ensembl + * var6 P -> H COSMIC + * var6 P -> R COSMIC */ SequenceFeature[] sfs = peptide.getSequenceFeatures(); assertEquals(5, sfs.length); + SequenceFeature sf = sfs[0]; assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); @@ -2044,7 +2084,8 @@ public class AlignmentUtilsTests assertEquals( "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG", sf.links.get(0)); - assertEquals("Jalview", sf.getFeatureGroup()); + assertEquals(ensembl, sf.getFeatureGroup()); + sf = sfs[1]; assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); @@ -2056,7 +2097,8 @@ public class 
AlignmentUtilsTests assertEquals( "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", sf.links.get(0)); - assertEquals("Jalview", sf.getFeatureGroup()); + assertEquals(dbSnp, sf.getFeatureGroup()); + sf = sfs[2]; assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); @@ -2068,7 +2110,9 @@ public class AlignmentUtilsTests assertEquals( "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", sf.links.get(0)); - assertEquals("Jalview", sf.getFeatureGroup()); + assertEquals(ensembl, sf.getFeatureGroup()); + + // var6 generates two distinct protein variant features sf = sfs[3]; assertEquals(3, sf.getBegin()); assertEquals(3, sf.getEnd()); @@ -2080,8 +2124,8 @@ public class AlignmentUtilsTests assertEquals( "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", sf.links.get(0)); - // var5 generates two distinct protein variant features - assertEquals("Jalview", sf.getFeatureGroup()); + assertEquals(cosmic, sf.getFeatureGroup()); + sf = sfs[4]; assertEquals(3, sf.getBegin()); assertEquals(3, sf.getEnd()); @@ -2093,7 +2137,7 @@ public class AlignmentUtilsTests assertEquals( "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", sf.links.get(0)); - assertEquals("Jalview", sf.getFeatureGroup()); + assertEquals(cosmic, sf.getFeatureGroup()); } /** @@ -2440,6 +2484,7 @@ public class AlignmentUtilsTests SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA"); dna6.createDatasetSequence(); al1.addSequence(dna6); + // JAL-2110 JBP Comment: what's the use case for this behaviour ?
assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2)); } @@ -2448,8 +2493,9 @@ public class AlignmentUtilsTests { SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); - SequenceI as1 = dna1.deriveSequence(), as2 = dna1.deriveSequence() - .getSubSequence(3, 7), as3 = dna2.deriveSequence(); + SequenceI as1 = dna1.deriveSequence(); + SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); + SequenceI as3 = dna2.deriveSequence(); as1.insertCharAt(6, 5, '-'); String s_as1 = as1.getSequenceAsString(); as2.insertCharAt(6, 5, '-'); @@ -2460,8 +2506,9 @@ public class AlignmentUtilsTests // why do we need to cast this still ? ((Alignment) aligned).createDatasetAlignment(); - SequenceI uas1 = dna1.deriveSequence(), uas2 = dna1.deriveSequence() - .getSubSequence(3, 7), uas3 = dna2.deriveSequence(); + SequenceI uas1 = dna1.deriveSequence(); + SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7); + SequenceI uas3 = dna2.deriveSequence(); AlignmentI tobealigned = new Alignment(new SequenceI[] { uas1, uas2, uas3 }); ((Alignment) tobealigned).createDatasetAlignment();