X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;fp=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=c436818e80485526e409347d53e1534872df0472;hb=61f1a8b75ea5ce352d6214c34fbdcd58bafbbb73;hp=3ada6fab0e198577cb21eabae6e016e5ef4ce06c;hpb=e06580cb9fa7255844392883e96d5f9cbc680ec1;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 3ada6fa..c436818 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -317,4 +317,162 @@ public class AlignmentUtilsTests assertTrue(Arrays.equals(new int[] { 1, 3 }, mapList.getToRanges())); } + + /** + * Test for the alignSequenceAs method that takes two sequences and a mapping. + */ + @Test + public void testAlignSequenceAs_withMapping_noIntrons() + { + /* + * Simple case: no gaps in dna + */ + SequenceI dna = new Sequence("Seq1", "GGGAAA"); + dna.createDatasetSequence(); + SequenceI protein = new Sequence("Seq1", "-A-L-"); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 6 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + + /* + * No existing gaps in dna: + */ + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("---GGG---AAA", dna.getSequenceAsString()); + + /* + * Now introduce gaps in dna but ignore them when realigning. + */ + dna.setSequence("-G-G-G-A-A-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("---GGG---AAA", dna.getSequenceAsString()); + + /* + * Now include gaps in dna when realigning. First retaining 'mapped' gaps + * only, i.e. those within the exon region. + */ + dna.setSequence("-G-G--G-A--A-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + false); + assertEquals("---G-G--G---A--A-A", dna.getSequenceAsString()); + + /* + * Include all gaps in dna when realigning (within and without the exon + * region). The leading gap, and the gaps between codons, are subsumed by + * the protein alignment gap. + */ + dna.setSequence("-G-GG--AA-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + true); + assertEquals("---G-GG---AA-A-", dna.getSequenceAsString()); + + /* + * Include only unmapped gaps in dna when realigning (outside the exon + * region). The leading gap, and the gaps between codons, are subsumed by + * the protein alignment gap. + */ + dna.setSequence("-G-GG--AA-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + true); + assertEquals("---GGG---AAA-", dna.getSequenceAsString()); + } + + /** + * Test for the alignSequenceAs method that takes two sequences and a mapping. + */ + @Test + public void testAlignSequenceAs_withMapping_withIntrons() + { + /* + * Simple case: no gaps in dna + */ + SequenceI dna = new Sequence("Seq1", "GGGAAACCCTTTGGG"); + dna.createDatasetSequence(); + SequenceI protein = new Sequence("Seq1", "-A-L-"); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + + /* + * Exons at codon 2 (AAA) and 4 (TTT) + */ + MapList map = new MapList(new int[] + { 4, 6, 10, 12 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + + /* + * Align dna as "-A-L-". The protein 'gaps' follow the introns, i.e are + * placed immediately before the mapped codons. + */ + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("GGG---AAACCC---TTTGGG", dna.getSequenceAsString()); + + /* + * Add gaps to dna - but ignore when realigning. + */ + dna.setSequence("-G-G-G--A--A---AC-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("GGG---AAACCC---TTTGGG", dna.getSequenceAsString()); + + /* + * Add gaps to dna - include within exons only when realigning. + */ + dna.setSequence("-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + false); + assertEquals("GGG---A--A---ACCC---T-TTGGG", dna.getSequenceAsString()); + + /* + * Include gaps outside exons only when realigning. + */ + dna.setSequence("-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + true); + assertEquals("-G-G-G---AAA-C-CC---TTT-GG-G-", dna.getSequenceAsString()); + + /* + * Include all gaps in dna when realigning. + */ + dna.setSequence("-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + true); + assertEquals("-G-G-G---A--A---A-C-CC---T-TT-GG-G-", + dna.getSequenceAsString()); + } + + /** + * Test for the case where not all of the protein sequence is mapped to cDNA. + */ + @Test + public void testAlignSequenceAs_withMapping_withUnmappedProtein() + { + SequenceI dna = new Sequence("Seq1", "GGGAAACCCTTTGGG"); + dna.createDatasetSequence(); + SequenceI protein = new Sequence("Seq1", "-A-L-P-"); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + + /* + * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P + */ + MapList map = new MapList(new int[] + { 4, 6, 10, 12 }, new int[] + { 1, 1, 3, 3 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + + /* + * Align dna as "-A-L-P-". Currently, does nothing (aborts realignment). + * Change this test first if different behaviour wanted. + */ + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("GGGAAACCCTTTGGG", dna.getSequenceAsString()); + } }