X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=bc6a13777ef32a15a4ac07096d228b995dec13da;hb=be32c14cd8e48fe0a207cd7030cb9cd46f894678;hp=bba21c3e7487a9631f48ea517722fea4d3b85ba1;hpb=ad15cff29620f960119f80176f1fd443da9f6763;p=jalview.git

diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java
index bba21c3..bc6a137 100644
--- a/test/jalview/analysis/AlignmentUtilsTests.java
+++ b/test/jalview/analysis/AlignmentUtilsTests.java
@@ -20,19 +20,70 @@
  */
 package jalview.analysis;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
 import org.junit.Test;
 
+import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.io.AppletFormatAdapter;
+import jalview.io.FormatAdapter;
+import jalview.util.MapList;
 
 public class AlignmentUtilsTests
 {
+  // @formatter:off
+  private static final String TEST_DATA =
+          "# STOCKHOLM 1.0\n" +
+          "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
+          "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
+          "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
+          "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" +
+          "#=GR D.melanogaster.1 SS ................((((\n" +
+          "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" +
+          "#=GR D.melanogaster.2 SS ................((((\n" +
+          "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" +
+          "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
+          "//";
+
+  private static final String AA_SEQS_1 =
+          ">Seq1Name\n" +
+          "K-QY--L\n" +
+          ">Seq2Name\n" +
+          "-R-FP-W-\n";
+
+  private static final String CDNA_SEQS_1 =
+          ">Seq1Name\n" +
+          "AC-GG--CUC-CAA-CT\n" +
+          ">Seq2Name\n" +
+          "-CG-TTA--ACG---AAGT\n";
+
+  private static final String CDNA_SEQS_2 =
+          ">Seq1Name\n" +
+          "GCTCGUCGTACT\n" +
+          ">Seq2Name\n" +
+          "GGGTCAGGCAGT\n";
+  // @formatter:on
+
   public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+
   @Test
   public void testExpandFlanks()
   {
@@ -55,6 +106,708 @@ public class AlignmentUtilsTests
           assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString()));
         }
       }
+    }
   }
+
+  /**
+   * Tests AlignmentUtils.getSequencesByName: sequences that share a name are
+   * collected under a single map key, in the order they occur in the
+   * alignment.
+   *
+   * @throws IOException
+   *           if the FASTA test data cannot be loaded
+   */
+  @Test
+  public void testGetSequencesByName() throws IOException
+  {
+    // Seq1Name occurs twice, Seq2Name once
+    final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
+            + ">Seq1Name\nABCD\n";
+    AlignmentI al = loadAlignment(data, "FASTA");
+    Map<String, List<SequenceI>> map = AlignmentUtils
+            .getSequencesByName(al);
+    assertEquals(2, map.keySet().size());
+    assertEquals(2, map.get("Seq1Name").size());
+    assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
+    assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
+    assertEquals(1, map.get("Seq2Name").size());
+    assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
+  }
+  /**
+   * Helper method to load an alignment and ensure dataset sequences are set up.
+   *
+   * @param data
+   *          the alignment text to parse
+   * @param format
+   *          the format name handed to the reader, e.g. "FASTA"
+   * @return the loaded alignment, after setDataset(null) has been called on it
+   * @throws IOException
+   *           if reading the data fails
+   */
+  protected AlignmentI loadAlignment(final String data, String format) throws IOException
+  {
+    // PASTE: the data argument is the alignment text itself
+    Alignment a = new FormatAdapter().readFile(data,
+            AppletFormatAdapter.PASTE, format);
+    a.setDataset(null);
+    return a;
+  }
+
+  /**
+   * Test mapping of protein to cDNA, for the case where we have no sequence
+   * cross-references, so mappings are made first-served 1-1 where sequences
+   * translate.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testMapProteinToCdna_noXrefs() throws IOException
+  {
+    List<SequenceI> protseqs = new ArrayList<SequenceI>();
+    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
+    AlignmentI protein = new Alignment(
+            protseqs.toArray(new SequenceI[protseqs.size()]));
+    protein.setDataset(null);
+
+    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
+    dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
+    dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
+    dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
+    dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
+    AlignmentI cdna = new Alignment(
+            dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
+    cdna.setDataset(null);
+
+    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+
+    // 3 mappings made, each from 1 to 1 sequence
+    assertEquals(3, protein.getCodonFrames().size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
+
+    // V12345 mapped to A22222
+    AlignedCodonFrame acf = protein.getCodonFrame(
+            protein.getSequenceAt(0)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+    Mapping[] protMappings = acf.getProtMappings();
+    assertEquals(1, protMappings.length);
+    // 3:1 map of dna positions 1-9 onto protein positions 1-3
+    MapList mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[] { 1, 9 },
+            mapList.getFromRanges().get(0)));
+    assertEquals(1, mapList.getFromRanges().size());
+    assertTrue(Arrays.equals(new int[] { 1, 3 },
+            mapList.getToRanges().get(0)));
+    assertEquals(1, mapList.getToRanges().size());
+
+    // V12346 mapped to A33333
+    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+
+    // V12347 mapped to A11111
+    acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+
+    // no mapping involving the 'extra' A44444
+    assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
+  }
+
+  /**
+   * Test for the alignSequenceAs method that takes two sequences and a mapping.
+   * Here the whole dna sequence (positions 1-6) is mapped to the two protein
+   * residues, i.e. there are no introns.
+   */
+  @Test
+  public void testAlignSequenceAs_withMapping_noIntrons()
+  {
+    MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
+
+    /*
+     * No existing gaps in dna:
+     */
+    checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
+            "---GGG---AAA");
+
+    /*
+     * Now introduce gaps in dna but ignore them when realigning.
+     */
+    checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
+            "---GGG---AAA");
+
+    /*
+     * Now include gaps in dna when realigning. First retaining 'mapped' gaps
+     * only, i.e. those within the exon region.
+     */
+    checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
+            "---G-G--G---A--A-A");
+
+    /*
+     * Include all gaps in dna when realigning (within and without the exon
+     * region). The leading gap, and the gaps between codons, are subsumed by
+     * the protein alignment gap.
+     */
+    checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", true, true, map,
+            "---G-GG---AA-A-");
+
+    /*
+     * Include only unmapped gaps in dna when realigning (outside the exon
+     * region). The leading gap, and the gaps between codons, are subsumed by
+     * the protein alignment gap.
+     */
+    checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
+            "---GGG---AAA-");
+  }
+
+  /**
+   * Test for the alignSequenceAs method that takes two sequences and a mapping.
+   * Here only dna positions 4-6 and 10-12 are mapped to protein, so the
+   * remaining dna is intron.
+   */
+  @Test
+  public void testAlignSequenceAs_withMapping_withIntrons()
+  {
+    /*
+     * Exons at codon 2 (AAA) and 4 (TTT)
+     */
+    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+            new int[] { 1, 2 }, 3, 1);
+
+    /*
+     * Simple case: no gaps in dna
+     */
+    checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
+            "GGG---AAACCCTTTGGG");
+
+    /*
+     * Add gaps to dna - but ignore when realigning.
+     */
+    checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
+            false, false, map, "GGG---AAACCCTTTGGG");
+
+    /*
+     * Add gaps to dna - include within exons only when realigning.
+     */
+    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
+            true, false, map, "GGG---A--A---ACCCT-TTGGG");
+
+    /*
+     * Include gaps outside exons only when realigning.
+     */
+    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
+            false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
+
+    /*
+     * Include gaps following first intron if we are 'preserving mapped gaps'
+     * NOTE(review): this call is identical to the 'all gaps' case below -
+     * confirm whether different flags or input were intended here.
+     */
+    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
+            true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
+
+    /*
+     * Include all gaps in dna when realigning.
+     */
+    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
+            true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
+  }
+
+  /**
+   * Test for the case where not all of the protein sequence is mapped to cDNA.
+   */
+  @Test
+  public void testAlignSequenceAs_withMapping_withUnmappedProtein()
+  {
+    /*
+     * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
+     */
+    final int[] exonRanges = { 4, 6, 10, 12 };
+    final int[] residueRanges = { 1, 1, 3, 3 };
+    final MapList exonMap = new MapList(exonRanges, residueRanges, 3, 1);
+    final String ungappedDna = "GGGAAACCCTTTGGG";
+
+    /*
+     * Realignment is expected to abort and leave the dna as it was. Change
+     * this test first if different behaviour is wanted.
+     */
+    checkAlignSequenceAs(ungappedDna, "-A-L-P-", false, false, exonMap,
+            ungappedDna);
+  }
+
+  /**
+   * Helper that builds a dna and a protein sequence (each given a dataset
+   * sequence), registers the supplied mapping between the two dataset
+   * sequences, calls AlignmentUtils.alignSequenceAs on them and asserts the
+   * resulting dna alignment string.
+   *
+   * @param dnaSeq
+   *          the (possibly gapped) dna sequence to realign
+   * @param proteinSeq
+   *          the (possibly gapped) protein sequence to align to
+   * @param preserveMappedGaps
+   *          passed through to alignSequenceAs
+   * @param preserveUnmappedGaps
+   *          passed through to alignSequenceAs
+   * @param map
+   *          the dna-to-protein mapping to register
+   * @param expected
+   *          the dna sequence string expected after the call
+   */
+  protected void checkAlignSequenceAs(final String dnaSeq,
+          final String proteinSeq, final boolean preserveMappedGaps,
+          final boolean preserveUnmappedGaps, MapList map,
+          final String expected)
+  {
+    final SequenceI dnaSequence = new Sequence("Seq1", dnaSeq);
+    dnaSequence.createDatasetSequence();
+    final SequenceI proteinSequence = new Sequence("Seq1", proteinSeq);
+    proteinSequence.createDatasetSequence();
+
+    final AlignedCodonFrame codonFrame = new AlignedCodonFrame();
+    codonFrame.addMap(dnaSequence.getDatasetSequence(),
+            proteinSequence.getDatasetSequence(), map);
+
+    AlignmentUtils.alignSequenceAs(dnaSequence, proteinSequence, codonFrame,
+            "---", '-', preserveMappedGaps, preserveUnmappedGaps);
+
+    final String realigned = dnaSequence.getSequenceAsString();
+    assertEquals(expected, realigned);
+  }
+
+  /**
+   * Test for the alignSequenceAs method where we preserve gaps in introns only.
+   */
+  @Test
+  public void testAlignSequenceAs_keepIntronGapsOnly()
+  {
+    /*
+     * Intron GGGAAA followed by exon CCCTTT
+     */
+    final int[] exonRange = { 7, 12 };
+    final int[] residueRange = { 1, 2 };
+    final MapList exonMap = new MapList(exonRange, residueRange, 3, 1);
+
+    checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, exonMap,
+            "GG-G-AA-ACCCTTT");
+  }
+
+  /**
+   * Test for the method that generates an aligned translated sequence from one
+   * mapping.
+   */
+  @Test
+  public void testGetAlignedTranslation_dnaLikeProtein()
+  {
+    // dna alignment will be replaced
+    SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-");
+    dna.createDatasetSequence();
+    // protein alignment will be 'applied' to dna
+    SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-");
+    protein.createDatasetSequence();
+    MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
+
+    final SequenceI aligned = AlignmentUtils
+            .getAlignedTranslation(protein, '-', acf);
+    // each protein column becomes three dna columns
+    assertEquals("---TGCCAT---TAC------CAG---", aligned.getSequenceAsString());
+    assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence());
+  }
+
+  /**
+   * Test the method that realigns protein to match mapped codon alignment.
+   */
+  @Test
+  public void testAlignProteinAsDna()
+  {
+    // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
+    SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
+    // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
+    SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
+    // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
+    SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
+    AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
+    dna.setDataset(null);
+
+    // protein alignment will be realigned like dna
+    SequenceI prot1 = new Sequence("Seq1", "CHYQ");
+    SequenceI prot2 = new Sequence("Seq2", "CHYQ");
+    SequenceI prot3 = new Sequence("Seq3", "CHYQ");
+    AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
+        prot3 });
+    protein.setDataset(null);
+
+    // the same 12-base to 4-residue map is registered for each dna/protein pair
+    MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
+    acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
+    acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
+    protein.setCodonFrames(Collections.singleton(acf));
+
+    /*
+     * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
+     * [8,9,10] [10,11,12] [11,12,13]
+     */
+    AlignmentUtils.alignProteinAsDna(protein, dna);
+    assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
+    assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
+    assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
+  }
+
+  /**
+   * Test the method that tests whether a CDNA sequence translates to a protein
+   * sequence. The second argument is the offset in the cdna array at which
+   * translation starts (3 skips a leading start codon).
+   */
+  @Test
+  public void testTranslatesAs()
+  {
+    final char[] fpkg = "FPKG".toCharArray();
+
+    assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
+            fpkg));
+    // with start codon
+    assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
+            3, fpkg));
+    // with stop codon1
+    assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
+            0, fpkg));
+    // with stop codon2
+    assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
+            0, fpkg));
+    // with stop codon3
+    assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
+            0, fpkg));
+
+    /*
+     * Start and stop codon together: "atg" + the stop-codon inputs used above.
+     * (These inputs previously read "...aaaggtaa" etc: 17 bases, so they ended
+     * in two dangling bases rather than a complete stop codon.)
+     */
+    // with start and stop codon1
+    assertTrue(AlignmentUtils.translatesAs(
+            "atgtttcccaaagggtaa".toCharArray(), 3, fpkg));
+    // with start and stop codon2
+    assertTrue(AlignmentUtils.translatesAs(
+            "atgtttcccaaagggtag".toCharArray(), 3, fpkg));
+    // with start and stop codon3
+    assertTrue(AlignmentUtils.translatesAs(
+            "atgtttcccaaagggtga".toCharArray(), 3, fpkg));
+
+    // wrong protein
+    assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
+            0, "FPMG".toCharArray()));
+  }
+
+  /**
+   * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
+   * stop codons in addition to the protein coding sequence.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testMapProteinToCdna_withStartAndStopCodons()
+          throws IOException
+  {
+    List<SequenceI> protseqs = new ArrayList<SequenceI>();
+    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
+    AlignmentI protein = new Alignment(
+            protseqs.toArray(new SequenceI[protseqs.size()]));
+    protein.setDataset(null);
+
+    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
+    // start + SAR:
+    dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
+    // = EIQ + stop
+    dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
+    // = start +EIQ + stop
+    dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
+    dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
+    AlignmentI cdna = new Alignment(
+            dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
+    cdna.setDataset(null);
+
+    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+
+    // 3 mappings made, each from 1 to 1 sequence
+    assertEquals(3, protein.getCodonFrames().size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
+
+    // V12345 mapped from A22222 (dna 1-9; the trailing stop codon is excluded)
+    AlignedCodonFrame acf = protein.getCodonFrame(
+            protein.getSequenceAt(0)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+    Mapping[] protMappings = acf.getProtMappings();
+    assertEquals(1, protMappings.length);
+    MapList mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[] { 1, 9 },
+            mapList.getFromRanges().get(0)));
+    assertEquals(1, mapList.getFromRanges().size());
+    assertTrue(Arrays.equals(new int[] { 1, 3 },
+            mapList.getToRanges().get(0)));
+    assertEquals(1, mapList.getToRanges().size());
+
+    // V12346 mapped from A33333 starting position 4
+    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+    protMappings = acf.getProtMappings();
+    assertEquals(1, protMappings.length);
+    mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[] { 4, 12 },
+            mapList.getFromRanges().get(0)));
+    assertEquals(1, mapList.getFromRanges().size());
+    assertTrue(Arrays.equals(new int[] { 1, 3 },
+            mapList.getToRanges().get(0)));
+    assertEquals(1, mapList.getToRanges().size());
+
+    // V12347 mapped to A11111 starting position 4
+    acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+    protMappings = acf.getProtMappings();
+    assertEquals(1, protMappings.length);
+    mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[] { 4, 12 },
+            mapList.getFromRanges().get(0)));
+    assertEquals(1, mapList.getFromRanges().size());
+    assertTrue(Arrays.equals(new int[] { 1, 3 },
+            mapList.getToRanges().get(0)));
+    assertEquals(1, mapList.getToRanges().size());
+
+    // no mapping involving the 'extra' A44444
+    assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
+  }
+
+  /**
+   * Test mapping of protein to cDNA, for the case where we have some sequence
+   * cross-references. Verify that 1-to-many mappings are made where
+   * cross-references exist and sequences are mappable.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testMapProteinToCdna_withXrefs() throws IOException
+  {
+    List<SequenceI> protseqs = new ArrayList<SequenceI>();
+    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
+    AlignmentI protein = new Alignment(
+            protseqs.toArray(new SequenceI[protseqs.size()]));
+    protein.setDataset(null);
+
+    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
+    dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
+    dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
+    dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
+    dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
+    dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
+    AlignmentI cdna = new Alignment(
+            dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
+    cdna.setDataset(null);
+
+    // Xref A22222 to V12345 (should get mapped)
+    dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
+    // Xref V12345 to A44444 (should get mapped)
+    protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
+    // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
+    dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
+    // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
+    // it should get paired up with the unmapped A33333
+    // A11111 should be mapped to V12347
+    // A55555 is spare and has no xref so is not mapped
+
+    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+
+    // 4 dna-to-protein mappings, held in 3 codon frames (one per protein):
+    // 2 for V12345, 1 each for V12346/7
+    assertEquals(3, protein.getCodonFrames().size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
+
+    // one mapping for each of the first 4 cDNA sequences
+    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
+    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
+    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
+    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
+
+    // V12345 mapped to A22222 and A44444
+    AlignedCodonFrame acf = protein.getCodonFrame(
+            protein.getSequenceAt(0)).get(0);
+    assertEquals(2, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+    assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
+            acf.getdnaSeqs()[1]);
+
+    // V12346 mapped to A33333
+    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+
+    // V12347 mapped to A11111
+    acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+
+    // no mapping involving the 'extra' A55555
+    assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
+  }
+
+  /**
+   * Test mapping of protein to cDNA, for the case where we have some sequence
+   * cross-references.
+   * Verify that once we have made an xref mapping we don't
+   * also map un-xrefd sequences.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testMapProteinToCdna_prioritiseXrefs() throws IOException
+  {
+    List<SequenceI> protseqs = new ArrayList<SequenceI>();
+    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
+    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
+    AlignmentI protein = new Alignment(
+            protseqs.toArray(new SequenceI[protseqs.size()]));
+    protein.setDataset(null);
+
+    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
+    dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
+    dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
+    AlignmentI cdna = new Alignment(
+            dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
+    cdna.setDataset(null);
+
+    // Xref A22222 to V12345 (should get mapped)
+    // A11111 should then be mapped to the unmapped V12346
+    dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
+
+    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+
+    // 2 protein mappings made
+    assertEquals(2, protein.getCodonFrames().size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
+
+    // one mapping for each of the cDNA sequences
+    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
+    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
+
+    // V12345 mapped to A22222
+    AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
+            .get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+
+    // V12346 mapped to A11111
+    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
+            acf.getdnaSeqs()[0]);
+  }
+
+  /**
+   * Test the method that shows or hides sequence annotations by type(s) and
+   * selection group.
+   */
+  @Test
+  public void testShowOrHideSequenceAnnotations()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "AAA");
+    SequenceI seq2 = new Sequence("Seq2", "BBB");
+    SequenceI seq3 = new Sequence("Seq3", "CCC");
+    Annotation[] anns = new Annotation[] { new Annotation(2f) };
+    AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
+            anns);
+    ann1.setSequenceRef(seq1);
+    AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
+            anns);
+    ann2.setSequenceRef(seq2);
+    AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
+            anns);
+    AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
+    ann4.setSequenceRef(seq1);
+    AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
+    ann5.setSequenceRef(seq2);
+    AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
+    AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
+    al.addAnnotation(ann1); // Structure for Seq1
+    al.addAnnotation(ann2); // Structure for Seq2
+    al.addAnnotation(ann3); // Structure for no sequence
+    al.addAnnotation(ann4); // Temp for seq1
+    al.addAnnotation(ann5); // Temp for seq2
+    al.addAnnotation(ann6); // Temp for no sequence
+    List<String> types = new ArrayList<String>();
+    List<SequenceI> scope = new ArrayList<SequenceI>();
+
+    /*
+     * Set all sequence related Structure to hidden (ann1, ann2)
+     */
+    types.add("Structure");
+    AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
+            false);
+    assertFalse(ann1.visible);
+    assertFalse(ann2.visible);
+    assertTrue(ann3.visible); // not sequence-related, not affected
+    assertTrue(ann4.visible); // not Structure, not affected
+    assertTrue(ann5.visible); // "
+    assertTrue(ann6.visible); // not sequence-related, not affected
+
+    /*
+     * Set Temp in {seq1, seq3} to hidden
+     */
+    types.clear();
+    types.add("Temp");
+    scope.add(seq1);
+    scope.add(seq3);
+    AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
+            false);
+    assertFalse(ann1.visible); // unchanged
+    assertFalse(ann2.visible); // unchanged
+    assertTrue(ann3.visible); // not sequence-related, not affected
+    assertFalse(ann4.visible); // Temp for seq1 hidden
+    assertTrue(ann5.visible); // not in scope, not affected
+    assertTrue(ann6.visible); // not sequence-related, not affected
+
+    /*
+     * Set Temp in all sequences to hidden (null scope, so the scope list is
+     * not consulted here)
+     */
+    types.clear();
+    types.add("Temp");
+    AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
+            false);
+    assertFalse(ann1.visible); // unchanged
+    assertFalse(ann2.visible); // unchanged
+    assertTrue(ann3.visible); // not sequence-related, not affected
+    assertFalse(ann4.visible); // Temp for seq1 hidden
+    assertFalse(ann5.visible); // Temp for seq2 hidden
+    assertTrue(ann6.visible); // not sequence-related, not affected
+
+    /*
+     * Set all types in {seq1, seq3} to visible
+     */
+    types.clear();
+    scope.clear();
+    scope.add(seq1);
+    scope.add(seq3);
+    AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
+            true);
+    assertTrue(ann1.visible); // Structure for seq1 set visible
+    assertFalse(ann2.visible); // not in scope, unchanged
+    assertTrue(ann3.visible); // not sequence-related, not affected
+    assertTrue(ann4.visible); // Temp for seq1 set visible
+    assertFalse(ann5.visible); // not in scope, unchanged
+    assertTrue(ann6.visible); // not sequence-related, not affected
+
+    /*
+     * Set all types in all scope to hidden
+     */
+    AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
+            false);
+    assertFalse(ann1.visible);
+    assertFalse(ann2.visible);
+    assertTrue(ann3.visible); // not sequence-related, not affected
+    assertFalse(ann4.visible);
+    assertFalse(ann5.visible);
+    assertTrue(ann6.visible); // not sequence-related, not affected
   }
 }