/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.analysis;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.junit.Test;

import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.io.AppletFormatAdapter;
import jalview.io.FormatAdapter;
import jalview.util.MapList;

/**
 * JUnit tests for the static helper methods of {@link AlignmentUtils}:
 * context expansion, grouping sequences by name, protein/cDNA mapping,
 * realignment of one sequence to match another via a mapping, translation
 * checks, and showing/hiding of sequence annotations.
 */
public class AlignmentUtilsTests {
  // @formatter:off
  private static final String TEST_DATA = 
          "# STOCKHOLM 1.0\n" +
          "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
          "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
          "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
          "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" +
          "#=GR D.melanogaster.1 SS ................((((\n" +
          "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" +
          "#=GR D.melanogaster.2 SS ................((((\n" +
          "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" +
          "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
          "//";

  private static final String AA_SEQS_1 = 
          ">Seq1Name\n" +
          "K-QY--L\n" +
          ">Seq2Name\n" +
          "-R-FP-W-\n";

  private static final String CDNA_SEQS_1 = 
          ">Seq1Name\n" +
          "AC-GG--CUC-CAA-CT\n" +
          ">Seq2Name\n" +
          "-CG-TTA--ACG---AAGT\n";

  private static final String CDNA_SEQS_2 = 
          ">Seq1Name\n" +
          "GCTCGUCGTACT\n" +
          ">Seq2Name\n" +
          "GGGTCAGGCAGT\n";
  // @formatter:on

  /** Shared sequence from which testExpandFlanks derives its subsequences. */
  public static Sequence ts = new Sequence("short",
          "ASDASDASDASDASDASDASDASDASDASDASDASDASD");

  /**
   * Builds an alignment from overlapping subsequences of {@link #ts}, then
   * calls expandContext for flank sizes -1 to 24, printing each result. For
   * flank size -1 only, asserts that each expanded sequence (with gap runs
   * removed) matches its dataset sequence, ignoring case.
   */
  @Test
  public void testExpandFlanks() {
    AlignmentI al = new Alignment(new Sequence[] {});
    for (int i = 4; i < 14; i += 3) {
      SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
      al.addSequence(s1);
    }
    System.out.println(new AppletFormatAdapter().formatSequences("Clustal",
            al, true));
    for (int flnk = -1; flnk < 25; flnk++) {
      System.out.println("\nFlank size: " + flnk);
      AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
      System.out.println(new AppletFormatAdapter().formatSequences(
              "Clustal", exp, true));
      if (flnk == -1) {
        for (SequenceI sq : exp.getSequences()) {
          String ung = sq.getSequenceAsString().replaceAll("-+", "");
          String dataset = sq.getDatasetSequence().getSequenceAsString();
          assertTrue(
                  "Flanking sequence not the same as original dataset sequence.\n"
                          + ung + "\n" + dataset,
                  ung.equalsIgnoreCase(dataset));
        }
      }
    }
  }

  /**
   * Test method that returns a map of lists of sequences by sequence name.
   * 
   * @throws IOException
   */
  @Test
  public void testGetSequencesByName() throws IOException {
    final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
            + ">Seq1Name\nABCD\n";
    AlignmentI al = loadAlignment(data, "FASTA");
    Map<String, List<SequenceI>> map = AlignmentUtils
            .getSequencesByName(al);
    assertEquals(2, map.keySet().size());
    assertEquals(2, map.get("Seq1Name").size());
    assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
    assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
    assertEquals(1, map.get("Seq2Name").size());
    assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
  }

  /**
   * Helper method to load an alignment and ensure dataset sequences are set
   * up.
   * 
   * @param data
   *          the alignment text, read as pasted input
   * @param format
   *          the format name passed to FormatAdapter.readFile (the tests in
   *          this class pass "FASTA")
   * @return the alignment read from the data, after setDataset(null) has been
   *         called on it
   * @throws IOException
   */
  protected AlignmentI loadAlignment(final String data, String format)
          throws IOException {
    Alignment a = new FormatAdapter().readFile(data,
            AppletFormatAdapter.PASTE, format);
    a.setDataset(null);
    return a;
  }

  /**
   * Test mapping of protein to cDNA, for the case where we have no sequence
   * cross-references, so mappings are made first-served 1-1 where sequences
   * translate.
   * 
   * @throws IOException
   */
  @Test
  public void testMapProteinToCdna_noXrefs() throws IOException {
    List<SequenceI> protseqs = new ArrayList<SequenceI>();
    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
    AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
    protein.setDataset(null);

    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
    dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
    dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
    dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
    dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
    AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
    cdna.setDataset(null);

    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));

    // 3 mappings made, each from 1 to 1 sequence
    assertEquals(3, protein.getCodonFrames().size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());

    // V12345 mapped to A22222
    AlignedCodonFrame acf = protein.getCodonFrame(
            protein.getSequenceAt(0)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
            acf.getdnaSeqs()[0]);
    Mapping[] protMappings = acf.getProtMappings();
    assertEquals(1, protMappings.length);
    MapList mapList = protMappings[0].getMap();
    assertEquals(3, mapList.getFromRatio());
    assertEquals(1, mapList.getToRatio());
    assertTrue(Arrays.equals(new int[] { 1, 9 },
            mapList.getFromRanges().get(0)));
    assertEquals(1, mapList.getFromRanges().size());
    assertTrue(Arrays.equals(new int[] { 1, 3 },
            mapList.getToRanges().get(0)));
    assertEquals(1, mapList.getToRanges().size());

    // V12346 mapped to A33333
    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
            acf.getdnaSeqs()[0]);

    // V12347 mapped to A11111
    acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
            acf.getdnaSeqs()[0]);

    // no mapping involving the 'extra' A44444
    assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
  }

  /**
   * Test for the alignSequenceAs method that takes two sequences and a
   * mapping.
   */
  @Test
  public void testAlignSequenceAs_withMapping_noIntrons() {
    MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);

    /*
     * No existing gaps in dna:
     */
    checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
            "---GGG---AAA");

    /*
     * Now introduce gaps in dna but ignore them when realigning.
     */
    checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
            "---GGG---AAA");

    /*
     * Now include gaps in dna when realigning. First retaining 'mapped' gaps
     * only, i.e. those within the exon region.
     */
    checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
            "---G-G--G---A--A-A");

    /*
     * Include all gaps in dna when realigning (within and without the exon
     * region). The leading gap, and the gaps between codons, are subsumed by
     * the protein alignment gap.
     */
    checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", true, true, map,
            "---G-GG---AA-A-");

    /*
     * Include only unmapped gaps in dna when realigning (outside the exon
     * region). The leading gap, and the gaps between codons, are subsumed by
     * the protein alignment gap.
     */
    checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
            "---GGG---AAA-");
  }

  /**
   * Test for the alignSequenceAs method that takes two sequences and a
   * mapping.
   */
  @Test
  public void testAlignSequenceAs_withMapping_withIntrons() {
    /*
     * Exons at codon 2 (AAA) and 4 (TTT)
     */
    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
            new int[] { 1, 2 }, 3, 1);

    /*
     * Simple case: no gaps in dna
     */
    checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
            "GGG---AAACCCTTTGGG");

    /*
     * Add gaps to dna - but ignore when realigning.
     */
    checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
            false, false, map, "GGG---AAACCCTTTGGG");

    /*
     * Add gaps to dna - include within exons only when realigning.
     */
    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
            true, false, map, "GGG---A--A---ACCCT-TTGGG");

    /*
     * Include gaps outside exons only when realigning.
     */
    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
            false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");

    /*
     * Include gaps following first intron if we are 'preserving mapped gaps'
     */
    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
            true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");

    /*
     * Include all gaps in dna when realigning. (Same inputs and expected
     * result as the previous check.)
     */
    checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
            true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
  }

  /**
   * Test for the case where not all of the protein sequence is mapped to
   * cDNA.
   */
  @Test
  public void testAlignSequenceAs_withMapping_withUnmappedProtein() {
    /*
     * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
     */
    final MapList map = new MapList(new int[] { 4, 6, 10, 12 },
            new int[] { 1, 1, 3, 3 }, 3, 1);

    /*
     * Expect alignment does nothing (aborts realignment). Change this test
     * first if different behaviour wanted.
     */
    checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false, false, map,
            "GGGAAACCCTTTGGG");
  }

  /**
   * Helper method that performs and verifies the method under test. Creates
   * dna and protein sequences (each with a dataset sequence), registers the
   * mapping between their dataset sequences, calls alignSequenceAs with "---"
   * and '-' as the gap arguments, and asserts the resulting dna string.
   * 
   * @param dnaSeq
   *          the dna sequence string before realignment
   * @param proteinSeq
   *          the protein sequence string passed as the sequence to align to
   * @param preserveMappedGaps
   *          passed through to alignSequenceAs
   * @param preserveUnmappedGaps
   *          passed through to alignSequenceAs
   * @param map
   *          mapping added between the dna and protein dataset sequences
   * @param expected
   *          the expected dna sequence string after realignment
   */
  protected void checkAlignSequenceAs(final String dnaSeq,
          final String proteinSeq, final boolean preserveMappedGaps,
          final boolean preserveUnmappedGaps, MapList map,
          final String expected) {
    SequenceI dna = new Sequence("Seq1", dnaSeq);
    dna.createDatasetSequence();
    SequenceI protein = new Sequence("Seq1", proteinSeq);
    protein.createDatasetSequence();

    AlignedCodonFrame acf = new AlignedCodonFrame();
    acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);

    AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-',
            preserveMappedGaps, preserveUnmappedGaps);
    assertEquals(expected, dna.getSequenceAsString());
  }

  /**
   * Test for the alignSequenceAs method where we preserve gaps in introns
   * only.
   */
  @Test
  public void testAlignSequenceAs_keepIntronGapsOnly() {
    /*
     * Intron GGGAAA followed by exon CCCTTT
     */
    MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3, 1);

    checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
            "GG-G-AA-ACCCTTT");
  }

  /**
   * Test for the method that generates an aligned translated sequence from
   * one mapping.
   */
  @Test
  public void testGetAlignedTranslation_dnaLikeProtein() {
    // dna alignment will be replaced
    SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-");
    dna.createDatasetSequence();
    // protein alignment will be 'applied' to dna
    SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-");
    protein.createDatasetSequence();
    MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
    AlignedCodonFrame acf = new AlignedCodonFrame();
    acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);

    final SequenceI aligned = AlignmentUtils.getAlignedTranslation(protein,
            '-', acf);
    assertEquals("---TGCCAT---TAC------CAG---",
            aligned.getSequenceAsString());
    assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence());
  }

  /**
   * Test the method that realigns protein to match mapped codon alignment.
   */
  @Test
  public void testAlignProteinAsDna() {
    // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
    SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
    // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
    SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
    // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
    SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
    AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
    dna.setDataset(null);

    // protein alignment will be realigned like dna
    SequenceI prot1 = new Sequence("Seq1", "CHYQ");
    SequenceI prot2 = new Sequence("Seq2", "CHYQ");
    SequenceI prot3 = new Sequence("Seq3", "CHYQ");
    AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
        prot3 });
    protein.setDataset(null);

    MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
    AlignedCodonFrame acf = new AlignedCodonFrame();
    acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
    acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
    acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
    protein.setCodonFrames(Collections.singleton(acf));

    /*
     * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7]
     * [7,8,9] [8,9,10] [10,11,12] [11,12,13]
     */
    AlignmentUtils.alignProteinAsDna(protein, dna);
    assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
    assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
    assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
  }

  /**
   * Test the method that tests whether a CDNA sequence translates to a
   * protein sequence
   */
  @Test
  public void testTranslatesAs() {
    assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
            "FPKG".toCharArray()));
    // with start codon
    assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
            3, "FPKG".toCharArray()));
    // with stop codon1
    assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
            0, "FPKG".toCharArray()));
    // with stop codon2
    assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
            0, "FPKG".toCharArray()));
    // with stop codon3
    assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
            0, "FPKG".toCharArray()));

    /*
     * The three inputs below are start codon + tttcccaaaggg + stop codon, so
     * that the final three bases really are a complete stop codon.
     */
    // with start and stop codon1
    assertTrue(AlignmentUtils.translatesAs(
            "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
    // with start and stop codon2
    assertTrue(AlignmentUtils.translatesAs(
            "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
    // with start and stop codon3
    assertTrue(AlignmentUtils.translatesAs(
            "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));

    // wrong protein
    assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
            0, "FPMG".toCharArray()));
  }

  /**
   * Test mapping of protein to cDNA, for cases where the cDNA has start
   * and/or stop codons in addition to the protein coding sequence.
   * 
   * @throws IOException
   */
  @Test
  public void testMapProteinToCdna_withStartAndStopCodons()
          throws IOException {
    List<SequenceI> protseqs = new ArrayList<SequenceI>();
    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
    AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
    protein.setDataset(null);

    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
    // start + SAR:
    dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
    // = EIQ + stop
    dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
    // = start +EIQ + stop
    dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
    dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
    AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
    cdna.setDataset(null);

    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));

    // 3 mappings made, each from 1 to 1 sequence
    assertEquals(3, protein.getCodonFrames().size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());

    // V12345 mapped from A22222
    AlignedCodonFrame acf = protein.getCodonFrame(
            protein.getSequenceAt(0)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
            acf.getdnaSeqs()[0]);
    Mapping[] protMappings = acf.getProtMappings();
    assertEquals(1, protMappings.length);
    MapList mapList = protMappings[0].getMap();
    assertEquals(3, mapList.getFromRatio());
    assertEquals(1, mapList.getToRatio());
    assertTrue(Arrays.equals(new int[] { 1, 9 },
            mapList.getFromRanges().get(0)));
    assertEquals(1, mapList.getFromRanges().size());
    assertTrue(Arrays.equals(new int[] { 1, 3 },
            mapList.getToRanges().get(0)));
    assertEquals(1, mapList.getToRanges().size());

    // V12346 mapped from A33333 starting position 4
    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
            acf.getdnaSeqs()[0]);
    protMappings = acf.getProtMappings();
    assertEquals(1, protMappings.length);
    mapList = protMappings[0].getMap();
    assertEquals(3, mapList.getFromRatio());
    assertEquals(1, mapList.getToRatio());
    assertTrue(Arrays.equals(new int[] { 4, 12 },
            mapList.getFromRanges().get(0)));
    assertEquals(1, mapList.getFromRanges().size());
    assertTrue(Arrays.equals(new int[] { 1, 3 },
            mapList.getToRanges().get(0)));
    assertEquals(1, mapList.getToRanges().size());

    // V12347 mapped to A11111 starting position 4
    acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
            acf.getdnaSeqs()[0]);
    protMappings = acf.getProtMappings();
    assertEquals(1, protMappings.length);
    mapList = protMappings[0].getMap();
    assertEquals(3, mapList.getFromRatio());
    assertEquals(1, mapList.getToRatio());
    assertTrue(Arrays.equals(new int[] { 4, 12 },
            mapList.getFromRanges().get(0)));
    assertEquals(1, mapList.getFromRanges().size());
    assertTrue(Arrays.equals(new int[] { 1, 3 },
            mapList.getToRanges().get(0)));
    assertEquals(1, mapList.getToRanges().size());

    // no mapping involving the 'extra' A44444
    assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
  }

  /**
   * Test mapping of protein to cDNA, for the case where we have some sequence
   * cross-references. Verify that 1-to-many mappings are made where
   * cross-references exist and sequences are mappable.
   * 
   * @throws IOException
   */
  @Test
  public void testMapProteinToCdna_withXrefs() throws IOException {
    List<SequenceI> protseqs = new ArrayList<SequenceI>();
    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
    AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
    protein.setDataset(null);

    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
    dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
    dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
    dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
    dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
    dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
    AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
    cdna.setDataset(null);

    // Xref A22222 to V12345 (should get mapped)
    dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
    // Xref V12345 to A44444 (should get mapped)
    protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
    // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
    dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
    // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
    // it should get paired up with the unmapped A33333
    // A11111 should be mapped to V12347
    // A55555 is spare and has no xref so is not mapped

    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));

    // 4 protein mappings made for 3 proteins (2 to V12345, 1 each to
    // V12346/7), held in 3 codon frames - one per protein
    assertEquals(3, protein.getCodonFrames().size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());

    // one mapping for each of the first 4 cDNA sequences
    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());

    // V12345 mapped to A22222 and A44444
    AlignedCodonFrame acf = protein.getCodonFrame(
            protein.getSequenceAt(0)).get(0);
    assertEquals(2, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
            acf.getdnaSeqs()[0]);
    assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
            acf.getdnaSeqs()[1]);

    // V12346 mapped to A33333
    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
            acf.getdnaSeqs()[0]);

    // V12347 mapped to A11111
    acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
            acf.getdnaSeqs()[0]);

    // no mapping involving the 'extra' A55555
    assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
  }

  /**
   * Test mapping of protein to cDNA, for the case where we have some sequence
   * cross-references. Verify that once we have made an xref mapping we don't
   * also map un-xrefd sequences.
   * 
   * @throws IOException
   */
  @Test
  public void testMapProteinToCdna_prioritiseXrefs() throws IOException {
    List<SequenceI> protseqs = new ArrayList<SequenceI>();
    protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
    protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
    AlignmentI protein = new Alignment(
            protseqs.toArray(new SequenceI[protseqs.size()]));
    protein.setDataset(null);

    List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
    dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
    dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
    AlignmentI cdna = new Alignment(
            dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
    cdna.setDataset(null);

    // Xref A22222 to V12345 (should get mapped)
    // A11111 should then be mapped to the unmapped V12346
    dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));

    assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));

    // 2 protein mappings made
    assertEquals(2, protein.getCodonFrames().size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());

    // one mapping for each of the cDNA sequences
    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
    assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());

    // V12345 mapped to A22222
    AlignedCodonFrame acf = protein.getCodonFrame(
            protein.getSequenceAt(0)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
            acf.getdnaSeqs()[0]);

    // V12346 mapped to A11111
    acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
    assertEquals(1, acf.getdnaSeqs().length);
    assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
            acf.getdnaSeqs()[0]);
  }

  /**
   * Test the method that shows or hides sequence annotations by type(s) and
   * selection group.
   */
  @Test
  public void testShowOrHideSequenceAnnotations() {
    SequenceI seq1 = new Sequence("Seq1", "AAA");
    SequenceI seq2 = new Sequence("Seq2", "BBB");
    SequenceI seq3 = new Sequence("Seq3", "CCC");
    Annotation[] anns = new Annotation[] { new Annotation(2f) };
    AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
            anns);
    ann1.setSequenceRef(seq1);
    AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
            anns);
    ann2.setSequenceRef(seq2);
    AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
            anns);
    AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
    ann4.setSequenceRef(seq1);
    AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
    ann5.setSequenceRef(seq2);
    AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
    AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
    al.addAnnotation(ann1); // Structure for Seq1
    al.addAnnotation(ann2); // Structure for Seq2
    al.addAnnotation(ann3); // Structure for no sequence
    al.addAnnotation(ann4); // Temp for seq1
    al.addAnnotation(ann5); // Temp for seq2
    al.addAnnotation(ann6); // Temp for no sequence
    List<String> types = new ArrayList<String>();
    List<SequenceI> scope = new ArrayList<SequenceI>();

    /*
     * Set all sequence related Structure to hidden (ann1, ann2)
     */
    types.add("Structure");
    AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
            false);
    assertFalse(ann1.visible);
    assertFalse(ann2.visible);
    assertTrue(ann3.visible); // not sequence-related, not affected
    assertTrue(ann4.visible); // not Structure, not affected
    assertTrue(ann5.visible); // not Structure, not affected
    assertTrue(ann6.visible); // not sequence-related, not affected

    /*
     * Set Temp in {seq1, seq3} to hidden
     */
    types.clear();
    types.add("Temp");
    scope.add(seq1);
    scope.add(seq3);
    AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
            false);
    assertFalse(ann1.visible); // unchanged
    assertFalse(ann2.visible); // unchanged
    assertTrue(ann3.visible); // not sequence-related, not affected
    assertFalse(ann4.visible); // Temp for seq1 hidden
    assertTrue(ann5.visible); // not in scope, not affected
    assertTrue(ann6.visible); // not sequence-related, not affected

    /*
     * Set Temp in all sequences to hidden (a null scope is passed, so the
     * scope list is not touched here)
     */
    types.clear();
    types.add("Temp");
    AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
            false);
    assertFalse(ann1.visible); // unchanged
    assertFalse(ann2.visible); // unchanged
    assertTrue(ann3.visible); // not sequence-related, not affected
    assertFalse(ann4.visible); // Temp for seq1 hidden
    assertFalse(ann5.visible); // Temp for seq2 hidden
    assertTrue(ann6.visible); // not sequence-related, not affected

    /*
     * Set all types in {seq1, seq3} to visible
     */
    types.clear();
    scope.clear();
    scope.add(seq1);
    scope.add(seq3);
    AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
            true);
    assertTrue(ann1.visible); // Structure for seq1 set visible
    assertFalse(ann2.visible); // not in scope, unchanged
    assertTrue(ann3.visible); // not sequence-related, not affected
    assertTrue(ann4.visible); // Temp for seq1 set visible
    assertFalse(ann5.visible); // not in scope, unchanged
    assertTrue(ann6.visible); // not sequence-related, not affected

    /*
     * Set all types in all scope to hidden
     */
    AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
            false);
    assertFalse(ann1.visible);
    assertFalse(ann2.visible);
    assertTrue(ann3.visible); // not sequence-related, not affected
    assertFalse(ann4.visible);
    assertFalse(ann5.visible);
    assertTrue(ann6.visible); // not sequence-related, not affected
  }
}