dna2.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
+ null));
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
+ null));
+ dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
+ null));
+ dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
+ null));
+ dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
+ null));
List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
mappings.add(acf);
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, mappings);
+ dna1, dna2 }, mappings, '-');
assertEquals(2, cds.getSequences().size());
- assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
- assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+ assertEquals("---GGG---TTT---", cds.getSequenceAt(0)
+ .getSequenceAsString());
+ assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1)
+ .getSequenceAsString());
/*
* Verify updated mappings
SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(),
m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(),
m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(),
m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(),
m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
// map P to CCC
sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(),
m.getSequence());
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
mappings.add(acf);
AlignedCodonFrame newMapping = new AlignedCodonFrame();
+ List<int[]> ungappedColumns = new ArrayList<int[]>();
+ ungappedColumns.add(new int[] { 4, 6 });
+ ungappedColumns.add(new int[] { 10, 12 });
List<SequenceI> cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf,
- newMapping);
+ ungappedColumns,
+ newMapping, '-');
assertEquals(1, cdsSeqs.size());
SequenceI cdsSeq = cdsSeqs.get(0);
pep1.createDatasetSequence();
pep2.createDatasetSequence();
pep3.createDatasetSequence();
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
+ null));
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
+ null));
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
+ null));
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
+ null));
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
+ null));
+ dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
+ null));
pep1.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "2", "A12345"));
pep2.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "4", "A12347"));
/*
- * Make the mappings from dna to protein. Using LinkedHashset is a
- * convenience so results are in the input order. There is no assertion that
- * the generated exon sequences are in any particular order.
+ * Make the mappings from dna to protein
*/
List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
// map ...GGG...TTT to GF
* exon-to-protein and exon-to-dna mappings
*/
AlignmentI exal = AlignmentUtils.makeCdsAlignment(
- new SequenceI[] { dna1 }, mappings);
+ new SequenceI[] { dna1 }, mappings, '-');
/*
* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
assertEquals(3, cds.size());
SequenceI cdsSeq = cds.get(0);
- assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
+ assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString());
assertEquals("dna1|A12345", cdsSeq.getName());
assertEquals(1, cdsSeq.getDBRefs().length);
DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
assertEquals("A12345", cdsRef.getAccessionId());
cdsSeq = cds.get(1);
- assertEquals("aaaccc", cdsSeq.getSequenceAsString());
+ assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString());
assertEquals("dna1|A12346", cdsSeq.getName());
assertEquals(1, cdsSeq.getDBRefs().length);
cdsRef = cdsSeq.getDBRefs()[0];
assertEquals("A12346", cdsRef.getAccessionId());
cdsSeq = cds.get(2);
- assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
+ assertEquals("aaa------TTT", cdsSeq.getSequenceAsString());
assertEquals("dna1|A12347", cdsSeq.getName());
assertEquals(1, cdsSeq.getDBRefs().length);
cdsRef = cdsSeq.getDBRefs()[0];
assertEquals(1, sf.getBegin());
assertEquals(6, sf.getEnd());
}
+
+ /**
+  * Tests makeCdsAlignment for alternative transcripts: dna1 and dna2 have the
+  * same gapped layout but different CDS features, and dna3 has no mapping to a
+  * protein. The CDS alignment is expected to hold one sequence per mapped dna,
+  * with gaps where a column is CDS in one transcript but not in the other.
+  */
+ @Test(groups = { "Functional" })
+ public void testMakeCdsAlignment_alternativeTranscripts()
+ {
+   SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
+   // alternative transcript of same dna omits bases 9-11 (ctt)
+   SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
+   // dna3 has no mapping (protein product) so should be ignored here
+   SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
+   SequenceI pep1 = new Sequence("pep1", "GPFG");
+   SequenceI pep2 = new Sequence("pep2", "GPG");
+   dna1.createDatasetSequence();
+   dna2.createDatasetSequence();
+   dna3.createDatasetSequence();
+   pep1.createDatasetSequence();
+   pep2.createDatasetSequence();
+
+   // dna1 CDS: GGGCC (4-8), CTTT (9-12), GGG (16-18)
+   dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
+           null));
+   dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
+           null));
+   dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
+           null));
+   // dna2 CDS: GGGCC (4-8), T (12), GGG (16-18)
+   dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
+           null));
+   dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
+           null));
+   dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
+           null));
+
+   List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
+   // map dna1 positions 4-12,16-18 (GGGCCCTTT GGG) to GPFG
+   MapList map = new MapList(new int[] { 4, 12, 16, 18 },
+           new int[] { 1, 4 }, 3, 1);
+   AlignedCodonFrame acf = new AlignedCodonFrame();
+   acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+   mappings.add(acf);
+   // map dna2 positions 4-8,12,16-18 (GGGCC T GGG) to GPG
+   map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
+           new int[] { 1, 3 },
+           3, 1);
+   acf = new AlignedCodonFrame();
+   acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+   mappings.add(acf);
+
+   AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+       dna1, dna2, dna3 }, mappings, '-');
+   assertEquals(2, cds.getSequences().size());
+   assertEquals("GGGCCCTTTGGG", cds.getSequenceAt(0).getSequenceAsString());
+   // the ctt skipped by dna2 shows up as three gaps
+   assertEquals("GGGCC---TGGG", cds.getSequenceAt(1).getSequenceAsString());
+
+   /*
+    * Verify updated mappings
+    */
+   assertEquals(2, mappings.size());
+
+   /*
+    * Mapping from pep1 to GGGCCCTTTGGG in first new CDS sequence
+    */
+   List<AlignedCodonFrame> pep1Mapping = MappingUtils
+           .findMappingsForSequence(pep1, mappings);
+   assertEquals(1, pep1Mapping.size());
+   /*
+    * maps GPFG to 1-3,4-6,7-9,10-12
+    */
+   SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
+   assertEquals(1, sr.getResults().size());
+   Match m = sr.getResults().get(0);
+   // identity check, as for the same assertion in the sibling CDS tests
+   assertSame(cds.getSequenceAt(0).getDatasetSequence(),
+           m.getSequence());
+   assertEquals(1, m.getStart());
+   assertEquals(3, m.getEnd());
+   sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
+   m = sr.getResults().get(0);
+   assertEquals(4, m.getStart());
+   assertEquals(6, m.getEnd());
+   sr = MappingUtils.buildSearchResults(pep1, 3, mappings);
+   m = sr.getResults().get(0);
+   assertEquals(7, m.getStart());
+   assertEquals(9, m.getEnd());
+   sr = MappingUtils.buildSearchResults(pep1, 4, mappings);
+   m = sr.getResults().get(0);
+   assertEquals(10, m.getStart());
+   assertEquals(12, m.getEnd());
+
+   /*
+    * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
+    */
+   List<AlignedCodonFrame> pep2Mapping = MappingUtils
+           .findMappingsForSequence(pep2, mappings);
+   assertEquals(1, pep2Mapping.size());
+   sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
+   assertEquals(1, sr.getResults().size());
+   m = sr.getResults().get(0);
+   assertSame(cds.getSequenceAt(1).getDatasetSequence(),
+           m.getSequence());
+   assertEquals(1, m.getStart());
+   assertEquals(3, m.getEnd());
+   sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
+   m = sr.getResults().get(0);
+   assertEquals(4, m.getStart());
+   assertEquals(6, m.getEnd());
+   sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
+   m = sr.getResults().get(0);
+   assertEquals(7, m.getStart());
+   assertEquals(9, m.getEnd());
+ }
+
+ /**
+  * Checks isGappedColumn over three sequences of differing length whose test
+  * data uses '-', '.' and space characters, then for out-of-range columns and
+  * for a null sequence list.
+  */
+ @Test(groups = { "Functional" })
+ public void testIsGappedColumn()
+ {
+   List<SequenceI> seqs = Arrays.<SequenceI> asList(
+           new Sequence("Seq1", "a--c.tc-a-g"),
+           new Sequence("Seq2", "aa---t--a-g"),
+           new Sequence("Seq3", "ag-c t-g-"));
+
+   // expected answer for columns 1..11 (the column number is base 1)
+   boolean[] expectGapped = { false, false, true, false, true, false,
+       false, false, false, true, false };
+   for (int col = 1; col <= expectGapped.length; col++)
+   {
+     boolean gapped = AlignmentUtils.isGappedColumn(seqs, col);
+     if (expectGapped[col - 1])
+     {
+       assertTrue(gapped);
+     }
+     else
+     {
+       assertFalse(gapped);
+     }
+   }
+
+   // out of bounds columns are expected to be reported as gapped
+   for (int col : new int[] { 0, 100, -100 })
+   {
+     assertTrue(AlignmentUtils.isGappedColumn(seqs, col));
+   }
+   // as is any column when there are no sequences
+   assertTrue(AlignmentUtils.isGappedColumn(null, 0));
+ }
+
+ /**
+  * Tests findCdsColumns: the CDS features on two aligned sequences are expected
+  * to come back as base 1 alignment column ranges, with overlapping or
+  * adjoining ranges combined into one.
+  */
+ @Test(groups = { "Functional" })
+ public void testFindCdsColumns()
+ {
+   // TODO target method belongs in a general-purpose alignment
+   // analysis method to find columns for feature
+
+   /*
+    * NB this method assumes CDS ranges are contiguous (no introns)
+    */
+   // upper case marks the residues covered by the CDS features added below
+   SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-");
+   SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt");
+   seq1.createDatasetSequence();
+   seq2.createDatasetSequence();
+
+   // seq1 CDS occupies columns 8-9, 11-12, 15-17, 20-21
+   seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f,
+           null));
+   seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f,
+           null));
+   seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f,
+           null));
+   seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f,
+           null));
+   // seq2 CDS occupies columns 1-2, 5-8, 13-15, 19, 21-23
+   seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f,
+           null));
+   seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f,
+           null));
+   seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f,
+           null));
+   seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
+           null));
+   seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f,
+           null));
+
+   List<int[]> cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] {
+       seq1, seq2 });
+   // the nine feature ranges combine into four column ranges
+   assertEquals(4, cdsColumns.size());
+   assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0)));
+   assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1)));
+   assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2)));
+   assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3)));
+ }
}