package jalview.io.gff; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.testng.annotations.Test; public class Gff3HelperTest { /** * Test processing one PASA GFF line giving a match from forward strand to * forward strand * * @throws IOException */ @Test(groups = "Functional") public void testProcessCdnaMatch_forwardToForward() throws IOException { GffHelperBase testee = new Gff3Helper(); List newseqs = new ArrayList(); String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +" .split("\\t"); SequenceI seq = new Sequence("gi|68711", "GAATTCGTTCATGTAGGTTGATTTTTATT"); seq.createDatasetSequence(); AlignmentI align = new Alignment(new SequenceI[] {}); /* * this should create a mapping from gi|68711/12923-13060 * to virtual sequence gi|N37351 (added to newseqs) positions 1-138 */ testee.processGff(seq, gff, align, newseqs, false); assertEquals(1, newseqs.size()); assertTrue(newseqs.get(0) instanceof SequenceDummy); assertEquals("gi|N37351", newseqs.get(0).getName()); assertEquals(1, align.getCodonFrames().size()); AlignedCodonFrame mapping = align.getCodonFrames().iterator().next(); /* * 'dnaseqs' (map from) is here [gi|68711] * 'aaseqs' (map to) is here [gi|N37351] */ // TODO use more suitable naming in AlignedCodonFrame assertEquals(1, mapping.getAaSeqs().length); assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]); assertEquals(1, mapping.getdnaSeqs().length); assertSame(newseqs.get(0), mapping.getAaSeqs()[0]); assertEquals(1, mapping.getdnaToProt().length); assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size()); assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0] .getFromRanges().get(0)); assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size()); assertArrayEquals(new int[] { 1, 138 }, mapping.getdnaToProt()[0] .getToRanges().get(0)); } /** * Test processing one PASA GFF line giving a match from forward strand to * reverse strand * * @throws IOException */ @Test(groups = "Functional") public void testProcessCdnaMatch_forwardToReverse() throws IOException { GffHelperBase testee = new Gff3Helper(); List newseqs = new ArrayList(); String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 -" .split("\\t"); SequenceI seq = new Sequence("gi|68711", "GAATTCGTTCATGTAGGTTGATTTTTATT"); seq.createDatasetSequence(); AlignmentI align = new Alignment(new SequenceI[] {}); /* * this should create a mapping from gi|68711/12923-13060 * to virtual sequence gi|N37351 (added to newseqs) positions 138-1 */ testee.processGff(seq, gff, align, newseqs, false); assertEquals(1, newseqs.size()); assertTrue(newseqs.get(0) instanceof SequenceDummy); assertEquals("gi|N37351", newseqs.get(0).getName()); assertEquals(1, align.getCodonFrames().size()); AlignedCodonFrame mapping = align.getCodonFrames().iterator().next(); /* * 'dnaseqs' (map from) is here [gi|68711] * 'aaseqs' (map to) is here [gi|N37351] */ // TODO use more suitable naming in AlignedCodonFrame assertEquals(1, mapping.getAaSeqs().length); assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]); assertEquals(1, mapping.getdnaSeqs().length); assertSame(newseqs.get(0), mapping.getAaSeqs()[0]); assertEquals(1, mapping.getdnaToProt().length); assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size()); assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0] .getFromRanges().get(0)); assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size()); assertArrayEquals(new int[] { 138, 1 }, mapping.getdnaToProt()[0] .getToRanges().get(0)); } /** * Test processing one PASA GFF line giving a match from reverse complement * strand to forward strand * * @throws IOException */ @Test(groups = "Functional") public void testProcessCdnaMatch_reverseToForward() throws IOException { GffHelperBase testee = new Gff3Helper(); List newseqs = new ArrayList(); String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t-\t.\tID=align_68;Target=gi|N37351 1 138 +" .split("\\t"); SequenceI seq = new Sequence("gi|68711", "GAATTCGTTCATGTAGGTTGATTTTTATT"); seq.createDatasetSequence(); AlignmentI align = new Alignment(new SequenceI[] {}); /* * (For now) we don't process reverse complement mappings; to do this * would require (a) creating a virtual sequence placeholder for the * reverse complement (b) resolving the sequence by its id from some * source (GFF ##FASTA or other) (c) creating the reverse complement * sequence (d) updating the mapping to be to the reverse complement */ SequenceFeature sf = testee.processGff(seq, gff, align, newseqs, false); assertNull(sf); assertTrue(newseqs.isEmpty()); } /** * Test processing two PASA GFF lines representing a spliced mapping * * @throws IOException */ @Test(groups = "Functional") public void testProcessCdnaMatch_spliced() throws IOException { GffHelperBase testee = new Gff3Helper(); List newseqs = new ArrayList(); SequenceI seq = new Sequence("gi|68711", "GAATTCGTTCATGTAGGTTGATTTTTATT"); seq.createDatasetSequence(); AlignmentI align = new Alignment(new SequenceI[] {}); // mapping from gi|68711 12923-13060 to gi|N37351 1-138 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +" .split("\\t"); testee.processGff(seq, gff, align, newseqs, false); // mapping from gi|68711 13411-13550 to gi|N37351 139-278 gff = "gi|68711\tblat-pasa\tcDNA_match\t13411\t13550\t98.55\t+\t.\tID=align_68;Target=gi|N37351 139 278 +" .split("\\t"); testee.processGff(seq, gff, align, newseqs, false); assertEquals(1, newseqs.size()); assertTrue(newseqs.get(0) instanceof SequenceDummy); assertEquals("gi|N37351", newseqs.get(0).getName()); // only 1 AlignedCodonFrame added to the alignment with both mappings! // (this is important for 'align cdna to genome' to work correctly) assertEquals(1, align.getCodonFrames().size()); AlignedCodonFrame mapping = align.getCodonFrames().get(0); /* * 'dnaseqs' (map from) is here [gi|68711] * 'aaseqs' (map to) is here [gi|N37351] */ // TODO use more suitable naming in AlignedCodonFrame assertEquals(1, mapping.getAaSeqs().length); assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]); assertEquals(1, mapping.getdnaSeqs().length); assertSame(newseqs.get(0), mapping.getAaSeqs()[0]); assertEquals(1, mapping.getdnaToProt().length); assertEquals(2, mapping.getdnaToProt()[0].getFromRanges().size()); // the two spliced dna ranges are combined in one MapList assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0] .getFromRanges().get(0)); assertArrayEquals(new int[] { 13411, 13550 }, mapping.getdnaToProt()[0] .getFromRanges().get(1)); assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size()); // the two cdna ranges are merged into one contiguous region assertArrayEquals(new int[] { 1, 278 }, mapping.getdnaToProt()[0] .getToRanges().get(0)); } }