1 package jalview.io.gff;
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertNull;
5 import static org.testng.AssertJUnit.assertSame;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
9 import jalview.datamodel.AlignedCodonFrame;
10 import jalview.datamodel.Alignment;
11 import jalview.datamodel.AlignmentI;
12 import jalview.datamodel.Sequence;
13 import jalview.datamodel.SequenceDummy;
14 import jalview.datamodel.SequenceFeature;
15 import jalview.datamodel.SequenceI;
17 import java.io.IOException;
18 import java.util.ArrayList;
19 import java.util.List;
21 import org.testng.annotations.Test;
23 public class Gff3HelperTest
27 * Test processing one PASA GFF line giving a match from forward strand to
32 @Test(groups = "Functional")
33 public void testProcessCdnaMatch_forwardToForward() throws IOException
35 GffHelperBase testee = new Gff3Helper();
36 List<SequenceI> newseqs = new ArrayList<SequenceI>();
37 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
39 SequenceI seq = new Sequence("gi|68711",
40 "GAATTCGTTCATGTAGGTTGATTTTTATT");
41 seq.createDatasetSequence();
42 AlignmentI align = new Alignment(new SequenceI[] {});
45 * this should create a mapping from gi|68711/12923-13060
46 * to virtual sequence gi|N37351 (added to newseqs) positions 1-138
48 testee.processGff(seq, gff, align, newseqs, false);
49 assertEquals(1, newseqs.size());
50 assertTrue(newseqs.get(0) instanceof SequenceDummy);
51 assertEquals("gi|N37351", newseqs.get(0).getName());
52 assertEquals(1, align.getCodonFrames().size());
53 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
56 * 'dnaseqs' (map from) is here [gi|68711]
57 * 'aaseqs' (map to) is here [gi|N37351]
59 // TODO use more suitable naming in AlignedCodonFrame
60 assertEquals(1, mapping.getAaSeqs().length);
61 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
62 assertEquals(1, mapping.getdnaSeqs().length);
63 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
64 assertEquals(1, mapping.getdnaToProt().length);
65 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
66 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
67 .getFromRanges().get(0));
68 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
69 assertArrayEquals(new int[] { 1, 138 }, mapping.getdnaToProt()[0]
70 .getToRanges().get(0));
74 * Test processing one PASA GFF line giving a match from forward strand to
79 @Test(groups = "Functional")
80 public void testProcessCdnaMatch_forwardToReverse() throws IOException
82 GffHelperBase testee = new Gff3Helper();
83 List<SequenceI> newseqs = new ArrayList<SequenceI>();
84 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 -"
86 SequenceI seq = new Sequence("gi|68711",
87 "GAATTCGTTCATGTAGGTTGATTTTTATT");
88 seq.createDatasetSequence();
89 AlignmentI align = new Alignment(new SequenceI[] {});
92 * this should create a mapping from gi|68711/12923-13060
93 * to virtual sequence gi|N37351 (added to newseqs) positions 138-1
95 testee.processGff(seq, gff, align, newseqs, false);
96 assertEquals(1, newseqs.size());
97 assertTrue(newseqs.get(0) instanceof SequenceDummy);
98 assertEquals("gi|N37351", newseqs.get(0).getName());
99 assertEquals(1, align.getCodonFrames().size());
100 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
103 * 'dnaseqs' (map from) is here [gi|68711]
104 * 'aaseqs' (map to) is here [gi|N37351]
106 // TODO use more suitable naming in AlignedCodonFrame
107 assertEquals(1, mapping.getAaSeqs().length);
108 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
109 assertEquals(1, mapping.getdnaSeqs().length);
110 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
111 assertEquals(1, mapping.getdnaToProt().length);
112 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
113 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
114 .getFromRanges().get(0));
115 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
116 assertArrayEquals(new int[] { 138, 1 }, mapping.getdnaToProt()[0]
117 .getToRanges().get(0));
121 * Test processing one PASA GFF line giving a match from reverse complement
122 * strand to forward strand
124 * @throws IOException
126 @Test(groups = "Functional")
127 public void testProcessCdnaMatch_reverseToForward() throws IOException
129 GffHelperBase testee = new Gff3Helper();
130 List<SequenceI> newseqs = new ArrayList<SequenceI>();
131 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t-\t.\tID=align_68;Target=gi|N37351 1 138 +"
133 SequenceI seq = new Sequence("gi|68711",
134 "GAATTCGTTCATGTAGGTTGATTTTTATT");
135 seq.createDatasetSequence();
136 AlignmentI align = new Alignment(new SequenceI[] {});
139 * (For now) we don't process reverse complement mappings; to do this
140 * would require (a) creating a virtual sequence placeholder for the
141 * reverse complement (b) resolving the sequence by its id from some
142 * source (GFF ##FASTA or other) (c) creating the reverse complement
143 * sequence (d) updating the mapping to be to the reverse complement
145 SequenceFeature sf = testee.processGff(seq, gff, align, newseqs, false);
147 assertTrue(newseqs.isEmpty());
151 * Test processing two PASA GFF lines representing a spliced mapping
153 * @throws IOException
155 @Test(groups = "Functional")
156 public void testProcessCdnaMatch_spliced() throws IOException
158 GffHelperBase testee = new Gff3Helper();
159 List<SequenceI> newseqs = new ArrayList<SequenceI>();
160 SequenceI seq = new Sequence("gi|68711",
161 "GAATTCGTTCATGTAGGTTGATTTTTATT");
162 seq.createDatasetSequence();
163 AlignmentI align = new Alignment(new SequenceI[] {});
165 // mapping from gi|68711 12923-13060 to gi|N37351 1-138
166 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
168 testee.processGff(seq, gff, align, newseqs, false);
169 // mapping from gi|68711 13411-13550 to gi|N37351 139-278
170 gff = "gi|68711\tblat-pasa\tcDNA_match\t13411\t13550\t98.55\t+\t.\tID=align_68;Target=gi|N37351 139 278 +"
172 testee.processGff(seq, gff, align, newseqs, false);
174 assertEquals(1, newseqs.size());
175 assertTrue(newseqs.get(0) instanceof SequenceDummy);
176 assertEquals("gi|N37351", newseqs.get(0).getName());
178 // only 1 AlignedCodonFrame added to the alignment with both mappings!
179 // (this is important for 'align cdna to genome' to work correctly)
180 assertEquals(1, align.getCodonFrames().size());
181 AlignedCodonFrame mapping = align.getCodonFrames().get(0);
184 * 'dnaseqs' (map from) is here [gi|68711]
185 * 'aaseqs' (map to) is here [gi|N37351]
187 // TODO use more suitable naming in AlignedCodonFrame
188 assertEquals(1, mapping.getAaSeqs().length);
189 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
190 assertEquals(1, mapping.getdnaSeqs().length);
191 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
192 assertEquals(1, mapping.getdnaToProt().length);
193 assertEquals(2, mapping.getdnaToProt()[0].getFromRanges().size());
194 // the two spliced dna ranges are combined in one MapList
195 assertArrayEquals(new int[] { 12923, 13060 },
196 mapping.getdnaToProt()[0]
197 .getFromRanges().get(0));
198 assertArrayEquals(new int[] { 13411, 13550 }, mapping.getdnaToProt()[0]
199 .getFromRanges().get(1));
200 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
201 // the two cdna ranges are merged into one contiguous region
202 assertArrayEquals(new int[] { 1, 278 }, mapping.getdnaToProt()[0]
203 .getToRanges().get(0));