/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.io.gff;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
25 import static org.testng.AssertJUnit.assertSame;
26 import static org.testng.AssertJUnit.assertTrue;
27 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
29 import jalview.datamodel.AlignedCodonFrame;
30 import jalview.datamodel.Alignment;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceDummy;
34 import jalview.datamodel.SequenceFeature;
35 import jalview.datamodel.SequenceI;
37 import java.io.IOException;
38 import java.util.ArrayList;
39 import java.util.List;
41 import org.testng.annotations.Test;
43 public class Gff3HelperTest
47 * Test processing one PASA GFF line giving a match from forward strand to
52 @Test(groups = "Functional")
53 public void testProcessCdnaMatch_forwardToForward() throws IOException
55 GffHelperBase testee = new Gff3Helper();
56 List<SequenceI> newseqs = new ArrayList<SequenceI>();
57 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
59 SequenceI seq = new Sequence("gi|68711",
60 "GAATTCGTTCATGTAGGTTGATTTTTATT");
61 seq.createDatasetSequence();
62 AlignmentI align = new Alignment(new SequenceI[] {});
65 * this should create a mapping from gi|68711/12923-13060
66 * to virtual sequence gi|N37351 (added to newseqs) positions 1-138
68 testee.processGff(seq, gff, align, newseqs, false);
69 assertEquals(1, newseqs.size());
70 assertTrue(newseqs.get(0) instanceof SequenceDummy);
71 assertEquals("gi|N37351", newseqs.get(0).getName());
72 assertEquals(1, align.getCodonFrames().size());
73 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
76 * 'dnaseqs' (map from) is here [gi|68711]
77 * 'aaseqs' (map to) is here [gi|N37351]
79 // TODO use more suitable naming in AlignedCodonFrame
80 assertEquals(1, mapping.getAaSeqs().length);
81 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
82 assertEquals(1, mapping.getdnaSeqs().length);
83 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
84 assertEquals(1, mapping.getdnaToProt().length);
85 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
86 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
87 .getFromRanges().get(0));
88 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
89 assertArrayEquals(new int[] { 1, 138 }, mapping.getdnaToProt()[0]
90 .getToRanges().get(0));
94 * Test processing one PASA GFF line giving a match from forward strand to
99 @Test(groups = "Functional")
100 public void testProcessCdnaMatch_forwardToReverse() throws IOException
102 GffHelperBase testee = new Gff3Helper();
103 List<SequenceI> newseqs = new ArrayList<SequenceI>();
104 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 -"
106 SequenceI seq = new Sequence("gi|68711",
107 "GAATTCGTTCATGTAGGTTGATTTTTATT");
108 seq.createDatasetSequence();
109 AlignmentI align = new Alignment(new SequenceI[] {});
112 * this should create a mapping from gi|68711/12923-13060
113 * to virtual sequence gi|N37351 (added to newseqs) positions 138-1
115 testee.processGff(seq, gff, align, newseqs, false);
116 assertEquals(1, newseqs.size());
117 assertTrue(newseqs.get(0) instanceof SequenceDummy);
118 assertEquals("gi|N37351", newseqs.get(0).getName());
119 assertEquals(1, align.getCodonFrames().size());
120 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
123 * 'dnaseqs' (map from) is here [gi|68711]
124 * 'aaseqs' (map to) is here [gi|N37351]
126 // TODO use more suitable naming in AlignedCodonFrame
127 assertEquals(1, mapping.getAaSeqs().length);
128 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
129 assertEquals(1, mapping.getdnaSeqs().length);
130 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
131 assertEquals(1, mapping.getdnaToProt().length);
132 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
133 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
134 .getFromRanges().get(0));
135 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
136 assertArrayEquals(new int[] { 138, 1 }, mapping.getdnaToProt()[0]
137 .getToRanges().get(0));
141 * Test processing one PASA GFF line giving a match from reverse complement
142 * strand to forward strand
144 * @throws IOException
146 @Test(groups = "Functional")
147 public void testProcessCdnaMatch_reverseToForward() throws IOException
149 GffHelperBase testee = new Gff3Helper();
150 List<SequenceI> newseqs = new ArrayList<SequenceI>();
151 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t-\t.\tID=align_68;Target=gi|N37351 1 138 +"
153 SequenceI seq = new Sequence("gi|68711",
154 "GAATTCGTTCATGTAGGTTGATTTTTATT");
155 seq.createDatasetSequence();
156 AlignmentI align = new Alignment(new SequenceI[] {});
159 * (For now) we don't process reverse complement mappings; to do this
160 * would require (a) creating a virtual sequence placeholder for the
161 * reverse complement (b) resolving the sequence by its id from some
162 * source (GFF ##FASTA or other) (c) creating the reverse complement
163 * sequence (d) updating the mapping to be to the reverse complement
165 SequenceFeature sf = testee.processGff(seq, gff, align, newseqs, false);
167 assertTrue(newseqs.isEmpty());
171 * Test processing two PASA GFF lines representing a spliced mapping
173 * @throws IOException
175 @Test(groups = "Functional")
176 public void testProcessCdnaMatch_spliced() throws IOException
178 GffHelperBase testee = new Gff3Helper();
179 List<SequenceI> newseqs = new ArrayList<SequenceI>();
180 SequenceI seq = new Sequence("gi|68711",
181 "GAATTCGTTCATGTAGGTTGATTTTTATT");
182 seq.createDatasetSequence();
183 AlignmentI align = new Alignment(new SequenceI[] {});
185 // mapping from gi|68711 12923-13060 to gi|N37351 1-138
186 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
188 testee.processGff(seq, gff, align, newseqs, false);
189 // mapping from gi|68711 13411-13550 to gi|N37351 139-278
190 gff = "gi|68711\tblat-pasa\tcDNA_match\t13411\t13550\t98.55\t+\t.\tID=align_68;Target=gi|N37351 139 278 +"
192 testee.processGff(seq, gff, align, newseqs, false);
194 assertEquals(1, newseqs.size());
195 assertTrue(newseqs.get(0) instanceof SequenceDummy);
196 assertEquals("gi|N37351", newseqs.get(0).getName());
198 // only 1 AlignedCodonFrame added to the alignment with both mappings!
199 // (this is important for 'align cdna to genome' to work correctly)
200 assertEquals(1, align.getCodonFrames().size());
201 AlignedCodonFrame mapping = align.getCodonFrames().get(0);
204 * 'dnaseqs' (map from) is here [gi|68711]
205 * 'aaseqs' (map to) is here [gi|N37351]
207 // TODO use more suitable naming in AlignedCodonFrame
208 assertEquals(1, mapping.getAaSeqs().length);
209 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
210 assertEquals(1, mapping.getdnaSeqs().length);
211 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
212 assertEquals(1, mapping.getdnaToProt().length);
213 assertEquals(2, mapping.getdnaToProt()[0].getFromRanges().size());
214 // the two spliced dna ranges are combined in one MapList
215 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
216 .getFromRanges().get(0));
217 assertArrayEquals(new int[] { 13411, 13550 }, mapping.getdnaToProt()[0]
218 .getFromRanges().get(1));
219 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
220 // the two cdna ranges are merged into one contiguous region
221 assertArrayEquals(new int[] { 1, 278 }, mapping.getdnaToProt()[0]
222 .getToRanges().get(0));