import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@Test(groups = { "Functional" })
public void testAlignSequenceAs_withMapping_withUnmappedProtein()
{
-
/*
* Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
*/
* Expect a three-base gap to be inserted after the unmapped codon (ccc),
* ahead of the second mapped codon (TTT). Change this test first if
* different behaviour wanted.
*/
- checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false, false, map,
- "GGGAAACCCTTTGGG");
+ checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
+ "gggAAAccc---TTTggg");
}
/**
* Helper method that performs and verifies the method under test.
+ * <p>
+ * Builds two sequences from the supplied strings, creates a dataset sequence
+ * for each, registers the mapping between the two dataset sequences in a new
+ * AlignedCodonFrame, calls AlignmentUtils.alignSequenceAs with "---" and '-'
+ * as its gap arguments, and finally asserts on the string value of the
+ * realigned sequence.
*
- * @param dnaSeq
- * @param proteinSeq
+ * @param alignee
+ * the sequence to be realigned
+ * @param alignModel
+ * the sequence whose alignment is to be copied
* @param preserveMappedGaps
+ * passed through unchanged to AlignmentUtils.alignSequenceAs
* @param preserveUnmappedGaps
+ * passed through unchanged to AlignmentUtils.alignSequenceAs
* @param map
+ * the mapping added (via AlignedCodonFrame.addMap) between the dataset
+ * sequences of alignee and alignModel
* @param expected
+ * the expected sequence string of alignee after realignment
*/
- protected void checkAlignSequenceAs(final String dnaSeq,
- final String proteinSeq, final boolean preserveMappedGaps,
+ protected void checkAlignSequenceAs(final String alignee,
+ final String alignModel, final boolean preserveMappedGaps,
final boolean preserveUnmappedGaps, MapList map,
final String expected)
{
- SequenceI dna = new Sequence("Seq1", dnaSeq);
- dna.createDatasetSequence();
- SequenceI protein = new Sequence("Seq1", proteinSeq);
- protein.createDatasetSequence();
+ SequenceI alignMe = new Sequence("Seq1", alignee);
+ alignMe.createDatasetSequence();
+ SequenceI alignFrom = new Sequence("Seq2", alignModel);
+ alignFrom.createDatasetSequence();
AlignedCodonFrame acf = new AlignedCodonFrame();
- acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
+ acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map);
- AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-',
+ AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
preserveMappedGaps, preserveUnmappedGaps);
- assertEquals(expected, dna.getSequenceAsString());
+ assertEquals(expected, alignMe.getSequenceAsString());
}
/**
acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
- protein.setCodonFrames(Collections.singleton(acf));
+ protein.setCodonFrames(new ArrayList<AlignedCodonFrame>());
/*
* Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
* convenience so results are in the input order. There is no assertion that
* the generated exon sequences are in any particular order.
*/
- Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
// map ...GGG...TTT to GF
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
assertEquals(40, map.getFromLowest());
assertEquals(48, map.getFromHighest());
}
+
+ /**
+ * Test for the alignSequenceAs method where we have protein mapped to protein
+ * <p>
+ * The model sequence ("Query") has 19 residues; the mapping pairs its
+ * positions 6-12 (MGAASEV) one-to-one with positions 1-7 of the sequence
+ * being realigned ("Match"). Both gap-preservation flags are passed as true.
+ * The assertion expects the realigned sequence to have the same length as
+ * the model: 5 leading gaps, the 7 mapped residues, then 7 trailing gaps.
+ */
+ @Test(groups = { "Functional" })
+ public void testAlignSequenceAs_mappedProteinProtein()
+ {
+
+ SequenceI alignMe = new Sequence("Match", "MGAASEV");
+ alignMe.createDatasetSequence();
+ SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
+ alignFrom.createDatasetSequence();
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ // this is like a domain or motif match of part of a peptide sequence
+ MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1);
+ acf.addMap(alignFrom.getDatasetSequence(),
+ alignMe.getDatasetSequence(), map);
+
+ AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
+ true);
+ assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
+ }
+
+ /**
+ * Test for the alignSequenceAs method where there are trailing unmapped
+ * residues in the model sequence
+ * <p>
+ * The nine bases AAACCCTTT are realigned against the model K-PFG with both
+ * gap-preservation flags true. The expected result AAA---CCCTTT--- lines up
+ * position-for-position with the model: a three-base gap where the model has
+ * a gap between K and P, and a further three-base gap at the end where the
+ * model has the unmapped residue G.
+ */
+ @Test(groups = { "Functional" })
+ public void testAlignSequenceAs_withTrailingPeptide()
+ {
+ // map first 3 codons to KPF; G is a trailing unmapped residue
+ MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
+
+ checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
+ "AAA---CCCTTT---");
+ }
}