* @param cdnaAlignment
* @return
*/
- public static boolean mapProteinToCdna(final AlignmentI proteinAlignment,
+ public static boolean mapProteinAlignmentToCdna(final AlignmentI proteinAlignment,
final AlignmentI cdnaAlignment)
{
if (proteinAlignment == null || cdnaAlignment == null)
final AlignmentI cdnaAlignment, Set<SequenceI> mappedDna,
Set<SequenceI> mappedProtein, boolean xrefsOnly)
{
- boolean mappingPerformed = false;
+ boolean mappingExistsOrAdded = false;
List<SequenceI> thisSeqs = proteinAlignment.getSequences();
for (SequenceI aaSeq : thisSeqs)
{
{
continue;
}
- if (!mappingExists(proteinAlignment.getCodonFrames(),
+ if (mappingExists(proteinAlignment.getCodonFrames(),
aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))
{
- MapList map = mapProteinToCdna(aaSeq, cdnaSeq);
+ mappingExistsOrAdded = true;
+ }
+ else
+ {
+ MapList map = mapProteinSequenceToCdna(aaSeq, cdnaSeq);
if (map != null)
{
acf.addMap(cdnaSeq, aaSeq, map);
- mappingPerformed = true;
+ mappingExistsOrAdded = true;
proteinMapped = true;
mappedDna.add(cdnaSeq);
mappedProtein.add(aaSeq);
proteinAlignment.addCodonFrame(acf);
}
}
- return mappingPerformed;
+ return mappingExistsOrAdded;
}
/**
* @param cdnaSeq
* @return
*/
- public static MapList mapProteinToCdna(SequenceI proteinSeq,
+ public static MapList mapProteinSequenceToCdna(SequenceI proteinSeq,
SequenceI cdnaSeq)
{
/*
*/
final int mappedLength = 3 * aaSeqChars.length;
int cdnaLength = cdnaSeqChars.length;
- int cdnaStart = 1;
- int cdnaEnd = cdnaLength;
- final int proteinStart = 1;
- final int proteinEnd = aaSeqChars.length;
+ int cdnaStart = cdnaSeq.getStart();
+ int cdnaEnd = cdnaSeq.getEnd();
+ final int proteinStart = proteinSeq.getStart();
+ final int proteinEnd = proteinSeq.getEnd();
/*
* If lengths don't match, try ignoring stop codon.
/*
* If lengths still don't match, try ignoring start codon.
*/
+ int startOffset = 0;
if (cdnaLength != mappedLength
&& cdnaLength > 2
&& String.valueOf(cdnaSeqChars, 0, 3).toUpperCase()
.equals(ResidueProperties.START))
{
+ startOffset += 3;
cdnaStart += 3;
cdnaLength -= 3;
}
{
return null;
}
- if (!translatesAs(cdnaSeqChars, cdnaStart - 1, aaSeqChars))
+ if (!translatesAs(cdnaSeqChars, startOffset,
+ aaSeqChars))
{
return null;
}
/*
* Traverse the aligned protein sequence.
*/
+ int fromOffset = alignFrom.getStart() - 1;
+ int toOffset = alignTo.getStart() - 1;
int sourceGapMappedLength = 0;
boolean inExon = false;
for (char sourceChar : thatAligned)
sourceDsPos++;
// Note mapping positions are base 1, our sequence positions base 0
int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,
- sourceDsPos);
+ sourceDsPos + fromOffset);
if (mappedPos == null)
{
/*
* But then 'align dna as protein' doesn't make much sense otherwise.
*/
int intronLength = 0;
- while (basesWritten < mappedCodonEnd && thisSeqPos < thisSeq.length)
+ while (basesWritten + toOffset < mappedCodonEnd
+ && thisSeqPos < thisSeq.length)
{
final char c = thisSeq[thisSeqPos++];
if (c != myGapChar)
{
basesWritten++;
-
- if (basesWritten < mappedCodonStart)
+ int sourcePosition = basesWritten + toOffset;
+ if (sourcePosition < mappedCodonStart)
{
/*
* Found an unmapped (intron) base. First add in any preceding gaps
}
else
{
- final boolean startOfCodon = basesWritten == mappedCodonStart;
+ final boolean startOfCodon = sourcePosition == mappedCodonStart;
int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,
preserveUnmappedGaps, sourceGapMappedLength, inExon,
trailingCopiedGap.length(), intronLength, startOfCodon);
* Just try to make a mapping (it is not yet stored), test whether
* successful.
*/
- return mapProteinToCdna(proteinDs, dnaDs) != null;
+ return mapProteinSequenceToCdna(proteinDs, dnaDs) != null;
}
/**
* Convert position in sequence (base 1) to sequence character array
* index (base 0)
*/
- int start = m.getStart() - 1;
+ int start = m.getStart() - m.getSequence().getStart();
setStatusMessage(seq, start, sequenceIndex);
return true;
}
for (SequenceI seq : e.getSequences())
{
SequenceI ds = seq.getDatasetSequence();
- SequenceI preEdit = result.get(ds);
- if (preEdit == null)
+ // SequenceI preEdit = result.get(ds);
+ if (!result.containsKey(ds))
{
- preEdit = new Sequence("", seq.getSequenceAsString());
+ /*
+ * copy sequence including start/end (but don't use copy constructor
+ * as we don't need annotations)
+ */
+ SequenceI preEdit = new Sequence("", seq.getSequenceAsString(),
+ seq.getStart(), seq.getEnd());
preEdit.setDatasetSequence(ds);
result.put(ds, preEdit);
}
* Work backwards through the edit list, deriving the sequences before each
* was applied. The final result is the sequence set before any edits.
*/
- Iterator<Edit> edits = new ReverseListIterator<Edit>(getEdits());
- while (edits.hasNext())
+ Iterator<Edit> editList = new ReverseListIterator<Edit>(getEdits());
+ while (editList.hasNext())
{
- Edit oldEdit = edits.next();
+ Edit oldEdit = editList.next();
Action action = oldEdit.getAction();
int position = oldEdit.getPosition();
int number = oldEdit.getNumber();
SequenceI preEdit = result.get(ds);
if (preEdit == null)
{
- preEdit = new Sequence("", seq.getSequenceAsString());
+ preEdit = new Sequence("", seq.getSequenceAsString(),
+ seq.getStart(), seq.getEnd());
preEdit.setDatasetSequence(ds);
result.put(ds, preEdit);
}
return null;
}
MapList ml = null;
- char[] dnaSeq = null;
+ SequenceI dnaSeq = null;
for (int i = 0; i < dnaToProt.length; i++)
{
if (dnaToProt[i].to == protein)
{
ml = getdnaToProt()[i];
- dnaSeq = dnaSeqs[i].getSequence();
+ dnaSeq = dnaSeqs[i];
break;
}
}
* Read off the mapped nucleotides (converting to position base 0)
*/
codonPos = MappingUtils.flattenRanges(codonPos);
- return new char[] { dnaSeq[codonPos[0] - 1], dnaSeq[codonPos[1] - 1],
- dnaSeq[codonPos[2] - 1] };
+ char[] dna = dnaSeq.getSequence();
+ int start = dnaSeq.getStart();
+ return new char[] { dna[codonPos[0] - start], dna[codonPos[1] - start],
+ dna[codonPos[2] - start] };
}
/**
private final char[] alignedSeq;
/*
+ * the sequence start residue
+ */
+ private int start;
+
+ /*
* Next position (base 0) in the aligned sequence
*/
private int alignedColumn = 0;
/**
* Constructor
*
- * @param cs
- * the aligned sequence characters
+ * @param seq
+ * the aligned sequence
* @param gapChar
*/
- public AlignedCodonIterator(char[] cs, char gapChar)
+ public AlignedCodonIterator(SequenceI seq, char gapChar)
{
- this.alignedSeq = cs;
+ this.alignedSeq = seq.getSequence();
+ this.start = seq.getStart();
this.gap = gapChar;
fromRanges = map.getFromRanges().iterator();
toRanges = map.getToRanges().iterator();
// i.e. code like getNextCodon()
if (toPosition <= currentToRange[1])
{
- char pep = Mapping.this.to.getSequence()[toPosition - 1];
+ SequenceI seq = Mapping.this.to;
+ char pep = seq.getSequence()[toPosition - seq.getStart()];
toPosition++;
return String.valueOf(pep);
}
*/
private int getAlignedColumn(int sequencePos)
{
- while (alignedBases < sequencePos
+ /*
+ * allow for offset e.g. treat pos 8 as 2 if sequence starts at 7
+ */
+ int truePos = sequencePos - (start - 1);
+ while (alignedBases < truePos
&& alignedColumn < alignedSeq.length)
{
if (alignedSeq[alignedColumn++] != gap)
public Iterator<AlignedCodon> getCodonIterator(SequenceI seq, char gapChar)
{
- return new AlignedCodonIterator(seq.getSequence(), gapChar);
+ return new AlignedCodonIterator(seq, gapChar);
}
}
* is a pre-requisite for building mappings.
*/
al.setDataset(null);
- AlignmentUtils.mapProteinToCdna(protein, cdna);
+ AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna);
/*
* Create the AlignFrame for the added alignment. If it is protein, mappings
* Convert position in sequence (base 1) to sequence character array
* index (base 0)
*/
- int start = m.getStart() - 1;
+ int start = m.getStart() - m.getSequence().getStart();
setStatusMessage(seq, start, sequenceIndex);
return;
}
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testMapProteinToCdna_noXrefs() throws IOException
+ public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
{
List<SequenceI> protseqs = new ArrayList<SequenceI>();
protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
cdna.setDataset(null);
- assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+ assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
// 3 mappings made, each from 1 to 1 sequence
assertEquals(3, protein.getCodonFrames().size());
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testMapProteinToCdna_withStartAndStopCodons()
+ public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
throws IOException
{
List<SequenceI> protseqs = new ArrayList<SequenceI>();
AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
cdna.setDataset(null);
- assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+ assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
// 3 mappings made, each from 1 to 1 sequence
assertEquals(3, protein.getCodonFrames().size());
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testMapProteinToCdna_withXrefs() throws IOException
+ public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
{
List<SequenceI> protseqs = new ArrayList<SequenceI>();
protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
// A11111 should be mapped to V12347
// A55555 is spare and has no xref so is not mapped
- assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+ assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
// 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
assertEquals(3, protein.getCodonFrames().size());
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testMapProteinToCdna_prioritiseXrefs() throws IOException
+ public void testMapProteinAlignmentToCdna_prioritiseXrefs()
+ throws IOException
{
List<SequenceI> protseqs = new ArrayList<SequenceI>();
protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
// A11111 should then be mapped to the unmapped V12346
dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
- assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
+ assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
// 2 protein mappings made
assertEquals(2, protein.getCodonFrames().size());
assertTrue(AlignmentUtils.isMappable(al1, al2));
assertTrue(AlignmentUtils.isMappable(al2, al1));
}
+
+ /**
+ * Test creating a mapping when the sequences involved do not start at residue
+ * 1
+ *
+ * @throws IOException
+ */
+ @Test(groups = { "Functional" })
+ public void testMapProteinSequenceToCdna_forSubsequence()
+ throws IOException
+ {
+ SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
+ prot.createDatasetSequence();
+
+ SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
+ dna.createDatasetSequence();
+
+ MapList map = AlignmentUtils.mapProteinSequenceToCdna(prot, dna);
+ assertEquals(10, map.getToLowest());
+ assertEquals(12, map.getToHighest());
+ assertEquals(40, map.getFromLowest());
+ assertEquals(48, map.getFromHighest());
+ }
}
public void testCut()
{
Edit ec = testee.new Edit(Action.CUT, seqs, 4, 3, al);
- testee.cut(ec, new AlignmentI[] { al });
+ EditCommand.cut(ec, new AlignmentI[] { al });
assertEquals("abcdhjk", seqs[0].getSequenceAsString());
assertEquals("fghjnopq", seqs[1].getSequenceAsString());
assertEquals("qrstxyz", seqs[2].getSequenceAsString());
newSeqs[1] = new Sequence("newseq1", "JWMPDH");
Edit ec = testee.new Edit(Action.PASTE, newSeqs, 0, al.getWidth(), al);
- testee.paste(ec, new AlignmentI[] { al });
+ EditCommand.paste(ec, new AlignmentI[] { al });
assertEquals(6, al.getSequences().size());
assertEquals("1234567890", seqs[3].getSequenceAsString());
assertEquals("ACEFKL", seqs[4].getSequenceAsString());
SequenceI seq = new Sequence("", "--A--B-CDEF");
SequenceI ds = new Sequence("", "ABCDEF");
seq.setDatasetSequence(ds);
- SequenceI[] seqs = new SequenceI[] { seq };
- Edit e = command.new Edit(Action.INSERT_GAP, seqs, 1, 2, '-');
+ SequenceI[] sqs = new SequenceI[] { seq };
+ Edit e = command.new Edit(Action.INSERT_GAP, sqs, 1, 2, '-');
command.addEdit(e);
- e = command.new Edit(Action.INSERT_GAP, seqs, 4, 1, '-');
+ e = command.new Edit(Action.INSERT_GAP, sqs, 4, 1, '-');
command.addEdit(e);
- e = command.new Edit(Action.INSERT_GAP, seqs, 0, 2, '-');
+ e = command.new Edit(Action.INSERT_GAP, sqs, 0, 2, '-');
command.addEdit(e);
Map<SequenceI, SequenceI> unwound = command.priorState(false);
SequenceI seq = new Sequence("", "ABC");
SequenceI ds = new Sequence("", "ABC");
seq.setDatasetSequence(ds);
- SequenceI[] seqs = new SequenceI[] { seq };
- Edit e = command.new Edit(Action.DELETE_GAP, seqs, 1, 1, '-');
+ SequenceI[] sqs = new SequenceI[] { seq };
+ Edit e = command.new Edit(Action.DELETE_GAP, sqs, 1, 1, '-');
command.addEdit(e);
- e = command.new Edit(Action.DELETE_GAP, seqs, 2, 1, '-');
+ e = command.new Edit(Action.DELETE_GAP, sqs, 2, 1, '-');
command.addEdit(e);
Map<SequenceI, SequenceI> unwound = command.priorState(false);
SequenceI seq = new Sequence("", "ABCDEF");
SequenceI ds = new Sequence("", "ABCDEF");
seq.setDatasetSequence(ds);
- SequenceI[] seqs = new SequenceI[] { seq };
- Edit e = command.new Edit(Action.DELETE_GAP, seqs, 2, 2, '-');
+ SequenceI[] sqs = new SequenceI[] { seq };
+ Edit e = command.new Edit(Action.DELETE_GAP, sqs, 2, 2, '-');
command.addEdit(e);
Map<SequenceI, SequenceI> unwound = command.priorState(false);
SequenceI seq = new Sequence("", "AB---CDEF");
SequenceI ds = new Sequence("", "ABCDEF");
seq.setDatasetSequence(ds);
- SequenceI[] seqs = new SequenceI[] { seq };
- Edit e = command.new Edit(Action.INSERT_GAP, seqs, 2, 3, '-');
+ SequenceI[] sqs = new SequenceI[] { seq };
+ Edit e = command.new Edit(Action.INSERT_GAP, sqs, 2, 3, '-');
command.addEdit(e);
Map<SequenceI, SequenceI> unwound = command.priorState(false);
- assertEquals("ABCDEF", unwound.get(ds).getSequenceAsString());
+ SequenceI prior = unwound.get(ds);
+ assertEquals("ABCDEF", prior.getSequenceAsString());
+ assertEquals(1, prior.getStart());
+ assertEquals(6, prior.getEnd());
+ }
+
+ /**
+ * Test 'undoing' a single gap insertion edit command, on a sequence whose
+ * start residue is other than 1
+ */
+ @Test(groups = { "Functional" })
+ public void testPriorState_singleInsertWithOffset()
+ {
+ EditCommand command = new EditCommand();
+ SequenceI seq = new Sequence("", "AB---CDEF", 8, 13);
+ // SequenceI ds = new Sequence("", "ABCDEF", 8, 13);
+ // seq.setDatasetSequence(ds);
+ seq.createDatasetSequence();
+ SequenceI[] sqs = new SequenceI[] { seq };
+ Edit e = command.new Edit(Action.INSERT_GAP, sqs, 2, 3, '-');
+ command.addEdit(e);
+
+ Map<SequenceI, SequenceI> unwound = command.priorState(false);
+ SequenceI prior = unwound.get(seq.getDatasetSequence());
+ assertEquals("ABCDEF", prior.getSequenceAsString());
+ assertEquals(8, prior.getStart());
+ assertEquals(13, prior.getEnd());
}
/**
SequenceI seq = new Sequence("", "ABC-DEF");
SequenceI ds1 = new Sequence("", "ABCDEF");
seq.setDatasetSequence(ds1);
- SequenceI[] seqs = new SequenceI[] { seq };
- Edit e = command.new Edit(Action.DELETE_GAP, seqs, 0, 2, '-');
+ SequenceI[] sqs = new SequenceI[] { seq };
+ Edit e = command.new Edit(Action.DELETE_GAP, sqs, 0, 2, '-');
command.addEdit(e);
seq = new Sequence("", "ABCDEF");
seq.setDatasetSequence(ds1);
- seqs = new SequenceI[] { seq };
- e = command.new Edit(Action.DELETE_GAP, seqs, 3, 1, '-');
+ sqs = new SequenceI[] { seq };
+ e = command.new Edit(Action.DELETE_GAP, sqs, 3, 1, '-');
command.addEdit(e);
/*
seq = new Sequence("", "FGHI--J");
SequenceI ds2 = new Sequence("", "FGHIJ");
seq.setDatasetSequence(ds2);
- seqs = new SequenceI[] { seq };
- e = command.new Edit(Action.DELETE_GAP, seqs, 2, 1, '-');
+ sqs = new SequenceI[] { seq };
+ e = command.new Edit(Action.DELETE_GAP, sqs, 2, 1, '-');
command.addEdit(e);
seq = new Sequence("", "FGHIJ");
seq.setDatasetSequence(ds2);
- seqs = new SequenceI[] { seq };
- e = command.new Edit(Action.DELETE_GAP, seqs, 4, 2, '-');
+ sqs = new SequenceI[] { seq };
+ e = command.new Edit(Action.DELETE_GAP, sqs, 4, 2, '-');
command.addEdit(e);
/*
seq = new Sequence("", "MNOPQ");
SequenceI ds3 = new Sequence("", "MNOPQ");
seq.setDatasetSequence(ds3);
- seqs = new SequenceI[] { seq };
- e = command.new Edit(Action.DELETE_GAP, seqs, 1, 1, '-');
+ sqs = new SequenceI[] { seq };
+ e = command.new Edit(Action.DELETE_GAP, sqs, 1, 1, '-');
command.addEdit(e);
Map<SequenceI, SequenceI> unwound = command.priorState(false);
SequenceI seq3 = new Sequence("", "M-NO--PQ");
SequenceI ds3 = new Sequence("", "MNOPQ");
seq3.setDatasetSequence(ds3);
- SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3 };
- Edit e = command.new Edit(Action.DELETE_GAP, seqs, 0, 1, '-');
+ SequenceI[] sqs = new SequenceI[] { seq1, seq2, seq3 };
+ Edit e = command.new Edit(Action.DELETE_GAP, sqs, 0, 1, '-');
command.addEdit(e);
/*
seq2.setDatasetSequence(ds2);
seq3 = new Sequence("", "M-NOPQ");
seq3.setDatasetSequence(ds3);
- seqs = new SequenceI[] { seq1, seq2, seq3 };
- e = command.new Edit(Action.DELETE_GAP, seqs, 4, 2, '-');
+ sqs = new SequenceI[] { seq1, seq2, seq3 };
+ e = command.new Edit(Action.DELETE_GAP, sqs, 4, 2, '-');
command.addEdit(e);
Map<SequenceI, SequenceI> unwound = command.priorState(false);
assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
aseq1.getDatasetSequence(), 2)));
}
+
+ /**
+ * Test for the case where sequences have start > 1
+ */
+ @Test(groups = { "Functional" })
+ public void testGetMappedCodon_forSubSequences()
+ {
+ final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T", 27, 35);
+ seq1.createDatasetSequence();
+
+ final Sequence aseq1 = new Sequence("Seq1", "-P-R", 12, 13);
+ aseq1.createDatasetSequence();
+
+ /*
+ * Set up the mappings for the exons (upper-case bases)
+ */
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 28, 30, 32, 32, 34, 35 },
+ new int[] { 12, 13 }, 3, 1);
+ acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+
+ assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon(
+ aseq1.getDatasetSequence(), 12)));
+ assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
+ aseq1.getDatasetSequence(), 13)));
+ }
}
assertEquals("Q", codon.product);
assertFalse(codons.hasNext());
}
+
+ /**
+ * Test for a case with sequence (and mappings) not starting at 1
+ */
+ @Test(groups = { "Functional" })
+ public void testNext_withOffset()
+ {
+ SequenceI from = new Sequence("Seq1", "-CgC-C-cCtAG-AtG-Gc", 7, 20);
+ from.createDatasetSequence();
+ SequenceI to = new Sequence("Seq1/10-12", "-PQ-R-");
+ to.createDatasetSequence();
+ MapList map = new MapList(new int[] { 7, 7, 9, 10, 12, 12, 14, 16, 18,
+ 19 }, new int[] { 10, 12 }, 3, 1);
+ Mapping m = new Mapping(to.getDatasetSequence(), map);
+
+ Iterator<AlignedCodon> codons = m.getCodonIterator(from, '-');
+ AlignedCodon codon = codons.next();
+ assertEquals("[1, 3, 5]", codon.toString());
+ assertEquals("P", codon.product);
+ codon = codons.next();
+ assertEquals("[8, 10, 11]", codon.toString());
+ assertEquals("Q", codon.product);
+ codon = codons.next();
+ assertEquals("[13, 15, 17]", codon.toString());
+ assertEquals("R", codon.product);
+ assertFalse(codons.hasNext());
+ }
}
"//";
private static final String AA_SEQS_1 =
- ">Seq1Name\n" +
+ ">Seq1Name/5-8\n" +
"K-QY--L\n" +
- ">Seq2Name\n" +
+ ">Seq2Name/12-15\n" +
"-R-FP-W-\n";
private static final String CDNA_SEQS_1 =
- ">Seq1Name\n" +
+ ">Seq1Name/100-111\n" +
"AC-GG--CUC-CAA-CT\n" +
- ">Seq2Name\n" +
+ ">Seq2Name/200-211\n" +
"-CG-TTA--ACG---AAGT\n";
private static final String CDNA_SEQS_2 =
- ">Seq1Name\n" +
+ ">Seq1Name/50-61\n" +
"GCTCGUCGTACT\n" +
- ">Seq2Name\n" +
+ ">Seq2Name/60-71\n" +
"GGGTCAGGCAGT\n";
// @formatter:on
* Make mappings between sequences. The 'aligned cDNA' is playing the role
* of what would normally be protein here.
*/
- AlignedCodonFrame acf = new AlignedCodonFrame();
- MapList ml = new MapList(new int[] { 1, 12 }, new int[] { 1, 12 }, 1, 1);
- acf.addMap(al2.getSequenceAt(0), al1.getSequenceAt(0), ml);
- acf.addMap(al2.getSequenceAt(1), al1.getSequenceAt(1), ml);
- al1.addCodonFrame(acf);
+ makeMappings(al2, al1);
((Alignment) al2).alignAs(al1, false, true);
assertEquals("GC-TC--GUC-GTA-CT", al2.getSequenceAt(0)
// see also AlignmentUtilsTests
AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
- AlignedCodonFrame acf = new AlignedCodonFrame();
- MapList ml = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
- acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml);
- acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml);
- al2.addCodonFrame(acf);
+ makeMappings(al1, al2);
((Alignment) al2).alignAs(al1, false, true);
assertEquals("K-Q-Y-L-", al2.getSequenceAt(0).getSequenceAsString());
}
/**
+ * Aligning protein from cDNA for a single sequence. This is the 'simple' case
+ * (as there is no need to compute codon 'alignments') but worth testing
+ * before tackling the multiple sequence case.
+ *
+ * @throws IOException
+ */
+ @Test(groups = { "Functional" })
+ public void testAlignAs_proteinAsCdna_singleSequence() throws IOException
+ {
+ /*
+ * simplest case remove all gaps
+ */
+ verifyAlignAs(">protein\n-Q-K-\n", ">dna\nCAAaaa\n", "QK");
+
+ /*
+ * with sequence offsets
+ */
+ verifyAlignAs(">protein/12-13\n-Q-K-\n", ">dna/20-25\nCAAaaa\n", "QK");
+ }
+
+ /**
* Test aligning cdna as per protein alignment.
*
* @throws IOException
*/
AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
- AlignedCodonFrame acf = new AlignedCodonFrame();
- MapList ml = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
- acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml);
- acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml);
- al2.addCodonFrame(acf);
+ makeMappings(al1, al2);
/*
* Realign DNA; currently keeping existing gaps in introns only
}
/**
+ * Test aligning cdna as per protein - single sequences
+ *
+ * @throws IOException
+ */
+ @Test(groups = { "Functional" })
+ public void testAlignAs_cdnaAsProtein_singleSequence() throws IOException
+ {
+ /*
+ * simple case insert one gap
+ */
+ verifyAlignAs(">dna\nCAAaaa\n", ">protein\nQ-K\n", "CAA---aaa");
+
+ /*
+ * simple case but with sequence offsets
+ */
+ verifyAlignAs(">dna/5-10\nCAAaaa\n", ">protein/20-21\nQ-K\n",
+ "CAA---aaa");
+
+ /*
+ * insert gaps as per protein, drop gaps within codons
+ */
+ verifyAlignAs(">dna/10-18\nCA-Aa-aa--AGA\n", ">aa/6-8\n-Q-K--R\n",
+ "---CAA---aaa------AGA");
+ }
+
+ /**
+ * Helper method that makes mappings and then aligns the first alignment as
+ * the second
+ *
+ * @param fromSeqs
+ * @param toSeqs
+ * @param expected
+ * @throws IOException
+ */
+ public void verifyAlignAs(String fromSeqs, String toSeqs, String expected)
+ throws IOException
+ {
+ /*
+ * Load alignments and add mappings from nucleotide to protein (or from
+ * first to second if both the same type)
+ */
+ AlignmentI al1 = loadAlignment(fromSeqs, "FASTA");
+ AlignmentI al2 = loadAlignment(toSeqs, "FASTA");
+ makeMappings(al1, al2);
+
+ /*
+ * Realign DNA; currently keeping existing gaps in introns only
+ */
+ ((Alignment) al1).alignAs(al2, false, true);
+ assertEquals(expected, al1.getSequenceAt(0).getSequenceAsString());
+ }
+
+ /**
+ * Helper method to make mappings from protein to dna sequences, and add the
+ * mappings to the protein alignment
+ *
+ * @param alFrom
+ * @param alTo
+ */
+ public void makeMappings(AlignmentI alFrom, AlignmentI alTo)
+ {
+ AlignmentI prot = !alFrom.isNucleotide() ? alFrom : alTo;
+ AlignmentI nuc = alFrom == prot ? alTo : alFrom;
+
+ int ratio = (alFrom.isNucleotide() == alTo.isNucleotide() ? 1 : 3);
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+
+ for (int i = 0; i < nuc.getHeight(); i++)
+ {
+ SequenceI seqFrom = nuc.getSequenceAt(i);
+ SequenceI seqTo = prot.getSequenceAt(i);
+ MapList ml = new MapList(new int[] { seqFrom.getStart(),
+ seqFrom.getEnd() },
+ new int[] { seqTo.getStart(), seqTo.getEnd() }, ratio, 1);
+ acf.addMap(seqFrom, seqTo, ml);
+ }
+
+ prot.addCodonFrame(acf);
+ }
+
+ /**
* Test aligning dna as per protein alignment, for the case where there are
* introns (i.e. some dna sites have no mapping from a peptide).
*
*/
String dna1 = "A-Aa-gG-GCC-cT-TT";
String dna2 = "c--CCGgg-TT--T-AA-A";
- AlignmentI al1 = loadAlignment(">Seq1\n" + dna1 + "\n>Seq2\n" + dna2
- + "\n", "FASTA");
- AlignmentI al2 = loadAlignment(">Seq1\n-P--YK\n>Seq2\nG-T--F\n",
- "FASTA");
+ AlignmentI al1 = loadAlignment(">Seq1/6-17\n" + dna1
+ + "\n>Seq2/20-31\n" + dna2 + "\n", "FASTA");
+ AlignmentI al2 = loadAlignment(
+ ">Seq1/7-9\n-P--YK\n>Seq2/11-13\nG-T--F\n", "FASTA");
AlignedCodonFrame acf = new AlignedCodonFrame();
// Seq1 has intron at dna positions 3,4,9 so splice is AAG GCC TTT
// Seq2 has intron at dna positions 1,5,6 so splice is CCG TTT AAA
- MapList ml1 = new MapList(new int[] { 1, 2, 5, 8, 10, 12 }, new int[] {
- 1, 3 }, 3, 1);
+ // TODO sequence offsets
+ MapList ml1 = new MapList(new int[] { 6, 7, 10, 13, 15, 17 }, new int[]
+ { 7, 9 }, 3, 1);
acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml1);
- MapList ml2 = new MapList(new int[] { 2, 4, 7, 12 },
- new int[] { 1, 3 }, 3, 1);
+ MapList ml2 = new MapList(new int[] { 21, 23, 26, 31 }, new int[] { 11,
+ 13 }, 3, 1);
acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml2);
al2.addCodonFrame(acf);