/**
* Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to
- * match residues and codons.
+ * match residues and codons. Flags control whether existing gaps in unmapped
+ * (intron) and mapped (exon) regions are preserved or not. Gaps linking intro
+ * and exon are only retained if both flags are set.
*
* @param alignTo
* @param alignFrom
/*
* Traverse the aligned protein sequence.
*/
- int sourceGapLength = 0;
+ int sourceGapMappedLength = 0;
+ boolean inExon = false;
for (char sourceChar : thatAligned)
{
if (sourceChar == sourceGap)
{
- sourceGapLength++;
+ sourceGapMappedLength += ratio;
continue;
}
int mappedCodonStart = mappedPos[0]; // position (1...) of codon start
int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos
- int trailingCopiedGapLength = 0;
+ StringBuilder trailingCopiedGap = new StringBuilder();
/*
* Copy dna sequence up to and including this codon. Optionally, include
* Note this only works for 'linear' splicing, not reverse or interleaved.
* But then 'align dna as protein' doesn't make much sense otherwise.
*/
- boolean inCodon = false;
+ int intronLength = 0;
while (basesWritten < mappedCodonEnd && thisSeqPos < thisSeq.length)
{
final char c = thisSeq[thisSeqPos++];
{
basesWritten++;
- /*
- * Is this the start of the mapped codon? If so, add in any extra gap
- * due to the protein alignment.
- */
- if (basesWritten == mappedCodonStart)
+ if (basesWritten < mappedCodonStart)
{
- inCodon = true;
- int gapsToAdd = Math.max(0, ratio * sourceGapLength
- - trailingCopiedGapLength);
+ /*
+ * Found an unmapped (intron) base. First add in any preceding gaps
+ * (if wanted).
+ */
+ if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)
+ {
+ thisAligned.append(trailingCopiedGap.toString());
+ intronLength += trailingCopiedGap.length();
+ trailingCopiedGap = new StringBuilder();
+ }
+ intronLength++;
+ inExon = false;
+ }
+ else
+ {
+ final boolean startOfCodon = basesWritten == mappedCodonStart;
+ int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,
+ preserveUnmappedGaps, sourceGapMappedLength, inExon,
+ trailingCopiedGap.length(), intronLength, startOfCodon);
for (int i = 0; i < gapsToAdd; i++)
{
thisAligned.append(myGapChar);
}
- sourceGapLength = 0;
+ sourceGapMappedLength = 0;
+ inExon = true;
}
thisAligned.append(c);
- trailingCopiedGapLength = 0;
+ trailingCopiedGap = new StringBuilder();
}
- else if ((!inCodon && preserveUnmappedGaps)
- || (inCodon && preserveMappedGaps))
+ else
{
- thisAligned.append(c);
- trailingCopiedGapLength++;
+ if (inExon && preserveMappedGaps)
+ {
+ trailingCopiedGap.append(myGapChar);
+ }
+ else if (!inExon && preserveUnmappedGaps)
+ {
+ trailingCopiedGap.append(myGapChar);
+ }
}
}
-
- /*
- * Expand (if necessary) the trailing gap to the size of the aligned gap.
- */
- int gapsToAdd = (ratio * sourceGapLength - trailingCopiedGapLength);
- for (int i = 0; i < gapsToAdd; i++)
- {
- thisAligned.append(myGapChar);
- }
}
/*
*/
alignTo.setSequence(new String(thisAligned));
}
+
+ /**
+ * Helper method to work out how many gaps to insert when realigning.
+ *
+ * @param preserveMappedGaps
+ * @param preserveUnmappedGaps
+ * @param sourceGapMappedLength
+ * @param inExon
+ * @param trailingCopiedGap
+ * @param intronLength
+ * @param startOfCodon
+ * @return
+ */
+ protected static int calculateGapsToInsert(boolean preserveMappedGaps,
+ boolean preserveUnmappedGaps, int sourceGapMappedLength,
+ boolean inExon, int trailingGapLength,
+ int intronLength, final boolean startOfCodon)
+ {
+ int gapsToAdd = 0;
+ if (startOfCodon)
+ {
+ /*
+ * Reached start of codon. Ignore trailing gaps in intron unless we are
+ * preserving gaps in both exon and intron. Ignore them anyway if the
+ * protein alignment introduces a gap at least as large as the intronic
+ * region.
+ */
+ if (inExon && !preserveMappedGaps)
+ {
+ trailingGapLength = 0;
+ }
+ if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))
+ {
+ trailingGapLength = 0;
+ }
+ if (inExon)
+ {
+ gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);
+ }
+ else
+ {
+ if (intronLength + trailingGapLength <= sourceGapMappedLength)
+ {
+ gapsToAdd = sourceGapMappedLength - intronLength;
+ }
+ else
+ {
+ gapsToAdd = Math.min(intronLength + trailingGapLength
+ - sourceGapMappedLength, trailingGapLength);
+ }
+ }
+ }
+ else
+ {
+ /*
+ * second or third base of codon; check for any gaps in dna
+ */
+ if (!preserveMappedGaps)
+ {
+ trailingGapLength = 0;
+ }
+ gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);
+ }
+ return gapsToAdd;
+ }
}