int alignedCount = 0;
for (SequenceI dnaSeq : dna.getSequences())
{
- if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings))
+ if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,
+ dna.getGapCharacter()))
{
alignedCount++;
}
}
/**
- * Helper method to align (if possible) the dna sequence (cds only) to match
- * the alignment of a mapped protein sequence
+ * Helper method to align (if possible) the dna sequence to match the
+ * alignment of a mapped protein sequence. This is currently limited to
+ * handling coding sequence only.
*
- * @param dnaSeq
+ * @param cdsSeq
* @param protein
* @param mappings
+ * @param gapChar
* @return
*/
- static boolean alignCdsSequenceAsProtein(SequenceI dnaSeq,
- AlignmentI protein, List<AlignedCodonFrame> mappings)
+ static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,
+ AlignmentI protein, List<AlignedCodonFrame> mappings, char gapChar)
{
+ SequenceI cdsDss = cdsSeq.getDatasetSequence();
+ if (cdsDss == null)
+ {
+ System.err
+ .println("alignCdsSequenceAsProtein needs aligned sequence!");
+ return false;
+ }
+
List<AlignedCodonFrame> dnaMappings = MappingUtils
- .findMappingsForSequence(dnaSeq, mappings);
+ .findMappingsForSequence(cdsSeq, mappings);
for (AlignedCodonFrame mapping : dnaMappings)
{
- SequenceI peptide = mapping.findAlignedSequence(dnaSeq, protein);
+ SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
+ int peptideLength = peptide.getLength();
if (peptide != null)
{
- Mapping map = mapping.getMappingBetween(dnaSeq, peptide);
+ Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
if (map != null)
{
+ MapList mapList = map.getMap();
+ if (map.getTo() == peptide.getDatasetSequence())
+ {
+ mapList = mapList.getInverse();
+ }
+ int cdsLength = cdsDss.getLength();
+ int mappedFromLength = MappingUtils.getLength(mapList
+ .getFromRanges());
+ int mappedToLength = MappingUtils
+ .getLength(mapList.getToRanges());
+ boolean addStopCodon = (cdsLength == mappedFromLength * 3 + 3)
+ || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1);
+ if (cdsLength != mappedToLength && !addStopCodon)
+ {
+ System.err
+ .println(String
+ .format("Can't align cds as protein (length mismatch %d/%d): %s",
+ cdsLength, mappedToLength,
+ cdsSeq.getName()));
+ }
+
/*
- * traverse peptide adding gaps or codons to new cds sequence
+ * pre-fill the aligned cds sequence with gaps
*/
- MapList mapList = map.getMap();
+ char[] alignedCds = new char[peptideLength * 3
+ + (addStopCodon ? 3 : 0)];
+ Arrays.fill(alignedCds, gapChar);
+
+ /*
+ * walk over the aligned peptide sequence and insert mapped
+ * codons for residues in the aligned cds sequence
+ */
+ char[] alignedPeptide = peptide.getSequence();
+ char[] nucleotides = cdsDss.getSequence();
+ int copiedBases = 0;
+ int cdsStart = cdsDss.getStart();
+ int proteinPos = peptide.getStart() - 1;
+ int cdsCol = 0;
+ for (char residue : alignedPeptide)
+ {
+ if (Comparison.isGap(residue))
+ {
+ cdsCol += 3;
+ }
+ else
+ {
+ proteinPos++;
+ int[] codon = mapList.locateInTo(proteinPos, proteinPos);
+ if (codon == null)
+ {
+ // e.g. incomplete start codon, X in peptide
+ cdsCol += 3;
+ }
+ else
+ {
+ for (int j = codon[0]; j <= codon[1]; j++)
+ {
+ char mappedBase = nucleotides[j - cdsStart];
+ alignedCds[cdsCol++] = mappedBase;
+ copiedBases++;
+ }
+ }
+ }
+ }
+
+ /*
+ * append stop codon if not mapped from protein,
+ * closing it up to the end of the mapped sequence
+ */
+ if (copiedBases == nucleotides.length - 3)
+ {
+ for (int i = alignedCds.length - 1; i >= 0; i--)
+ {
+ if (!Comparison.isGap(alignedCds[i]))
+ {
+ cdsCol = i + 1; // gap just after end of sequence
+ break;
+ }
+ }
+ for (int i = nucleotides.length - 3; i < nucleotides.length; i++)
+ {
+ alignedCds[cdsCol++] = nucleotides[i];
+ }
+ }
+ cdsSeq.setSequence(new String(alignedCds));
+ return true;
}
}
}
*
* @throws IOException
*/
- @Test(groups = { "Functional" }, enabled = false)
+ @Test(groups = { "Functional" }, enabled = true)
// TODO review / update this test after redesign of alignAs method
public void testAlignAs_cdnaAsProtein() throws IOException
{
*
* @throws IOException
*/
- @Test(groups = { "Functional" }, enabled = false)
+ @Test(groups = { "Functional" }, enabled = true)
// TODO review / update this test after redesign of alignAs method
public void testAlignAs_cdnaAsProtein_singleSequence() throws IOException
{
acf.addMap(seqFrom, seqTo, ml);
}
+ /*
+ * not sure whether mappings 'belong' or protein or nucleotide
+ * alignment, so adding to both ;~)
+ */
alFrom.addCodonFrame(acf);
+ alTo.addCodonFrame(acf);
}
/**