+ * Realigns the given dna to match the alignment of the protein, using codon
+ * mappings to translate aligned peptide positions to codons.
+ *
+ * Always produces a padded CDS alignment.
+ *
+ * @param dna
+ * the alignment whose sequences are realigned by this method
+ * @param protein
+ * the protein alignment whose alignment we are 'copying'
+ * @return the number of sequences that were realigned
+ */
+ public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)
+ {
+ if (protein.isNucleotide() || !dna.isNucleotide())
+ {
+ System.err.println("Wrong alignment type in alignProteinAsDna");
+ return 0;
+ }
+ // todo: implement this
+ List<AlignedCodonFrame> mappings = protein.getCodonFrames();
+ int alignedCount = 0;
+ int width = 0; // alignment width for padding CDS
+ for (SequenceI dnaSeq : dna.getSequences())
+ {
+ if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,
+ dna.getGapCharacter()))
+ {
+ alignedCount++;
+ }
+ width = Math.max(dnaSeq.getLength(), width);
+ }
+ int oldwidth;
+ int diff;
+ for (SequenceI dnaSeq : dna.getSequences())
+ {
+ oldwidth = dnaSeq.getLength();
+ diff = width - oldwidth;
+ if (diff > 0)
+ {
+ dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());
+ }
+ }
+ return alignedCount;
+ }
+
+ /**
+ * Helper method to align (if possible) the dna sequence to match the
+ * alignment of a mapped protein sequence. This is currently limited to
+ * handling coding sequence only.
+ *
+ * @param cdsSeq
+ * @param protein
+ * @param mappings
+ * @param gapChar
+ * @return
+ */
+ static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,
+ AlignmentI protein, List<AlignedCodonFrame> mappings, char gapChar)
+ {
+ SequenceI cdsDss = cdsSeq.getDatasetSequence();
+ if (cdsDss == null)
+ {
+ System.err
+ .println("alignCdsSequenceAsProtein needs aligned sequence!");
+ return false;
+ }
+
+ List<AlignedCodonFrame> dnaMappings = MappingUtils
+ .findMappingsForSequence(cdsSeq, mappings);
+ for (AlignedCodonFrame mapping : dnaMappings)
+ {
+ SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
+ if (peptide != null)
+ {
+ int peptideLength = peptide.getLength();
+ Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
+ if (map != null)
+ {
+ MapList mapList = map.getMap();
+ if (map.getTo() == peptide.getDatasetSequence())
+ {
+ mapList = mapList.getInverse();
+ }
+ int cdsLength = cdsDss.getLength();
+ int mappedFromLength = MappingUtils.getLength(mapList
+ .getFromRanges());
+ int mappedToLength = MappingUtils
+ .getLength(mapList.getToRanges());
+ boolean addStopCodon = (cdsLength == mappedFromLength
+ * CODON_LENGTH + CODON_LENGTH)
+ || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1);
+ if (cdsLength != mappedToLength && !addStopCodon)
+ {
+ System.err
+ .println(String
+ .format("Can't align cds as protein (length mismatch %d/%d): %s",
+ cdsLength, mappedToLength,
+ cdsSeq.getName()));
+ }
+
+ /*
+ * pre-fill the aligned cds sequence with gaps
+ */
+ char[] alignedCds = new char[peptideLength * CODON_LENGTH
+ + (addStopCodon ? CODON_LENGTH : 0)];
+ Arrays.fill(alignedCds, gapChar);
+
+ /*
+ * walk over the aligned peptide sequence and insert mapped
+ * codons for residues in the aligned cds sequence
+ */
+ char[] alignedPeptide = peptide.getSequence();
+ char[] nucleotides = cdsDss.getSequence();
+ int copiedBases = 0;
+ int cdsStart = cdsDss.getStart();
+ int proteinPos = peptide.getStart() - 1;
+ int cdsCol = 0;
+ for (char residue : alignedPeptide)
+ {
+ if (Comparison.isGap(residue))
+ {
+ cdsCol += CODON_LENGTH;
+ }
+ else
+ {
+ proteinPos++;
+ int[] codon = mapList.locateInTo(proteinPos, proteinPos);
+ if (codon == null)
+ {
+ // e.g. incomplete start codon, X in peptide
+ cdsCol += CODON_LENGTH;
+ }
+ else
+ {
+ for (int j = codon[0]; j <= codon[1]; j++)
+ {
+ char mappedBase = nucleotides[j - cdsStart];
+ alignedCds[cdsCol++] = mappedBase;
+ copiedBases++;
+ }
+ }
+ }
+ }
+
+ /*
+ * append stop codon if not mapped from protein,
+ * closing it up to the end of the mapped sequence
+ */
+ if (copiedBases == nucleotides.length - CODON_LENGTH)
+ {
+ for (int i = alignedCds.length - 1; i >= 0; i--)
+ {
+ if (!Comparison.isGap(alignedCds[i]))
+ {
+ cdsCol = i + 1; // gap just after end of sequence
+ break;
+ }
+ }
+ for (int i = nucleotides.length - CODON_LENGTH; i < nucleotides.length; i++)
+ {
+ alignedCds[cdsCol++] = nucleotides[i];
+ }
+ }
+ cdsSeq.setSequence(new String(alignedCds));
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**