*/
package jalview.datamodel;
+import jalview.analysis.AlignmentUtils;
import jalview.util.MessageManager;
import java.util.ArrayList;
}
/**
+ * Returns a map of lists of sequences keyed by sequence name.
+ *
+ * @return
+ */
+ @Override
+ public Map<String, List<SequenceI>> getSequencesByName()
+ {
+ return AlignmentUtils.getSequencesByName(this);
+ }
+
+ /**
* DOCUMENT ME!
*
* @param i
}
/**
- * Answers true if the supplied alignment has the same number of sequences,
- * and they are of equivalent length, ignoring gaps. Alignments should be of
- * the same type (protein/nucleotide) or different types with 3:1 length
- * scaling.
+ * Align this alignment 'the same as' the given one. Mapped sequences only are
+ * realigned. If both of the same type (nucleotide/protein) then align both
+ * identically. If this is nucleotide and the other is protein, make 3 gaps
+ * for each gap in the protein sequences. If this is protein and the other is
+ * nucleotide, insert a gap for each 3 gaps (or part thereof) between
+ * nucleotide bases. Does nothing if alignment of protein from cDNA is
+ * requested (not yet implemented).
*
* @param al
*/
@Override
- public boolean isMappableTo(AlignmentI al)
+ public int alignAs(AlignmentI al)
{
- int thisCodonScale = this.isNucleotide() ? 1 : 3;
- int thatCodonScale = al.isNucleotide() ? 1 : 3;
- if (this == al || this.getHeight() != al.getHeight())
+ int count = 0;
+ boolean thisIsNucleotide = this.isNucleotide();
+ boolean thatIsProtein = !al.isNucleotide();
+ if (!thatIsProtein && !thisIsNucleotide)
{
- return false;
+ System.err
+ .println("Alignment of protein from cDNA not yet implemented");
+ return 0;
+ // todo: build it - a variant of Dna.CdnaTranslate()
}
+ char thisGapChar = this.getGapCharacter();
+ char thatGapChar = al.getGapCharacter();
+ String gap = thisIsNucleotide && thatIsProtein ? String
+ .valueOf(new char[]
+ { thisGapChar, thisGapChar, thisGapChar }) : String
+ .valueOf(thisGapChar);
+ int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
- // TODO: match sequence ids, allow different sequence ordering?
- // TODO: allow for stop/start codons?
- // TODO: exclude introns
- int i = 0;
- for (SequenceI seq : this.getSequences())
- {
- final int thisSequenceDnaLength = seq.getDatasetSequence()
- .getLength() * thisCodonScale;
- final int thatSequenceDnaLength = al.getSequenceAt(i)
- .getDatasetSequence().getLength()
- * thatCodonScale;
- if (thisSequenceDnaLength != thatSequenceDnaLength)
+ /*
+ * Get mappings from 'that' alignment's sequences to this.
+ */
+ for (SequenceI alignTo : getSequences())
+ {
+ AlignedCodonFrame[] mappings = al.getCodonFrame(alignTo);
+ if (mappings != null)
{
- return false;
+ for (AlignedCodonFrame mapping : mappings)
+ {
+ count += alignSequenceAs(alignTo, mapping, thatGapChar, gap,
+ ratio) ? 1 : 0;
+ }
}
- i++;
}
- return true;
+ return count;
}
/**
- * Align this alignment the same as the given one. If both of the same type
- * (nucleotide/protein) then align both identically. If this is nucleotide and
- * the other is protein, make 3 gaps for each gap in the protein sequences. If
- * this is protein and the other is nucleotide, insert a gap for each 3 gaps
- * (or part thereof) between nucleotide bases. The two alignments should be
- * compatible in height and lengths, but if not, then discrepancies will be
- * ignored with unpredictable results.
+ * Align sequence 'seq' the same way as 'other'. Note this currently assumes
+ * that we are aligned cDNA to match protein.
*
- * @param al
- * @throws UnsupportedOperation
- * if alignment of protein from cDNA is requested (not yet
- * implemented)
+ * @param seq
+ * the sequence to be realigned
+ * @param mapping
+ * holds mapping from the sequence whose alignment is to be 'copied'
+ * @param thatGapChar
+ * gap character used in the 'other' sequence
+ * @param gap
+ * character string represent a gap in the realigned sequence
+ * @param ratio
+ * the number of positions in the realigned sequence corresponding to
+ * one in the 'other'
+ * @return true if the sequence was realigned, false if it could not be
*/
- @Override
- public void alignAs(AlignmentI al)
- {
- boolean thisIsNucleotide = this.isNucleotide();
- boolean thatIsProtein = !al.isNucleotide();
- if (!thatIsProtein && !thisIsNucleotide)
+ protected boolean alignSequenceAs(SequenceI seq,
+ AlignedCodonFrame mapping,
+ char thatGapChar,
+ String gap, int ratio)
+ {
+ char myGapChar = gap.charAt(0);
+ // TODO rework this to use the mapping to match 'this' to 'that' residue
+ // position, to handle introns and exons correctly.
+ // TODO generalise to work for Protein-Protein, dna-dna, dna-protein
+ SequenceI alignFrom = mapping.getAaForDnaSeq(seq, false);
+ if (alignFrom == null)
{
- throw new UnsupportedOperationException(
- "Alignment of protein from cDNA not implemented");
+ return false;
}
- char thisGapChar = this.getGapCharacter();
- char thatGapChar = al.getGapCharacter();
- String gap = thisIsNucleotide && thatIsProtein ? String
- .valueOf(new char[]
- { thisGapChar, thisGapChar, thisGapChar }) : String
- .valueOf(thisGapChar);
- int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
- int i = 0;
- for (SequenceI seq : this.getSequences())
+ final char[] thisSeq = seq.getSequence();
+ final char[] thisDs = seq.getDatasetSequence().getSequence();
+ final char[] thatAligned = alignFrom.getSequence();
+ StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
+
+ /*
+ * Find the DNA dataset position that corresponds to the first protein
+ * residue (e.g. ignoring start codon in cDNA).
+ */
+ int[] dnaStart = mapping.getDnaPosition(seq.getDatasetSequence(), 1);
+ int thisDsPosition = dnaStart == null ? 0 : dnaStart[0] - 1;
+ int thisSeqPos = 0;
+
+ /*
+ * Copy aligned cDNA up to (excluding) the first mapped base.
+ */
+ int basesWritten = 0;
+ while (basesWritten < thisDsPosition && thisSeqPos < thisSeq.length)
+ {
+ char c = thisSeq[thisSeqPos++];
+ thisAligned.append(c);
+ if (c != myGapChar)
+ {
+ basesWritten++;
+ }
+ }
+
+ /*
+ * Now traverse the aligned protein mirroring its gaps in cDNA.
+ */
+ for (char thatChar : thatAligned)
{
- SequenceI other = al.getSequenceAt(i++);
- if (other == null)
+ if (thatChar == thatGapChar)
{
- continue;
+ /*
+ * Add (equivalent of) a gap
+ */
+ thisAligned.append(gap);
}
- char[] thisDs = seq.getDatasetSequence().getSequence();
- char[] thatDs = other.getSequence();
- StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
- int thisDsPosition = 0;
- for (char thatChar : thatDs)
+ else
{
- if (thatChar == thatGapChar)
- {
- /*
- * Add (equivalent of) a gap
- */
- thisAligned.append(gap);
- }
- else
+ /*
+ * Add (equivalent of) a residue
+ */
+ for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
{
+ thisAligned.append(thisDs[thisDsPosition++]);
+
/*
- * Add (equivalent of) a residue
+ * Also advance over any gaps and the next residue in the old aligned
+ * sequence
*/
- for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
+ while (thisSeq[thisSeqPos] == myGapChar
+ && thisSeqPos < thisSeq.length)
{
- thisAligned.append(thisDs[thisDsPosition++]);
+ thisSeqPos++;
}
+ thisSeqPos++;
}
}
- /*
- * Include any 'extra' residues (there shouldn't be).
- */
- while (thisDsPosition < thisDs.length)
- {
- thisAligned.append(thisDs[thisDsPosition++]);
- }
- seq.setSequence(new String(thisAligned));
}
+
+ /*
+ * Finally copy any 'extra' aligned cDNA (e.g. stop codon, introns).
+ */
+ while (thisSeqPos < thisSeq.length)
+ {
+ thisAligned.append(thisSeq[thisSeqPos++]);
+ }
+ seq.setSequence(new String(thisAligned));
+ return true;
}
}