X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=5d11a2090983e5e10e23f5e7387e4eb67ce2208d;hb=cf82c19425ee457a01c3f1a04aabfb0d9817af6f;hp=91108bc4b9d64d6ab7fedc08accd5bab98330fb0;hpb=243951bc32e308e5fbd57860336fdb3e39ebd822;p=jalview.git

diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java
index 91108bc..5d11a20 100755
--- a/src/jalview/datamodel/Alignment.java
+++ b/src/jalview/datamodel/Alignment.java
@@ -20,6 +20,7 @@
  */
 package jalview.datamodel;
 
+import jalview.analysis.AlignmentUtils;
 import jalview.util.MessageManager;
 
 import java.util.ArrayList;
@@ -157,6 +158,17 @@ public class Alignment implements AlignmentI
   }
 
   /**
+   * Returns a map of lists of sequences keyed by sequence name.
+   *
+   * @return a map from sequence name to the sequences bearing that name
+   */
+  @Override
+  public Map<String, List<SequenceI>> getSequencesByName()
+  {
+    return AlignmentUtils.getSequencesByName(this);
+  }
+
+  /**
    * DOCUMENT ME!
    *
    * @param i
@@ -1596,114 +1608,157 @@ public class Alignment implements AlignmentI
   }
 
   /**
-   * Answers true if the supplied alignment has the same number of sequences,
-   * and they are of equivalent length, ignoring gaps. Alignments should be of
-   * the same type (protein/nucleotide) or different types with 3:1 length
-   * scaling.
+   * Align this alignment 'the same as' the given one. Mapped sequences only are
+   * realigned. If both of the same type (nucleotide/protein) then align both
+   * identically. If this is nucleotide and the other is protein, make 3 gaps
+   * for each gap in the protein sequences. If this is protein and the other is
+   * nucleotide, insert a gap for each 3 gaps (or part thereof) between
+   * nucleotide bases. Does nothing if alignment of protein from cDNA is
+   * requested (not yet implemented).
    *
    * @param al
+   * @return the number of (sequence, mapping) pairs that were realigned
    */
   @Override
-  public boolean isMappableTo(AlignmentI al)
+  public int alignAs(AlignmentI al)
   {
-    int thisCodonScale = this.isNucleotide() ? 1 : 3;
-    int thatCodonScale = al.isNucleotide() ? 1 : 3;
-    if (this == al || this.getHeight() != al.getHeight())
+    int count = 0;
+    boolean thisIsNucleotide = this.isNucleotide();
+    boolean thatIsProtein = !al.isNucleotide();
+    if (!thatIsProtein && !thisIsNucleotide)
     {
-      return false;
+      System.err
+              .println("Alignment of protein from cDNA not yet implemented");
+      return 0;
+      // todo: build it - a variant of Dna.CdnaTranslate()
     }
+    char thisGapChar = this.getGapCharacter();
+    char thatGapChar = al.getGapCharacter();
+    String gap = thisIsNucleotide && thatIsProtein ? String
+            .valueOf(new char[]
+            { thisGapChar, thisGapChar, thisGapChar }) : String
+            .valueOf(thisGapChar);
+    int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
 
-    // TODO: match sequence ids, allow different sequence ordering?
-    // TODO: allow for stop/start codons?
-    // TODO: exclude introns
-    int i = 0;
-    for (SequenceI seq : this.getSequences())
-    {
-      final int thisSequenceDnaLength = seq.getDatasetSequence()
-              .getLength() * thisCodonScale;
-      final int thatSequenceDnaLength = al.getSequenceAt(i)
-              .getDatasetSequence().getLength()
-              * thatCodonScale;
-      if (thisSequenceDnaLength != thatSequenceDnaLength)
+    /*
+     * Get mappings from 'that' alignment's sequences to this.
+     */
+    for (SequenceI alignTo : getSequences())
+    {
+      AlignedCodonFrame[] mappings = al.getCodonFrame(alignTo);
+      if (mappings != null)
       {
-        return false;
+        for (AlignedCodonFrame mapping : mappings)
+        {
+          count += alignSequenceAs(alignTo, mapping, thatGapChar, gap,
+                  ratio) ? 1 : 0;
+        }
       }
-      i++;
     }
-    return true;
+    return count;
   }
 
   /**
-   * Align this alignment the same as the given one. If both of the same type
-   * (nucleotide/protein) then align both identically. If this is nucleotide and
-   * the other is protein, make 3 gaps for each gap in the protein sequences. If
-   * this is protein and the other is nucleotide, insert a gap for each 3 gaps
-   * (or part thereof) between nucleotide bases. The two alignments should be
-   * compatible in height and lengths, but if not, then discrepancies will be
-   * ignored with unpredictable results.
+   * Align sequence 'seq' the same way as 'other'. Note this currently assumes
+   * that we are aligning cDNA to match protein.
    *
-   * @param al
-   * @throws UnsupportedOperation
-   *           if alignment of protein from cDNA is requested (not yet
-   *           implemented)
+   * @param seq
+   *          the sequence to be realigned
+   * @param mapping
+   *          holds mapping from the sequence whose alignment is to be 'copied'
+   * @param thatGapChar
+   *          gap character used in the 'other' sequence
+   * @param gap
+   *          character string representing a gap in the realigned sequence
+   * @param ratio
+   *          the number of positions in the realigned sequence corresponding to
+   *          one in the 'other'
+   * @return true if the sequence was realigned, false if it could not be
    */
-  @Override
-  public void alignAs(AlignmentI al)
-  {
-    boolean thisIsNucleotide = this.isNucleotide();
-    boolean thatIsProtein = !al.isNucleotide();
-    if (!thatIsProtein && !thisIsNucleotide)
+  protected boolean alignSequenceAs(SequenceI seq,
+          AlignedCodonFrame mapping,
+          char thatGapChar,
+          String gap, int ratio)
+  {
+    char myGapChar = gap.charAt(0);
+    // TODO rework this to use the mapping to match 'this' to 'that' residue
+    // position, to handle introns and exons correctly.
+    // TODO generalise to work for Protein-Protein, dna-dna, dna-protein
+    SequenceI alignFrom = mapping.getAaForDnaSeq(seq, false);
+    if (alignFrom == null)
     {
-      throw new UnsupportedOperationException(
-              "Alignment of protein from cDNA not implemented");
+      return false;
     }
-    char thisGapChar = this.getGapCharacter();
-    char thatGapChar = al.getGapCharacter();
-    String gap = thisIsNucleotide && thatIsProtein ? String
-            .valueOf(new char[]
-            { thisGapChar, thisGapChar, thisGapChar }) : String
-            .valueOf(thisGapChar);
-    int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
-    int i = 0;
-    for (SequenceI seq : this.getSequences())
+    final char[] thisSeq = seq.getSequence();
+    final char[] thisDs = seq.getDatasetSequence().getSequence();
+    final char[] thatAligned = alignFrom.getSequence();
+    StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
+
+    /*
+     * Find the DNA dataset position that corresponds to the first protein
+     * residue (e.g. ignoring start codon in cDNA).
+     */
+    int[] dnaStart = mapping.getDnaPosition(seq.getDatasetSequence(), 1);
+    int thisDsPosition = dnaStart == null ? 0 : dnaStart[0] - 1;
+    int thisSeqPos = 0;
+
+    /*
+     * Copy aligned cDNA up to (excluding) the first mapped base.
+     */
+    int basesWritten = 0;
+    while (basesWritten < thisDsPosition && thisSeqPos < thisSeq.length)
+    {
+      char c = thisSeq[thisSeqPos++];
+      thisAligned.append(c);
+      if (c != myGapChar)
+      {
+        basesWritten++;
+      }
+    }
+
+    /*
+     * Now traverse the aligned protein mirroring its gaps in cDNA.
+     */
+    for (char thatChar : thatAligned)
     {
-      SequenceI other = al.getSequenceAt(i++);
-      if (other == null)
+      if (thatChar == thatGapChar)
       {
-        continue;
+        /*
+         * Add (equivalent of) a gap
+         */
+        thisAligned.append(gap);
       }
-      char[] thisDs = seq.getDatasetSequence().getSequence();
-      char[] thatDs = other.getSequence();
-      StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
-      int thisDsPosition = 0;
-      for (char thatChar : thatDs)
+      else
       {
-        if (thatChar == thatGapChar)
-        {
-          /*
-           * Add (equivalent of) a gap
-           */
-          thisAligned.append(gap);
-        }
-        else
+        /*
+         * Add (equivalent of) a residue
+         */
+        for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
         {
+          thisAligned.append(thisDs[thisDsPosition++]);
+
           /*
-           * Add (equivalent of) a residue
+           * Also advance over any gaps and the next residue in the old aligned
+           * sequence; the index bound is tested before the array is read
            */
-          for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
+          while (thisSeqPos < thisSeq.length
+                  && thisSeq[thisSeqPos] == myGapChar)
           {
-            thisAligned.append(thisDs[thisDsPosition++]);
+            thisSeqPos++;
           }
+          thisSeqPos++;
         }
       }
-      /*
-       * Include any 'extra' residues (there shouldn't be).
-       */
-      while (thisDsPosition < thisDs.length)
-      {
-        thisAligned.append(thisDs[thisDsPosition++]);
-      }
-      seq.setSequence(new String(thisAligned));
     }
+
+    /*
+     * Finally copy any 'extra' aligned cDNA (e.g. stop codon, introns).
+     */
+    while (thisSeqPos < thisSeq.length)
+    {
+      thisAligned.append(thisSeq[thisSeqPos++]);
+    }
+    seq.setSequence(new String(thisAligned));
+    return true;
   }
 }