X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;fp=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=74406dc2d340bfa46a4c4c98597ebd784058afbf;hb=4924c107e70d763821067bcb3e1586bc14589918;hp=0ae782ebaa0c217617e7dff4d0e8d8df2294d73b;hpb=97a31ae1fe3095b9d332f655103e334342ee9a19;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 0ae782e..74406dc 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -20,9 +20,11 @@ */ package jalview.analysis; +import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; +import jalview.datamodel.Mapping; import jalview.datamodel.SearchResults; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; @@ -30,10 +32,15 @@ import jalview.schemes.ResidueProperties; import jalview.util.MapList; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; /** * grab bag of useful alignment manipulation operations Expect these to be @@ -207,10 +214,11 @@ public class AlignmentUtils /** * Build mapping of protein to cDNA alignment. Mappings are made between - * sequences which have the same name and compatible lengths. Has a 3-valued - * result: either Mapped (at least one sequence mapping was created), - * AlreadyMapped (all possible sequence mappings already exist), or NotMapped - * (no possible sequence mappings exist). + * sequences which have the same name and compatible lengths. Any new mappings + * are added to the protein alignment. Has a 3-valued result: either Mapped + * (at least one sequence mapping was created), AlreadyMapped (all possible + * sequence mappings already exist), or NotMapped (no possible sequence + * mappings exist). * * @param proteinAlignment * @param cdnaAlignment @@ -780,4 +788,118 @@ public class AlignmentUtils alignedSeq.setSequence(newseq.toString()); return alignedSeq; } + + /** + * Realigns the given protein to match the alignment of the dna, using codon + * mappings to translate aligned codon positions to protein residues. + * + * @param protein + * the alignment whose sequences are realigned by this method + * @param dna + * the dna alignment whose alignment we are 'copying' + * @return the number of sequences that were realigned + */ + public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna) + { + Set mappings = protein.getCodonFrames(); + + /* + * Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of + * {dnaSequence, {proteinSequence, codonProduct}} at that position. The + * comparator keeps the codon positions ordered. + */ + Map> alignedCodons = new TreeMap>( + new CodonComparator()); + for (SequenceI dnaSeq : dna.getSequences()) + { + for (AlignedCodonFrame mapping : mappings) + { + Mapping seqMap = mapping.getMappingForSequence(dnaSeq); + SequenceI prot = mapping.findAlignedSequence( + dnaSeq.getDatasetSequence(), protein); + if (prot != null) + { + addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), + seqMap, alignedCodons); + } + } + } + return alignProteinAs(protein, alignedCodons); + } + + /** + * Update the aligned protein sequences to match the codon alignments given in + * the map. + * + * @param protein + * @param alignedCodons + * an ordered map of codon positions (columns), with sequence/peptide + * values present in each column + * @return + */ + protected static int alignProteinAs(AlignmentI protein, + Map> alignedCodons) + { + /* + * Prefill aligned sequences with gaps before inserting aligned protein + * residues. + */ + int alignedWidth = alignedCodons.size(); + char[] gaps = new char[alignedWidth]; + Arrays.fill(gaps, protein.getGapCharacter()); + String allGaps = String.valueOf(gaps); + for (SequenceI seq : protein.getSequences()) + { + seq.setSequence(allGaps); + } + + int column = 0; + for (AlignedCodon codon : alignedCodons.keySet()) + { + final Map columnResidues = alignedCodons.get(codon); + for (Entry entry : columnResidues + .entrySet()) + { + // place translated codon at its column position in sequence + entry.getKey().getSequence()[column] = entry.getValue().charAt(0); + } + column++; + } + return 0; + } + + /** + * Populate the map of aligned codons by traversing the given sequence + * mapping, locating the aligned positions of mapped codons, and adding those + * positions and their translation products to the map. + * + * @param dna + * the aligned sequence we are mapping from + * @param protein + * the sequence to be aligned to the codons + * @param gapChar + * the gap character in the dna sequence + * @param seqMap + * a mapping to a sequence translation + * @param alignedCodons + * the map we are building up + */ + static void addCodonPositions(SequenceI dna, SequenceI protein, + char gapChar, + Mapping seqMap, + Map> alignedCodons) + { + Iterator codons = seqMap.getCodonIterator(dna, gapChar); + while (codons.hasNext()) + { + AlignedCodon codon = codons.next(); + Map seqProduct = alignedCodons.get(codon); + if (seqProduct == null) + { + seqProduct = new HashMap(); + alignedCodons.put(codon, seqProduct); + } + seqProduct.put(protein, codon.product); + } + } }