*/
package jalview.analysis;
+import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Mapping;
import jalview.datamodel.SearchResults;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.util.MapList;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
+import java.util.TreeMap;
/**
* grab bag of useful alignment manipulation operations Expect these to be
/**
* Build mapping of protein to cDNA alignment. Mappings are made between
- * sequences which have the same name and compatible lengths. Has a 3-valued
- * result: either Mapped (at least one sequence mapping was created),
- * AlreadyMapped (all possible sequence mappings already exist), or NotMapped
- * (no possible sequence mappings exist).
+ * sequences which have the same name and compatible lengths. Any new mappings
+ * are added to the protein alignment. Has a 3-valued result: either Mapped
+ * (at least one sequence mapping was created), AlreadyMapped (all possible
+ * sequence mappings already exist), or NotMapped (no possible sequence
+ * mappings exist).
*
* @param proteinAlignment
* @param cdnaAlignment
alignedSeq.setSequence(newseq.toString());
return alignedSeq;
}
+
+ /**
+ * Realigns the given protein to match the alignment of the dna, using codon
+ * mappings to translate aligned codon positions to protein residues.
+ *
+ * @param protein
+ * the alignment whose sequences are realigned by this method
+ * @param dna
+ * the dna alignment whose alignment we are 'copying'
+ * @return the number of sequences that were realigned
+ */
+ public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)
+ {
+ Set<AlignedCodonFrame> mappings = protein.getCodonFrames();
+
+ /*
+ * Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of
+ * {dnaSequence, {proteinSequence, codonProduct}} at that position. The
+ * comparator keeps the codon positions ordered.
+ */
+ Map<AlignedCodon, Map<SequenceI, String>> alignedCodons = new TreeMap<AlignedCodon, Map<SequenceI, String>>(
+ new CodonComparator());
+ for (SequenceI dnaSeq : dna.getSequences())
+ {
+ for (AlignedCodonFrame mapping : mappings)
+ {
+ Mapping seqMap = mapping.getMappingForSequence(dnaSeq);
+ SequenceI prot = mapping.findAlignedSequence(
+ dnaSeq.getDatasetSequence(), protein);
+ if (prot != null)
+ {
+ addCodonPositions(dnaSeq, prot, protein.getGapCharacter(),
+ seqMap, alignedCodons);
+ }
+ }
+ }
+ return alignProteinAs(protein, alignedCodons);
+ }
+
+ /**
+ * Update the aligned protein sequences to match the codon alignments given in
+ * the map.
+ *
+ * @param protein
+ * @param alignedCodons
+ * an ordered map of codon positions (columns), with sequence/peptide
+ * values present in each column
+ * @return
+ */
+ protected static int alignProteinAs(AlignmentI protein,
+ Map<AlignedCodon, Map<SequenceI, String>> alignedCodons)
+ {
+ /*
+ * Prefill aligned sequences with gaps before inserting aligned protein
+ * residues.
+ */
+ int alignedWidth = alignedCodons.size();
+ char[] gaps = new char[alignedWidth];
+ Arrays.fill(gaps, protein.getGapCharacter());
+ String allGaps = String.valueOf(gaps);
+ for (SequenceI seq : protein.getSequences())
+ {
+ seq.setSequence(allGaps);
+ }
+
+ int column = 0;
+ for (AlignedCodon codon : alignedCodons.keySet())
+ {
+ final Map<SequenceI, String> columnResidues = alignedCodons.get(codon);
+ for (Entry<SequenceI, String> entry : columnResidues
+ .entrySet())
+ {
+ // place translated codon at its column position in sequence
+ entry.getKey().getSequence()[column] = entry.getValue().charAt(0);
+ }
+ column++;
+ }
+ return 0;
+ }
+
+ /**
+ * Populate the map of aligned codons by traversing the given sequence
+ * mapping, locating the aligned positions of mapped codons, and adding those
+ * positions and their translation products to the map.
+ *
+ * @param dna
+ * the aligned sequence we are mapping from
+ * @param protein
+ * the sequence to be aligned to the codons
+ * @param gapChar
+ * the gap character in the dna sequence
+ * @param seqMap
+ * a mapping to a sequence translation
+ * @param alignedCodons
+ * the map we are building up
+ */
+ static void addCodonPositions(SequenceI dna, SequenceI protein,
+ char gapChar,
+ Mapping seqMap,
+ Map<AlignedCodon, Map<SequenceI, String>> alignedCodons)
+ {
+ Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);
+ while (codons.hasNext())
+ {
+ AlignedCodon codon = codons.next();
+ Map<SequenceI, String> seqProduct = alignedCodons.get(codon);
+ if (seqProduct == null)
+ {
+ seqProduct = new HashMap<SequenceI, String>();
+ alignedCodons.put(codon, seqProduct);
+ }
+ seqProduct.put(protein, codon.product);
+ }
+ }
}