2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.util.MapList;
25 import java.util.ArrayList;
26 import java.util.List;
29 * Stores mapping between the columns of a protein alignment and a DNA alignment
30 * and a list of individual codon to amino acid mappings between sequences.
32 public class AlignedCodonFrame
37 * Aligned nucleotide positions for codons mapped to column positions of aligned
39 * codons[3] = [12, 14, 15] means:
40 * column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
41 * codons[5] = null means column 6 in the protein alignment is a gap
44 public int[][] codons = null;
47 * Width of protein sequence alignment (implicit assertion that codons.length
50 public int aaWidth = 0;
53 * TODO: not an ideal solution - we reference the aligned amino acid sequences
54 * in order to make insertions on them. Better would be dnaAlignment and
55 * aaAlignment reference....
57 private List<SequenceI> a_aaSeqs = new ArrayList<SequenceI>();
60 * tied array of na Sequence objects.
62 private SequenceI[] dnaSeqs = null;
65 * tied array of Mappings to protein sequence Objects and SequenceI[]
66 * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs element
67 * to corresponding aaSeqs element
69 private Mapping[] dnaToProt = null;
72 * initialise codon frame with a nominal alignment width
76 public AlignedCodonFrame(int aWidth)
83 codons = new int[aWidth][];
84 for (int res = 0; res < aWidth; res++)
91 * Construct a 'near copy' of the given AlignedCodonFrame, that references the
92 * same dataset sequences, but the given protein aligned sequences.
96 * @throws IllegalStateException
97 * if the copied mapping references any dataset not in the alignment
99 public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment)
101 this.codons = acf.codons;
102 this.dnaSeqs = acf.dnaSeqs;
103 this.dnaToProt = acf.dnaToProt;
105 for (SequenceI seq : acf.a_aaSeqs)
107 boolean found = false;
108 // TODO may not correctly handle the case where the same sequence appears
109 // twice in the source alignment i.e. same dataset sequence
110 // the copy will reference the first aligned sequence for both
111 // ?not solvable if realignment may reorder the sequences
112 // or check on sequence name as well????
113 for (SequenceI newseq : alignment)
115 if (seq.getDatasetSequence() == newseq.getDatasetSequence())
117 this.a_aaSeqs.add(newseq);
124 throw new IllegalStateException("Copying codon mapping for"
125 + seq.getSequenceAsString());
131 * ensure that codons array is at least as wide as aslen residues
134 * @return (possibly newly expanded) codon array
136 public int[][] checkCodonFrameWidth(int aslen)
138 // TODO why not codons.length < aslen ?
139 // should codons expand if length is 2 or 3 and aslen==2 ?
140 if (codons.length <= aslen + 1)
142 // probably never have to do this ?
143 int[][] c = new int[codons.length + 10][];
144 for (int i = 0; i < codons.length; i++)
155 * @return width of aligned translated amino acid residues
157 public int getaaWidth()
163 * increase aaWidth by one and insert a new aligned codon position space at
168 public void insertAAGap(int aspos, char gapCharacter)
170 // this aa appears before the aligned codons at aspos - so shift them in
171 // each pair of mapped sequences
173 // we actually have to modify the aligned sequences here, so use the
175 for (SequenceI seq : a_aaSeqs)
177 seq.insertCharAt(aspos, gapCharacter);
183 System.arraycopy(codons, aspos, codons, aspos + 1, codons.length
185 codons[aspos] = null; // clear so new codon position can be marked.
189 public void setAaWidth(int aapos)
195 * add a mapping between the dataset sequences for the associated dna and
196 * protein sequence objects
202 public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
207 nlen = dnaSeqs.length + 1;
209 SequenceI[] ndna = new SequenceI[nlen];
210 Mapping[] ndtp = new Mapping[nlen];
213 System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
214 System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
219 dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
220 .getDatasetSequence();
221 Mapping mp = new Mapping(map);
222 // JBPNote DEBUG! THIS !
223 // dnaseq.transferAnnotation(aaseq, mp);
224 // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
225 mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
226 .getDatasetSequence();
228 dnaToProt[nlen] = mp;
231 public SequenceI[] getdnaSeqs()
236 public SequenceI[] getAaSeqs()
238 if (dnaToProt == null)
242 SequenceI[] sqs = new SequenceI[dnaToProt.length];
243 for (int sz = 0; sz < dnaToProt.length; sz++)
245 sqs[sz] = dnaToProt[sz].to;
250 public MapList[] getdnaToProt()
252 if (dnaToProt == null)
256 MapList[] sqs = new MapList[dnaToProt.length];
257 for (int sz = 0; sz < dnaToProt.length; sz++)
259 sqs[sz] = dnaToProt[sz].map;
264 public Mapping[] getProtMappings()
272 * @return null or corresponding aaSeq dataset sequence for dnaSeq entry
274 public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
276 return getAaForDnaSeq(dnaSeqRef, true);
280 * Return the corresponding aligned or dataset aa sequence for given dna
281 * sequence, null if not found.
284 * @param returnDataset
285 * if true, return the aa dataset, else the aligned sequence
288 public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset)
294 SequenceI dnads = dnaSeqRef.getDatasetSequence();
295 for (int ds = 0; ds < dnaSeqs.length; ds++)
297 if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
301 return dnaToProt[ds].to;
305 // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving
306 // in parallel; revise data model to guarantee this
307 return a_aaSeqs.get(ds);
317 * @return null or corresponding dnaSeq entry for aaSeq entry
319 public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
321 if (dnaToProt == null)
325 SequenceI aads = aaSeqRef.getDatasetSequence();
326 for (int as = 0; as < dnaToProt.length; as++)
328 if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
337 * test to see if codon frame involves seq in any way
340 * a nucleotide or protein sequence
341 * @return true if a mapping exists to or from this sequence to any translated
344 public boolean involvesSequence(SequenceI seq)
346 return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
350 * Add search results for regions in other sequences that translate or are
351 * translated from a particular position in seq
357 * where highlighted regions go
359 public void markMappedRegion(SequenceI seq, int index,
360 SearchResults results)
362 if (dnaToProt == null)
367 SequenceI ds = seq.getDatasetSequence();
368 for (int mi = 0; mi < dnaToProt.length; mi++)
370 if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
372 // DEBUG System.err.println("dna pos "+index);
373 codon = dnaToProt[mi].map.locateInTo(index, index);
376 for (int i = 0; i < codon.length; i += 2)
378 results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
382 else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
384 // DEBUG System.err.println("aa pos "+index);
386 codon = dnaToProt[mi].map.locateInFrom(index, index);
389 for (int i = 0; i < codon.length; i += 2)
391 results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
400 * Returns the DNA codon positions (base 1) for the given position (base 1) in
401 * a mapped protein sequence, or null if no mapping is found.
403 * Intended for use in aligning cDNA to match aligned protein. Only the first
404 * mapping found is returned, so not suitable for use if multiple protein
405 * sequences are mapped to the same cDNA (but aligning cDNA as protein is
406 * ill-defined for this case anyway).
409 * the DNA dataset sequence
411 * residue position (base 1) in a protein sequence
414 public int[] getDnaPosition(SequenceI seq, int aaPos)
417 * Adapted from markMappedRegion().
420 for (int i = 0; i < dnaToProt.length; i++)
422 if (dnaSeqs[i] == seq)
424 ml = getdnaToProt()[i];
428 return ml == null ? null : ml.locateInFrom(aaPos, aaPos);