2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.util.MapList;
25 import java.util.ArrayList;
26 import java.util.List;
29 * Stores mapping between the columns of a protein alignment and a DNA alignment
30 * and a list of individual codon to amino acid mappings between sequences.
32 public class AlignedCodonFrame
37 * Aligned nucleotide positions for codons mapped to column positions of aligned
39 * codons[3] = [12, 14, 15] means:
40 * column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
41 * codons[5] = null means column 6 in the protein alignment is a gap
44 public int[][] codons = null;
47 * Width of protein sequence alignment (implicit assertion that codons.length
50 public int aaWidth = 0;
53 * TODO: not an ideal solution - we reference the aligned amino acid sequences
54 * in order to make insertions on them. Better would be dnaAlignment and
55 * aaAlignment reference....
57 private List<SequenceI> a_aaSeqs = new ArrayList<SequenceI>();
60 * tied array of na Sequence objects.
62 private SequenceI[] dnaSeqs = null;
65 * tied array of Mappings to protein sequence Objects and SequenceI[]
66 * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs element
67 * to corresponding aaSeqs element
69 private Mapping[] dnaToProt = null;
72 * initialise codon frame with a nominal alignment width
76 public AlignedCodonFrame()
/**
 * Builds a 'near copy' of the given AlignedCodonFrame. The codon array, the
 * dna sequence array and the dna-to-protein mapping array are taken over by
 * reference (they are the same objects as in {@code acf}, not clones), while
 * the list of aligned protein sequences is rebuilt from {@code alignment}.
 * <p>
 * Each aligned protein sequence held by {@code acf} is replaced by the first
 * member of {@code alignment} that has the identical dataset sequence object,
 * so the rebuilt list keeps the order of the original list.
 * <p>
 * NOTE(review): aaWidth is not assigned here, so the copy keeps the field's
 * default value - confirm that this is intended.
 * 
 * @param acf
 *          the codon frame to copy
 * @param alignment
 *          the aligned sequences the copy should reference
 * @throws IllegalStateException
 *           if the copied mapping references any dataset not in the alignment
 */
public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment)
{
  this.codons = acf.codons;
  this.dnaSeqs = acf.dnaSeqs;
  this.dnaToProt = acf.dnaToProt;

  for (SequenceI original : acf.a_aaSeqs)
  {
    // TODO may not correctly handle the case where the same sequence appears
    // twice in the source alignment i.e. same dataset sequence
    // the copy will reference the first aligned sequence for both
    // ?not solvable if realignment may reorder the sequences
    // or check on sequence name as well????
    SequenceI replacement = null;
    for (int i = 0; i < alignment.length && replacement == null; i++)
    {
      SequenceI candidate = alignment[i];
      // identity comparison: both must point at the same dataset object
      if (original.getDatasetSequence() == candidate.getDatasetSequence())
      {
        replacement = candidate;
      }
    }

    if (replacement == null)
    {
      throw new IllegalStateException("Copying codon mapping for"
              + original.getSequenceAsString());
    }
    this.a_aaSeqs.add(replacement);
  }
}
121 * ensure that codons array is at least as wide as aslen residues
124 * @return (possibly newly expanded) codon array
126 public int[][] checkCodonFrameWidth(int aslen)
128 // TODO why not codons.length < aslen ?
129 // should codons expand if length is 2 or 3 and aslen==2 ?
130 if (codons.length <= aslen + 1)
132 // probably never have to do this ?
133 int[][] c = new int[codons.length + 10][];
134 for (int i = 0; i < codons.length; i++)
145 * @return width of aligned translated amino acid residues
147 public int getaaWidth()
153 * increase aaWidth by one and insert a new aligned codon position space at
158 public void insertAAGap(int aspos, char gapCharacter)
160 // this aa appears before the aligned codons at aspos - so shift them in
161 // each pair of mapped sequences
163 // we actually have to modify the aligned sequences here, so use the
165 for (SequenceI seq : a_aaSeqs)
167 seq.insertCharAt(aspos, gapCharacter);
173 System.arraycopy(codons, aspos, codons, aspos + 1, codons.length
175 codons[aspos] = null; // clear so new codon position can be marked.
179 public void setAaWidth(int aapos)
185 * add a mapping between the dataset sequences for the associated dna and
186 * protein sequence objects
192 public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
197 nlen = dnaSeqs.length + 1;
199 SequenceI[] ndna = new SequenceI[nlen];
200 Mapping[] ndtp = new Mapping[nlen];
203 System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
204 System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
209 dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
210 .getDatasetSequence();
211 Mapping mp = new Mapping(map);
212 // JBPNote DEBUG! THIS !
213 // dnaseq.transferAnnotation(aaseq, mp);
214 // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
215 mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
216 .getDatasetSequence();
218 dnaToProt[nlen] = mp;
221 public SequenceI[] getdnaSeqs()
226 public SequenceI[] getAaSeqs()
228 if (dnaToProt == null)
232 SequenceI[] sqs = new SequenceI[dnaToProt.length];
233 for (int sz = 0; sz < dnaToProt.length; sz++)
235 sqs[sz] = dnaToProt[sz].to;
240 public MapList[] getdnaToProt()
242 if (dnaToProt == null)
246 MapList[] sqs = new MapList[dnaToProt.length];
247 for (int sz = 0; sz < dnaToProt.length; sz++)
249 sqs[sz] = dnaToProt[sz].map;
254 public Mapping[] getProtMappings()
262 * @return null or corresponding aaSeq dataset sequence for dnaSeq entry
264 public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
266 return getAaForDnaSeq(dnaSeqRef, true);
270 * Return the corresponding aligned or dataset aa sequence for given dna
271 * sequence, null if not found.
274 * @param returnDataset
275 * if true, return the aa dataset, else the aligned sequence
278 public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset)
284 SequenceI dnads = dnaSeqRef.getDatasetSequence();
285 for (int ds = 0; ds < dnaSeqs.length; ds++)
287 if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
291 return dnaToProt[ds].to;
295 // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving
296 // in parallel; revise data model to guarantee this
297 return a_aaSeqs.get(ds);
307 * @return null or corresponding aaSeq entry for dnaSeq entry
309 public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
311 if (dnaToProt == null)
315 SequenceI aads = aaSeqRef.getDatasetSequence();
316 for (int as = 0; as < dnaToProt.length; as++)
318 if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
327 * test to see if codon frame involves seq in any way
330 * a nucleotide or protein sequence
331 * @return true if a mapping exists to or from this sequence to any translated
334 public boolean involvesSequence(SequenceI seq)
336 return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
340 * Add search results for regions in other sequences that translate or are
341 * translated from a particular position in seq
347 * where highlighted regions go
349 public void markMappedRegion(SequenceI seq, int index,
350 SearchResults results)
352 if (dnaToProt == null)
357 SequenceI ds = seq.getDatasetSequence();
358 for (int mi = 0; mi < dnaToProt.length; mi++)
360 if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
362 // DEBUG System.err.println("dna pos "+index);
363 codon = dnaToProt[mi].map.locateInTo(index, index);
366 for (int i = 0; i < codon.length; i += 2)
368 results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
372 else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
374 // DEBUG System.err.println("aa pos "+index);
376 codon = dnaToProt[mi].map.locateInFrom(index, index);
379 for (int i = 0; i < codon.length; i += 2)
381 results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
390 * Returns the DNA codon positions (base 1) for the given position (base 1) in
391 * a mapped protein sequence, or null if no mapping is found.
393 * Intended for use in aligning cDNA to match aligned protein. Only the first
394 * mapping found is returned, so not suitable for use if multiple protein
395 * sequences are mapped to the same cDNA (but aligning cDNA as protein is
396 * ill-defined for this case anyway).
399 * the DNA dataset sequence
401 * residue position (base 1) in a protein sequence
404 public int[] getDnaPosition(SequenceI seq, int aaPos)
407 * Adapted from markMappedRegion().
410 for (int i = 0; i < dnaToProt.length; i++)
412 if (dnaSeqs[i] == seq)
414 ml = getdnaToProt()[i];
418 return ml == null ? null : ml.locateInFrom(aaPos, aaPos);