2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.util.MapList;
25 import java.util.ArrayList;
26 import java.util.List;
29 * Stores mapping between the columns of a protein alignment and a DNA alignment
30 * and a list of individual codon to amino acid mappings between sequences.
// Keeps three index-aligned structures: dnaSeqs, dnaToProt and a_aaSeqs.
// The TODO inside getAaForDnaSeq records that lookups rely on these staying
// in step; addMap is the method that extends dnaSeqs and dnaToProt together.
32 public class AlignedCodonFrame
36 * array of nucleotide positions for aligned codons at column of aligned
// Allocated by the int-width constructor, shared by reference (not copied) by
// the copy constructor, and shifted in place by insertAAGap.
39 public int[][] codons = null;
42 * width of protein sequence alignment; implicit assertion that codons.length
// Public and mutable; insertAAGap's javadoc states that it increases this by one.
// NOTE(review): presumably expected to stay <= codons.length — confirm.
45 public int aaWidth = 0;
48 * TODO: not an ideal solution - we reference the aligned amino acid sequences
49 * in order to make insertions on them. Better would be dnaAlignment and
50 * aaAlignment reference....
// Aligned protein sequences: insertAAGap inserts the gap character into each
// of them, and getAaForDnaSeq reads this list by the same index as dnaSeqs.
52 private List<SequenceI> a_aaSeqs = new ArrayList<SequenceI>();
55 * tied array of na Sequence objects.
// Entry i pairs with dnaToProt[i]. addMap stores the dataset sequence when the
// supplied DNA sequence has one, otherwise the supplied sequence itself.
57 private SequenceI[] dnaSeqs = null;
60 * tied array of Mappings to protein sequence Objects and SequenceI[]
61 * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs element
62 * to corresponding aaSeqs element
// Initially null; getAaSeqs, getdnaToProt, getDnaForAaSeq and markMappedRegion
// all test for null before using it. Each Mapping's `to` is set by addMap to
// the protein dataset sequence when one exists, otherwise the protein sequence.
64 private Mapping[] dnaToProt = null;
67 * initialise codon frame with a nominal alignment width
// Allocates the outer codons array with one row slot per alignment column.
// NOTE(review): the loop body is not present here. Java already initialises
// the rows of `new int[aWidth][]` to null, so if the loop only assigns null
// it is redundant — confirm.
71 public AlignedCodonFrame(int aWidth)
78 codons = new int[aWidth][];
79 for (int res = 0; res < aWidth; res++)
86 * Construct a 'near copy' of the given AlignedCodonFrame, that references the
87 * same dataset sequences, but the given protein aligned sequences.
91 * @throws IllegalStateException
92 * if the copied mapping references any dataset not in the alignment
// Shares codons, dnaSeqs and dnaToProt with acf by reference (no deep copy),
// so in-place changes to those arrays (e.g. the shift in insertAAGap) are
// seen through both frames.
94 public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment)
96 this.codons = acf.codons;
97 this.dnaSeqs = acf.dnaSeqs;
98 this.dnaToProt = acf.dnaToProt;
// For each protein sequence held by acf, look in `alignment` for a sequence
// with the same dataset sequence (identity comparison, not equals()) and
// record that aligned sequence instead.
100 for (SequenceI seq : acf.a_aaSeqs)
102 boolean found = false;
103 for (SequenceI newseq : alignment)
105 if (seq.getDatasetSequence() == newseq.getDatasetSequence())
107 this.a_aaSeqs.add(newseq);
// NOTE(review): presumably thrown only when `found` is still false — confirm.
// The message has no space between "for" and the appended sequence string.
114 throw new IllegalStateException("Copying codon mapping for"
115 + seq.getSequenceAsString());
121 * ensure that codons array is at least as wide as aslen residues
124 * @return (possibly newly expanded) codon array
// Grows the codons array when its length is <= aslen + 1, copying the
// existing entries across.
// NOTE(review): the replacement array is always exactly 10 slots longer than
// the current one, independent of aslen. If aslen >= codons.length + 9, one
// call does not make the array wider than aslen — confirm callers never ask
// for that much.
126 public int[][] checkCodonFrameWidth(int aslen)
128 if (codons.length <= aslen + 1)
130 // probably never have to do this ?
131 int[][] c = new int[codons.length + 10][];
132 for (int i = 0; i < codons.length; i++)
143 * @return width of aligned translated amino acid residues
// NOTE(review): presumably a plain accessor for the aaWidth field — confirm.
145 public int getaaWidth()
151 * increase aaWidth by one and insert a new aligned codon position space at
// Inserts gapCharacter at column aspos in every sequence in a_aaSeqs, asks
// checkCodonFrameWidth to make room, then shifts the codon entries from aspos
// one slot to the right and clears codons[aspos] for the new column.
156 public void insertAAGap(int aspos, char gapCharacter)
158 // this aa appears before the aligned codons at aspos - so shift them in
159 // each pair of mapped sequences
161 // we actually have to modify the aligned sequences here, so use the
163 for (SequenceI seq : a_aaSeqs)
165 seq.insertCharAt(aspos, gapCharacter);
168 checkCodonFrameWidth(aspos);
// Source and destination are the same array; System.arraycopy is specified to
// behave as if it copied through a temporary, so the overlapping shift is safe.
172 System.arraycopy(codons, aspos, codons, aspos + 1, codons.length
174 codons[aspos] = null; // clear so new codon position can be marked.
// NOTE(review): presumably assigns aapos to the aaWidth field — confirm.
178 public void setAaWidth(int aapos)
184 * add a mapping between the dataset sequences for the associated dna and
185 * protein sequence objects
// Appends one dna -> protein mapping. The existing dnaSeqs/dnaToProt entries
// are copied into arrays one element longer, and the new slot is filled with
// the DNA sequence and a Mapping built from `map` whose `to` is the protein.
// Both sequences are replaced by their dataset sequence when they have one.
191 public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
196 nlen = dnaSeqs.length + 1;
198 SequenceI[] ndna = new SequenceI[nlen];
199 Mapping[] ndtp = new Mapping[nlen];
202 System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
203 System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
// NOTE(review): as listed, nlen still equals the new array length here, which
// would index one past the end. Presumably the new arrays are installed and
// nlen is decremented on lines not present here — confirm.
208 dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
209 .getDatasetSequence();
210 Mapping mp = new Mapping(map);
211 // JBPNote DEBUG! THIS !
212 // dnaseq.transferAnnotation(aaseq, mp);
213 // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
214 mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
215 .getDatasetSequence();
217 dnaToProt[nlen] = mp;
// NOTE(review): presumably returns the internal dnaSeqs array itself rather
// than a copy — confirm; if so, callers must treat the result as read-only.
220 public SequenceI[] getdnaSeqs()
// Builds a fresh array holding the `to` (protein) sequence of every mapping,
// in dnaToProt order.
// NOTE(review): the no-mappings case (dnaToProt == null) is handled separately,
// presumably by returning null — confirm.
225 public SequenceI[] getAaSeqs()
227 if (dnaToProt == null)
231 SequenceI[] sqs = new SequenceI[dnaToProt.length];
232 for (int sz = 0; sz < dnaToProt.length; sz++)
234 sqs[sz] = dnaToProt[sz].to;
// Builds a fresh array holding the MapList (`map`) of every mapping, in
// dnaToProt order. A new array is allocated on every call.
// NOTE(review): the no-mappings case (dnaToProt == null) is handled separately,
// presumably by returning null — confirm.
239 public MapList[] getdnaToProt()
241 if (dnaToProt == null)
245 MapList[] sqs = new MapList[dnaToProt.length];
246 for (int sz = 0; sz < dnaToProt.length; sz++)
248 sqs[sz] = dnaToProt[sz].map;
// NOTE(review): presumably returns the internal dnaToProt array itself rather
// than a copy — confirm.
253 public Mapping[] getProtMappings()
261 * @return null or corresponding aaSeq dataset sequence for dnaSeq entry
// Convenience overload: delegates with returnDataset = true, i.e. asks for
// the protein dataset sequence rather than the aligned one.
263 public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
265 return getAaForDnaSeq(dnaSeqRef, true);
269 * Return the corresponding aligned or dataset aa sequence for given dna
270 * sequence, null if not found.
273 * @param returnDataset
274 * if true, return the aa dataset, else the aligned sequence
// Linear search of dnaSeqs for dnaSeqRef or its dataset sequence (identity
// comparison). On a hit the result is either dnaToProt[ds].to or the entry at
// the same index in a_aaSeqs.
// NOTE(review): the test that chooses between the two is not present here;
// per the javadoc, returnDataset == true selects the dataset sequence — confirm.
277 public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset)
283 SequenceI dnads = dnaSeqRef.getDatasetSequence();
284 for (int ds = 0; ds < dnaSeqs.length; ds++)
286 if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
290 return dnaToProt[ds].to;
294 // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving
295 // in parallel; revise data model to guarantee this
296 return a_aaSeqs.get(ds);
306 * @return null or corresponding dnaSeq entry for aaSeq entry
// Reverse lookup: scans the mappings for one whose `to` is aaSeqRef or its
// dataset sequence (identity comparison).
// NOTE(review): on a hit this presumably returns the dnaSeqs entry at the same
// index, and null otherwise — confirm.
308 public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
310 if (dnaToProt == null)
314 SequenceI aads = aaSeqRef.getDatasetSequence();
315 for (int as = 0; as < dnaToProt.length; as++)
317 if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
326 * test to see if codon frame involves seq in any way
329 * a nucleotide or protein sequence
330 * @return true if a mapping exists to or from this sequence to any translated
// True when seq resolves through either lookup direction: as a DNA sequence
// (getAaForDnaSeq) or as a protein sequence (getDnaForAaSeq).
333 public boolean involvesSequence(SequenceI seq)
335 return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
339 * Add search results for regions in other sequences that translate or are
340 * translated from a particular position in seq
346 * where highlighted regions go
// Walks every mapping. seq (or its dataset sequence) may match on the DNA side
// or on the protein side, compared by identity; a mapping that matches on the
// DNA side is not also tested on the protein side (else-if).
348 public void markMappedRegion(SequenceI seq, int index,
349 SearchResults results)
351 if (dnaToProt == null)
356 SequenceI ds = seq.getDatasetSequence();
357 for (int mi = 0; mi < dnaToProt.length; mi++)
359 if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
// DNA side: translate the single position into the mapped protein range.
361 // DEBUG System.err.println("dna pos "+index);
362 codon = dnaToProt[mi].map.locateInTo(index, index);
// The returned array is consumed two entries at a time and each pair is
// recorded against the protein sequence (presumably start/end — confirm).
365 for (int i = 0; i < codon.length; i += 2)
367 results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
371 else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
// Protein side: translate the residue position back to DNA ranges and record
// each pair against the DNA sequence.
373 // DEBUG System.err.println("aa pos "+index);
375 codon = dnaToProt[mi].map.locateInFrom(index, index);
378 for (int i = 0; i < codon.length; i += 2)
380 results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
389 * Returns the DNA codon positions (base 1) for the given position (base 1) in
390 * a mapped protein sequence, or null if no mapping is found.
392 * Intended for use in aligning cDNA to match aligned protein. Only the first
393 * mapping found is returned, so not suitable for use if multiple protein
394 * sequences are mapped to the same cDNA (but aligning cDNA as protein is
395 * ill-defined for this case anyway).
398 * the DNA dataset sequence
400 * residue position (base 1) in a protein sequence
// Finds a mapping whose DNA sequence is exactly `seq` (identity only; unlike
// markMappedRegion there is no fallback to seq's dataset sequence) and maps
// aaPos back to DNA positions with locateInFrom. Yields null when no mapping matched.
// NOTE(review): unlike getAaSeqs, getdnaToProt, getDnaForAaSeq and
// markMappedRegion, no `dnaToProt == null` guard is visible before the loop.
// If none exists, calling this on a frame with no mappings throws
// NullPointerException — confirm.
403 public int[] getDnaPosition(SequenceI seq, int aaPos)
406 * Adapted from markMappedRegion().
409 for (int i = 0; i < dnaToProt.length; i++)
411 if (dnaSeqs[i] == seq)
// getdnaToProt() allocates and fills a whole MapList[] just to read element i;
// dnaToProt[i].map holds the same object.
413 ml = getdnaToProt()[i];
417 return ml == null ? null : ml.locateInFrom(aaPos, aaPos);