/*
 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
 * Copyright (C) 2014 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.datamodel;

import jalview.util.MapList;

import java.util.ArrayList;
import java.util.List;

/**
 * Stores mapping between the columns of a protein alignment and a DNA alignment
 * and a list of individual codon to amino acid mappings between sequences.
 */
public class AlignedCodonFrame
{

  /**
   * <pre>
   * Aligned nucleotide positions for codons mapped to column positions of
   * aligned proteins. e.g.
   * codons[3] = [12, 14, 15] means:
   *     column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
   * codons[5] = null means column 6 in the protein alignment is a gap
   * </pre>
   */
  public int[][] codons = null;

  /**
   * Width of protein sequence alignment (implicit assertion that codons.length
   * >= aaWidth)
   */
  public int aaWidth = 0;

  /*
   * TODO: not an ideal solution - we reference the aligned amino acid sequences
   * in order to make insertions on them. Better would be dnaAlignment and
   * aaAlignment reference....
   */
  private final List<SequenceI> a_aaSeqs = new ArrayList<SequenceI>();

  /*
   * tied array of na Sequence objects.
   */
  private SequenceI[] dnaSeqs = null;

  /*
   * tied array of Mappings to protein sequence Objects and SequenceI[]
   * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
   * element to corresponding aaSeqs element
   */
  private Mapping[] dnaToProt = null;

  /**
   * Initialise codon frame with a nominal alignment width. A width of zero or
   * less leaves {@link #codons} null.
   * 
   * @param aWidth
   *          initial number of protein columns to allocate
   */
  public AlignedCodonFrame(int aWidth)
  {
    if (aWidth > 0)
    {
      // every element of a freshly allocated array is already null
      codons = new int[aWidth][];
    }
  }

  /**
   * Construct a 'near copy' of the given AlignedCodonFrame, that references the
   * same dataset sequences, but the given protein aligned sequences. The codon
   * array and the dna/mapping arrays are shared with the source frame, not
   * cloned.
   * 
   * @param acf
   *          the frame to copy
   * @param alignment
   *          aligned sequences to search for each of the source frame's
   *          aligned protein sequences (matched on dataset sequence identity)
   * @throws IllegalStateException
   *           if the copied mapping references any dataset not in the alignment
   */
  public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment)
  {
    this.codons = acf.codons;
    this.dnaSeqs = acf.dnaSeqs;
    this.dnaToProt = acf.dnaToProt;
    // NOTE(review): aaWidth is not copied from acf, so it stays 0 here even
    // though codons is shared - confirm whether that is intended.

    for (SequenceI seq : acf.a_aaSeqs)
    {
      boolean found = false;
      // TODO may not correctly handle the case where the same sequence appears
      // twice in the source alignment i.e. same dataset sequence
      // the copy will reference the first aligned sequence for both
      // ?not solvable if realignment may reorder the sequences
      // or check on sequence name as well????
      for (SequenceI newseq : alignment)
      {
        if (seq.getDatasetSequence() == newseq.getDatasetSequence())
        {
          this.a_aaSeqs.add(newseq);
          found = true;
          break;
        }
      }
      if (!found)
      {
        throw new IllegalStateException("Copying codon mapping for "
                + seq.getSequenceAsString());
      }
    }
  }

  /**
   * Ensure that the codons array is wide enough to hold aslen residues,
   * allocating or expanding it if necessary. Existing entries are copied to
   * the new array; the previous array is left untouched because it may still
   * be referenced by another frame created with the copy constructor.
   * 
   * @param aslen
   *          the width required
   * @return (possibly newly expanded) codon array
   */
  public int[][] checkCodonFrameWidth(int aslen)
  {
    int currentLength = (codons == null) ? 0 : codons.length;

    // TODO why not codons.length < aslen ?
    // should codons expand if length is 2 or 3 and aslen==2 ?
    if (currentLength <= aslen + 1)
    {
      // grow by the usual increment of 10, or further if that is still too
      // narrow to satisfy the condition above
      int newLength = Math.max(currentLength + 10, aslen + 2);
      int[][] expanded = new int[newLength][];
      if (codons != null)
      {
        System.arraycopy(codons, 0, expanded, 0, currentLength);
      }
      codons = expanded;
    }
    return codons;
  }

  /**
   * @return width of aligned translated amino acid residues
   */
  public int getaaWidth()
  {
    return aaWidth;
  }

  /**
   * Insert a gap character at aspos in every aligned protein sequence held by
   * this frame and, when aspos lies within the (incremented) protein width,
   * open a new empty codon position at aspos by shifting the later entries of
   * the codon array up by one.
   * 
   * @param aspos
   *          column at which the gap is inserted
   * @param gapCharacter
   *          character inserted into each aligned protein sequence
   */
  public void insertAAGap(int aspos, char gapCharacter)
  {
    // this aa appears before the aligned codons at aspos - so shift them in
    // each pair of mapped sequences
    aaWidth++;

    // we actually have to modify the aligned sequences here, so use the
    // a_aaSeqs list
    for (SequenceI seq : a_aaSeqs)
    {
      seq.insertCharAt(aspos, gapCharacter);
    }

    if (aspos < aaWidth)
    {
      // NOTE(review): aaWidth is incremented a second time here, so the width
      // grows by two on this path - confirm against callers before changing.
      aaWidth++;

      // make sure there is room to shift: without this the copy below fails on
      // a null or too-short array, and silently drops the last codon when
      // aspos is the final index
      checkCodonFrameWidth(aspos);
      System.arraycopy(codons, aspos, codons, aspos + 1, codons.length
              - aspos - 1);
      codons[aspos] = null; // clear so new codon position can be marked.
    }
  }

  /**
   * Set the width of the aligned protein.
   * 
   * @param aapos
   *          the new value for aaWidth
   */
  public void setAaWidth(int aapos)
  {
    aaWidth = aapos;
  }

  /**
   * Add a mapping between the dataset sequences for the associated dna and
   * protein sequence objects. Where a sequence has no dataset sequence, the
   * sequence itself is used.
   * 
   * @param dnaseq
   *          nucleotide sequence (or its aligned form)
   * @param aaseq
   *          protein sequence; this object itself is also remembered as the
   *          aligned protein sequence for the mapping
   * @param map
   *          mapping from dna to protein positions
   */
  public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
  {
    int nlen = 1;
    if (dnaSeqs != null)
    {
      nlen = dnaSeqs.length + 1;
    }

    // grow both tied arrays by one, preserving existing entries
    SequenceI[] ndna = new SequenceI[nlen];
    Mapping[] ndtp = new Mapping[nlen];
    if (dnaSeqs != null)
    {
      System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
      System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
    }
    dnaSeqs = ndna;
    dnaToProt = ndtp;

    nlen--;
    dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
            .getDatasetSequence();
    Mapping mp = new Mapping(map);
    // JBPNote DEBUG! THIS !
    // dnaseq.transferAnnotation(aaseq, mp);
    // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
    mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
            .getDatasetSequence();
    a_aaSeqs.add(aaseq);
    dnaToProt[nlen] = mp;
  }

  /**
   * @return the internal array of mapped dna sequences, or null if no mapping
   *         has been added
   */
  public SequenceI[] getdnaSeqs()
  {
    return dnaSeqs;
  }

  /**
   * @return a new array holding the 'to' sequence of each mapping, or null if
   *         there are no mappings
   */
  public SequenceI[] getAaSeqs()
  {
    if (dnaToProt == null)
    {
      return null;
    }
    SequenceI[] sqs = new SequenceI[dnaToProt.length];
    for (int sz = 0; sz < dnaToProt.length; sz++)
    {
      sqs[sz] = dnaToProt[sz].to;
    }
    return sqs;
  }

  /**
   * @return a new array holding the MapList of each mapping, or null if there
   *         are no mappings
   */
  public MapList[] getdnaToProt()
  {
    if (dnaToProt == null)
    {
      return null;
    }
    MapList[] sqs = new MapList[dnaToProt.length];
    for (int sz = 0; sz < dnaToProt.length; sz++)
    {
      sqs[sz] = dnaToProt[sz].map;
    }
    return sqs;
  }

  /**
   * @return the internal array of dna to protein mappings, or null if no
   *         mapping has been added
   */
  public Mapping[] getProtMappings()
  {
    return dnaToProt;
  }

  /**
   * 
   * @param dnaSeqRef
   *          a dna sequence, or an aligned sequence whose dataset is mapped
   * @return null or corresponding aaSeq dataset sequence for dnaSeq entry
   */
  public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
  {
    return getAaForDnaSeq(dnaSeqRef, true);
  }

  /**
   * Return the corresponding aligned or dataset aa sequence for given dna
   * sequence, null if not found.
   * 
   * @param dnaSeqRef
   *          a dna sequence, or an aligned sequence whose dataset is mapped
   * @param returnDataset
   *          if true, return the aa dataset, else the aligned sequence
   * @return the matching protein sequence, or null
   */
  public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset)
  {
    if (dnaSeqs == null)
    {
      return null;
    }
    SequenceI dnads = dnaSeqRef.getDatasetSequence();
    for (int ds = 0; ds < dnaSeqs.length; ds++)
    {
      if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
      {
        if (returnDataset)
        {
          return dnaToProt[ds].to;
        }
        // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving
        // in parallel; revise data model to guarantee this
        return a_aaSeqs.get(ds);
      }
    }
    return null;
  }

  /**
   * 
   * @param aaSeqRef
   *          a protein sequence, or an aligned sequence whose dataset is
   *          mapped
   * @return null or corresponding dnaSeq entry for aaSeq entry
   */
  public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
  {
    if (dnaToProt == null)
    {
      return null;
    }
    SequenceI aads = aaSeqRef.getDatasetSequence();
    for (int as = 0; as < dnaToProt.length; as++)
    {
      if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
      {
        return dnaSeqs[as];
      }
    }
    return null;
  }

  /**
   * test to see if codon frame involves seq in any way
   * 
   * @param seq
   *          a nucleotide or protein sequence
   * @return true if a mapping exists to or from this sequence to any translated
   *         sequence
   */
  public boolean involvesSequence(SequenceI seq)
  {
    return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
  }

  /**
   * Add search results for regions in other sequences that translate or are
   * translated from a particular position in seq
   * 
   * @param seq
   *          the sequence (or aligned sequence) containing the position
   * @param index
   *          position in seq
   * @param results
   *          where highlighted regions go
   */
  public void markMappedRegion(SequenceI seq, int index,
          SearchResults results)
  {
    if (dnaToProt == null)
    {
      return;
    }
    SequenceI ds = seq.getDatasetSequence();
    for (int mi = 0; mi < dnaToProt.length; mi++)
    {
      Mapping mapping = dnaToProt[mi];
      if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
      {
        // seq is the dna side: report the mapped range(s) on the protein
        addRanges(results, mapping.to,
                mapping.map.locateInTo(index, index));
      }
      else if (mapping.to == seq || mapping.to == ds)
      {
        // seq is the protein side: report the mapped range(s) on the dna
        addRanges(results, dnaSeqs[mi],
                mapping.map.locateInFrom(index, index));
      }
    }
  }

  /**
   * Add each consecutive (start, end) pair in ranges to results as a match on
   * the given sequence. Does nothing if ranges is null.
   */
  private void addRanges(SearchResults results, SequenceI target,
          int[] ranges)
  {
    if (ranges == null)
    {
      return;
    }
    for (int i = 0; i < ranges.length; i += 2)
    {
      results.addResult(target, ranges[i], ranges[i + 1]);
    }
  }

  /**
   * Returns the DNA codon positions (base 1) for the given position (base 1) in
   * a mapped protein sequence, or null if no mapping is found.
   * 
   * Intended for use in aligning cDNA to match aligned protein. Only the first
   * mapping found is returned, so not suitable for use if multiple protein
   * sequences are mapped to the same cDNA (but aligning cDNA as protein is
   * ill-defined for this case anyway).
   * 
   * @param seq
   *          the DNA dataset sequence
   * @param aaPos
   *          residue position (base 1) in a protein sequence
   * @return the located dna positions, or null if this frame has no mapping
   *         for seq
   */
  public int[] getDnaPosition(SequenceI seq, int aaPos)
  {
    /*
     * Adapted from markMappedRegion().
     */
    if (dnaToProt == null)
    {
      // no mappings have been added yet
      return null;
    }
    for (int i = 0; i < dnaToProt.length; i++)
    {
      if (dnaSeqs[i] == seq)
      {
        return dnaToProt[i].map.locateInFrom(aaPos, aaPos);
      }
    }
    return null;
  }
}