/*
 * (Licence header truncated in the retrieved copy of this file.)
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.datamodel;

import jalview.util.MapList;
import jalview.util.MappingUtils;

import java.util.ArrayList;
import java.util.List;

/**
 * Stores mapping between the columns of a protein alignment and a DNA alignment
 * and a list of individual codon to amino acid mappings between sequences.
 *
 * The mappings are held in two "tied" arrays: element {@code i} of
 * {@code dnaSeqs} is the nucleotide sequence whose mapping is element {@code i}
 * of {@code dnaToProt}. Both arrays are either null (no mapping added yet) or
 * of equal length.
 */
public class AlignedCodonFrame
{

  /**
   * tied array of na Sequence objects.
   */
  private SequenceI[] dnaSeqs = null;

  /**
   * tied array of Mappings to protein sequence Objects; each Mapping's MapList
   * maps from the corresponding dnaSeqs element to the Mapping's 'to' (protein)
   * sequence
   */
  private Mapping[] dnaToProt = null;

  /**
   * Constructor
   */
  public AlignedCodonFrame()
  {
  }

  /**
   * Returns the dataset sequence of the given sequence, or the sequence itself
   * if it has no dataset sequence.
   *
   * @param seq
   *          a non-null sequence
   * @return the sequence to use for identity comparison with stored mappings
   */
  private static SequenceI datasetOf(SequenceI seq)
  {
    SequenceI ds = seq.getDatasetSequence();
    return ds != null ? ds : seq;
  }

  /**
   * Adds a mapping between the dataset sequences for the associated dna and
   * protein sequence objects
   *
   * The enlarged arrays are fully populated before being assigned to the
   * fields, so that a failure part-way through (e.g. a null argument) cannot
   * leave a null slot in the tied arrays.
   *
   * @param dnaseq
   *          nucleotide sequence (its dataset sequence is stored if it has one)
   * @param aaseq
   *          protein sequence (its dataset sequence is stored if it has one)
   * @param map
   *          mapping from dnaseq positions to aaseq positions
   */
  public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
  {
    int oldLength = (dnaSeqs == null) ? 0 : dnaSeqs.length;
    SequenceI[] ndna = new SequenceI[oldLength + 1];
    Mapping[] ndtp = new Mapping[oldLength + 1];
    if (dnaSeqs != null)
    {
      System.arraycopy(dnaSeqs, 0, ndna, 0, oldLength);
      System.arraycopy(dnaToProt, 0, ndtp, 0, oldLength);
    }
    ndna[oldLength] = datasetOf(dnaseq);
    Mapping mp = new Mapping(map);
    // JBPNote DEBUG! THIS !
    // dnaseq.transferAnnotation(aaseq, mp);
    // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
    mp.to = datasetOf(aaseq);
    ndtp[oldLength] = mp;

    dnaSeqs = ndna;
    dnaToProt = ndtp;
  }

  /**
   * @return the internal array of mapped nucleotide sequences (not a copy), or
   *         null if no mapping has been added
   */
  public SequenceI[] getdnaSeqs()
  {
    return dnaSeqs;
  }

  /**
   * @return a new array holding the 'to' (protein) sequence of each mapping, in
   *         the same order as {@link #getdnaSeqs()}, or null if no mapping has
   *         been added
   */
  public SequenceI[] getAaSeqs()
  {
    if (dnaToProt == null)
    {
      return null;
    }
    SequenceI[] sqs = new SequenceI[dnaToProt.length];
    for (int sz = 0; sz < dnaToProt.length; sz++)
    {
      sqs[sz] = dnaToProt[sz].to;
    }
    return sqs;
  }

  /**
   * @return a new array holding the MapList of each mapping, in the same order
   *         as {@link #getdnaSeqs()}, or null if no mapping has been added
   */
  public MapList[] getdnaToProt()
  {
    if (dnaToProt == null)
    {
      return null;
    }
    MapList[] sqs = new MapList[dnaToProt.length];
    for (int sz = 0; sz < dnaToProt.length; sz++)
    {
      sqs[sz] = dnaToProt[sz].map;
    }
    return sqs;
  }

  /**
   * @return the internal array of mappings (not a copy), or null if no mapping
   *         has been added
   */
  public Mapping[] getProtMappings()
  {
    return dnaToProt;
  }

  /**
   * Returns the first mapping found which is to or from the given sequence, or
   * null.
   *
   * @param seq
   *          an aligned or dataset sequence
   * @return the first matching mapping, or null if there is none
   */
  public Mapping getMappingForSequence(SequenceI seq)
  {
    if (dnaSeqs == null)
    {
      return null;
    }
    SequenceI seqDs = datasetOf(seq);

    for (int ds = 0; ds < dnaSeqs.length; ds++)
    {
      if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs)
      {
        return dnaToProt[ds];
      }
    }
    return null;
  }

  /**
   * Return the corresponding aligned or dataset aa sequence for given dna
   * sequence, null if not found.
   *
   * @param dnaSeqRef
   *          a nucleotide sequence; it is matched either directly or via its
   *          dataset sequence
   * @return the mapped protein sequence, or null
   */
  public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
  {
    if (dnaSeqs == null)
    {
      return null;
    }
    SequenceI dnads = dnaSeqRef.getDatasetSequence();
    for (int ds = 0; ds < dnaSeqs.length; ds++)
    {
      if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
      {
        return dnaToProt[ds].to;
      }
    }
    return null;
  }

  /**
   * Return the nucleotide sequence mapped to the given protein sequence, null
   * if not found.
   *
   * @param aaSeqRef
   *          a protein sequence; it is matched either directly or via its
   *          dataset sequence
   * @return null or the dnaSeqs entry corresponding to the given aa sequence
   */
  public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
  {
    if (dnaToProt == null)
    {
      return null;
    }
    SequenceI aads = aaSeqRef.getDatasetSequence();
    for (int as = 0; as < dnaToProt.length; as++)
    {
      if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
      {
        return dnaSeqs[as];
      }
    }
    return null;
  }

  /**
   * test to see if codon frame involves seq in any way
   *
   * @param seq
   *          a nucleotide or protein sequence
   * @return true if a mapping exists to or from this sequence to any translated
   *         sequence
   */
  public boolean involvesSequence(SequenceI seq)
  {
    return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
  }

  /**
   * Add search results for regions in other sequences that translate or are
   * translated from a particular position in seq
   *
   * @param seq
   *          the sequence containing the position; matched directly or via its
   *          dataset sequence
   * @param index
   *          position in seq
   * @param results
   *          where highlighted regions go
   */
  public void markMappedRegion(SequenceI seq, int index,
          SearchResults results)
  {
    if (dnaToProt == null)
    {
      return;
    }
    SequenceI ds = seq.getDatasetSequence();
    for (int mi = 0; mi < dnaToProt.length; mi++)
    {
      Mapping mapping = dnaToProt[mi];
      if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
      {
        // seq is the nucleotide side: highlight the mapped protein region
        addRanges(results, mapping.to,
                mapping.map.locateInTo(index, index));
      }
      else if (mapping.to == seq || mapping.to == ds)
      {
        // seq is the protein side: highlight the mapped nucleotide region
        addRanges(results, dnaSeqs[mi],
                mapping.map.locateInFrom(index, index));
      }
    }
  }

  /**
   * Adds one search result per [start, end] pair in the given flat array.
   *
   * @param results
   *          where highlighted regions go
   * @param target
   *          the sequence the ranges refer to
   * @param ranges
   *          start/end position pairs, or null to add nothing
   */
  private static void addRanges(SearchResults results, SequenceI target,
          int[] ranges)
  {
    if (ranges == null)
    {
      return;
    }
    for (int i = 0; i < ranges.length; i += 2)
    {
      results.addResult(target, ranges[i], ranges[i + 1]);
    }
  }

  /**
   * Returns the DNA codon positions (base 1) for the given position (base 1) in
   * a mapped protein sequence, or null if no mapping is found.
   *
   * Intended for use in aligning cDNA to match aligned protein. Only the first
   * mapping found is returned, so not suitable for use if multiple protein
   * sequences are mapped to the same cDNA (but aligning cDNA as protein is
   * ill-defined for this case anyway).
   *
   * @param seq
   *          the DNA dataset sequence
   * @param aaPos
   *          residue position (base 1) in a protein sequence
   * @return start/end position pairs in the DNA sequence, or null if seq has
   *         no mapping (including when no mapping has been added at all)
   */
  public int[] getDnaPosition(SequenceI seq, int aaPos)
  {
    if (dnaToProt == null)
    {
      return null;
    }
    for (int i = 0; i < dnaToProt.length; i++)
    {
      if (dnaSeqs[i] == seq)
      {
        return dnaToProt[i].map.locateInFrom(aaPos, aaPos);
      }
    }
    return null;
  }

  /**
   * Convenience method to return the first aligned sequence in the given
   * alignment whose dataset has a mapping with the given dataset sequence.
   *
   * @param seq
   *          a dataset sequence (compared by identity with stored sequences)
   * @param al
   *          the alignment to search
   * @return the first matching aligned sequence, or null if there is none
   */
  public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al)
  {
    if (this.dnaToProt == null)
    {
      return null;
    }

    /*
     * Search mapped protein ('to') sequences first.
     */
    for (int i = 0; i < dnaToProt.length; i++)
    {
      if (this.dnaSeqs[i] == seq)
      {
        for (SequenceI sourceAligned : al.getSequences())
        {
          if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence())
          {
            return sourceAligned;
          }
        }
      }
    }

    /*
     * Then try mapped dna sequences.
     */
    for (int i = 0; i < dnaToProt.length; i++)
    {
      if (this.dnaToProt[i].to == seq)
      {
        for (SequenceI sourceAligned : al.getSequences())
        {
          if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence())
          {
            return sourceAligned;
          }
        }
      }
    }

    return null;
  }

  /**
   * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to
   * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is
   * a set of start/end position pairs.
   *
   * @param mappedFrom
   *          sequence on the 'from' (dnaSeqs) side of the mapping
   * @param mappedTo
   *          sequence on the 'to' side of the mapping
   * @param pos
   *          position in mappedTo
   * @return the mapped region, or null if no mapping between the two sequences
   *         yields one
   */
  public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo,
          int pos)
  {
    SequenceI fromDs = datasetOf(mappedFrom);
    SequenceI toDs = datasetOf(mappedTo);
    if (dnaToProt == null)
    {
      return null;
    }
    for (int mi = 0; mi < dnaToProt.length; mi++)
    {
      if (dnaSeqs[mi] == fromDs && dnaToProt[mi].to == toDs)
      {
        int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos);
        if (codon != null)
        {
          return codon;
        }
      }
    }
    return null;
  }

  /**
   * Returns the DNA codon for the given position (base 1) in a mapped protein
   * sequence, or null if no mapping is found.
   *
   * @param protein
   *          the peptide dataset sequence
   * @param aaPos
   *          residue position (base 1) in the peptide sequence
   * @return the three mapped nucleotides, or null if the protein has no mapping
   *         or the position is not mapped
   */
  public char[] getMappedCodon(SequenceI protein, int aaPos)
  {
    if (dnaToProt == null)
    {
      return null;
    }
    MapList ml = null;
    SequenceI dnaSeq = null;
    for (int i = 0; i < dnaToProt.length; i++)
    {
      if (dnaToProt[i].to == protein)
      {
        ml = dnaToProt[i].map;
        dnaSeq = dnaSeqs[i];
        break;
      }
    }
    if (ml == null)
    {
      return null;
    }
    int[] codonPos = ml.locateInFrom(aaPos, aaPos);
    if (codonPos == null)
    {
      return null;
    }

    /*
     * Read off the mapped nucleotides (converting to position base 0)
     */
    // NOTE(review): assumes flattenRanges yields at least three positions for
    // a single residue - confirm against MappingUtils
    codonPos = MappingUtils.flattenRanges(codonPos);
    char[] dna = dnaSeq.getSequence();
    int start = dnaSeq.getStart();
    return new char[] { dna[codonPos[0] - start], dna[codonPos[1] - start],
        dna[codonPos[2] - start] };
  }

  /**
   * Returns any mappings found which are to (or from) the given sequence, and
   * to distinct sequences.
   *
   * @param seq
   *          an aligned or dataset sequence
   * @return the matching mappings, at most one per distinct 'to' sequence;
   *         empty (never null) if there are none
   */
  public List<Mapping> getMappingsForSequence(SequenceI seq)
  {
    List<Mapping> result = new ArrayList<Mapping>();
    if (dnaSeqs == null)
    {
      return result;
    }
    // 'to' sequences already represented in result
    List<SequenceI> related = new ArrayList<SequenceI>();
    SequenceI seqDs = datasetOf(seq);

    for (int ds = 0; ds < dnaSeqs.length; ds++)
    {
      final Mapping mapping = dnaToProt[ds];
      if (dnaSeqs[ds] == seqDs || mapping.to == seqDs)
      {
        if (!related.contains(mapping.to))
        {
          result.add(mapping);
          related.add(mapping.to);
        }
      }
    }
    return result;
  }
}