X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fdatamodel%2FAlignedCodonFrame.java;h=b5d6cefb627ca681820916a82a3cc89cf4400d55;hb=6e514532a128c47d099985375d6126111ef548cb;hp=6048808c02d2ed6bf08c571b7aa086c594afa92e;hpb=93de2a3b535decd5f5d7b24e5618806f4e99c46e;p=jalview.git diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 6048808..b5d6cef 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -22,9 +22,6 @@ package jalview.datamodel; import jalview.util.MapList; -import java.util.ArrayList; -import java.util.List; - /** * Stores mapping between the columns of a protein alignment and a DNA alignment * and a list of individual codon to amino acid mappings between sequences. @@ -32,30 +29,6 @@ import java.util.List; public class AlignedCodonFrame { - /** - *
-   * Aligned nucleotide positions for codons mapped to column positions of of aligned
-   * proteins. e.g.
-   * codons[3] = [12, 14, 15] means:
-   *     column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
-   * codons[5] = null means column 6 in the protein alignment is a gap
-   * 
- */ - public int[][] codons = null; - - /** - * Width of protein sequence alignment (implicit assertion that codons.length - * >= aaWidth) - */ - public int aaWidth = 0; - - /* - * TODO: not an ideal solution - we reference the aligned amino acid sequences - * in order to make insertions on them Better would be dnaAlignment and - * aaAlignment reference.... - */ - private List a_aaSeqs = new ArrayList(); - /* * tied array of na Sequence objects. */ @@ -63,136 +36,20 @@ public class AlignedCodonFrame /* * tied array of Mappings to protein sequence Objects and SequenceI[] - * aaSeqs=null; MapLists where eac maps from the corresponding dnaSeqs element - * to corresponding aaSeqs element + * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs + * element to corresponding aaSeqs element */ private Mapping[] dnaToProt = null; /** - * initialise codon frame with a nominal alignment width - * - * @param aWidth - */ - public AlignedCodonFrame(int aWidth) - { - if (aWidth <= 0) - { - codons = null; - return; - } - codons = new int[aWidth][]; - for (int res = 0; res < aWidth; res++) - { - codons[res] = null; - } - } - - /** - * Construct a 'near copy' of the given AlignedCodonFrame, that references the - * same dataset sequences, but the given protein aligned sequences. - * - * @param acf - * @param alignment - * @throws IllegalStateException - * if the copied mapping references any dataset not in the alignment + * Constructor */ - public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment) + public AlignedCodonFrame() { - this.codons = acf.codons; - this.dnaSeqs = acf.dnaSeqs; - this.dnaToProt = acf.dnaToProt; - - for (SequenceI seq : acf.a_aaSeqs) - { - boolean found = false; - // TODO may not correctly handle the case where the same sequence appears - // twice in the source alignment i.e. same dataset sequence - // the copy will reference the first aligned sequence for both - // ?not solvable if realignment may reorder the sequences - // or check on sequence name as well???? - for (SequenceI newseq : alignment) - { - if (seq.getDatasetSequence() == newseq.getDatasetSequence()) - { - this.a_aaSeqs.add(newseq); - found = true; - break; - } - } - if (!found) - { - throw new IllegalStateException("Copying codon mapping for" - + seq.getSequenceAsString()); - } - } } /** - * ensure that codons array is at least as wide as aslen residues - * - * @param aslen - * @return (possibly newly expanded) codon array - */ - public int[][] checkCodonFrameWidth(int aslen) - { - // TODO why not codons.length < aslen ? - // should codons expand if length is 2 or 3 and aslen==2 ? - if (codons.length <= aslen + 1) - { - // probably never have to do this ? - int[][] c = new int[codons.length + 10][]; - for (int i = 0; i < codons.length; i++) - { - c[i] = codons[i]; - codons[i] = null; - } - codons = c; - } - return codons; - } - - /** - * @return width of aligned translated amino acid residues - */ - public int getaaWidth() - { - return aaWidth; - } - - /** - * increase aaWidth by one and insert a new aligned codon position space at - * aspos. - * - * @param aspos - */ - public void insertAAGap(int aspos, char gapCharacter) - { - // this aa appears before the aligned codons at aspos - so shift them in - // each pair of mapped sequences - aaWidth++; - // we actually have to modify the aligned sequences here, so use the - // a_aaSeqs vector - for (SequenceI seq : a_aaSeqs) - { - seq.insertCharAt(aspos, gapCharacter); - } - - if (aspos < aaWidth) - { - aaWidth++; - System.arraycopy(codons, aspos, codons, aspos + 1, codons.length - - aspos - 1); - codons[aspos] = null; // clear so new codon position can be marked. - } - } - - public void setAaWidth(int aapos) - { - aaWidth = aapos; - } - - /** - * add a mapping between the dataset sequences for the associated dna and + * Adds a mapping between the dataset sequences for the associated dna and * protein sequence objects * * @param dnaseq @@ -224,7 +81,6 @@ public class AlignedCodonFrame // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse())); mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq .getDatasetSequence(); - a_aaSeqs.add(aaseq); dnaToProt[nlen] = mp; } @@ -267,13 +123,29 @@ public class AlignedCodonFrame } /** + * Returns the first mapping found which is to or from the given sequence, or + * null. * - * @param sequenceRef - * @return null or corresponding aaSeq dataset sequence for dnaSeq entry + * @param seq + * @return */ - public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) + public Mapping getMappingForSequence(SequenceI seq) { - return getAaForDnaSeq(dnaSeqRef, true); + if (dnaSeqs == null) + { + return null; + } + SequenceI seqDs = seq.getDatasetSequence(); + seqDs = seqDs != null ? seqDs : seq; + + for (int ds = 0; ds < dnaSeqs.length; ds++) + { + if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs) + { + return dnaToProt[ds]; + } + } + return null; } /** @@ -281,11 +153,9 @@ public class AlignedCodonFrame * sequence, null if not found. * * @param sequenceRef - * @param returnDataset - * if true, return the aa dataset, else the aligned sequence * @return */ - public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset) + public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) { if (dnaSeqs == null) { @@ -296,16 +166,7 @@ public class AlignedCodonFrame { if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads) { - if (returnDataset) - { - return dnaToProt[ds].to; - } - else - { - // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving - // in parallel; revise data model to guarantee this - return a_aaSeqs.get(ds); - } + return dnaToProt[ds].to; } } return null; @@ -427,4 +288,92 @@ public class AlignedCodonFrame } return ml == null ? null : ml.locateInFrom(aaPos, aaPos); } + + /** + * Convenience method to return the first aligned sequence in the given + * alignment whose dataset has a mapping with the given dataset sequence. + * + * @param seq + * + * @param al + * @return + */ + public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al) + { + /* + * Search mapped protein ('to') sequences first. + */ + if (this.dnaToProt != null) + { + for (int i = 0; i < dnaToProt.length; i++) + { + if (this.dnaSeqs[i] == seq) + { + for (SequenceI sourceAligned : al.getSequences()) + { + if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence()) + { + return sourceAligned; + } + } + } + } + } + + /* + * Then try mapped dna sequences. + */ + if (this.dnaToProt != null) + { + for (int i = 0; i < dnaToProt.length; i++) + { + if (this.dnaToProt[i].to == seq) + { + for (SequenceI sourceAligned : al.getSequences()) + { + if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence()) + { + return sourceAligned; + } + } + } + } + } + + return null; + } + + /** + * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to + * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is + * a set of start/end position pairs. + * + * @param mappedFrom + * @param mappedTo + * @param pos + * @return + */ + public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo, + int pos) + { + SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom + : mappedFrom.getDatasetSequence(); + SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo + : mappedTo.getDatasetSequence(); + if (targetDs == null || sourceDs == null || dnaToProt == null) + { + return null; + } + for (int mi = 0; mi < dnaToProt.length; mi++) + { + if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs) + { + int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos); + if (codon != null) { + return codon; + } + } + } + return null; + } }