X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fdatamodel%2FAlignedCodonFrame.java;h=b5d6cefb627ca681820916a82a3cc89cf4400d55;hb=6e514532a128c47d099985375d6126111ef548cb;hp=6048808c02d2ed6bf08c571b7aa086c594afa92e;hpb=93de2a3b535decd5f5d7b24e5618806f4e99c46e;p=jalview.git
diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java
index 6048808..b5d6cef 100644
--- a/src/jalview/datamodel/AlignedCodonFrame.java
+++ b/src/jalview/datamodel/AlignedCodonFrame.java
@@ -1,6 +1,6 @@
/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
@@ -22,9 +22,6 @@ package jalview.datamodel;
import jalview.util.MapList;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* Stores mapping between the columns of a protein alignment and a DNA alignment
* and a list of individual codon to amino acid mappings between sequences.
@@ -32,30 +29,6 @@ import java.util.List;
public class AlignedCodonFrame
{
- /**
- *
- * Aligned nucleotide positions for codons mapped to column positions of of aligned
- * proteins. e.g.
- * codons[3] = [12, 14, 15] means:
- * column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
- * codons[5] = null means column 6 in the protein alignment is a gap
- *
- */
- public int[][] codons = null;
-
- /**
- * Width of protein sequence alignment (implicit assertion that codons.length
- * >= aaWidth)
- */
- public int aaWidth = 0;
-
- /*
- * TODO: not an ideal solution - we reference the aligned amino acid sequences
- * in order to make insertions on them Better would be dnaAlignment and
- * aaAlignment reference....
- */
- private List a_aaSeqs = new ArrayList();
-
/*
* tied array of na Sequence objects.
*/
@@ -63,136 +36,20 @@ public class AlignedCodonFrame
/*
* tied array of Mappings to protein sequence Objects and SequenceI[]
- * aaSeqs=null; MapLists where eac maps from the corresponding dnaSeqs element
- * to corresponding aaSeqs element
+ * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
+ * element to corresponding aaSeqs element
*/
private Mapping[] dnaToProt = null;
/**
- * initialise codon frame with a nominal alignment width
- *
- * @param aWidth
- */
- public AlignedCodonFrame(int aWidth)
- {
- if (aWidth <= 0)
- {
- codons = null;
- return;
- }
- codons = new int[aWidth][];
- for (int res = 0; res < aWidth; res++)
- {
- codons[res] = null;
- }
- }
-
- /**
- * Construct a 'near copy' of the given AlignedCodonFrame, that references the
- * same dataset sequences, but the given protein aligned sequences.
- *
- * @param acf
- * @param alignment
- * @throws IllegalStateException
- * if the copied mapping references any dataset not in the alignment
+ * Default constructor; creates a codon frame that holds no mappings yet.
*/
- public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment)
+ public AlignedCodonFrame()
{
- this.codons = acf.codons;
- this.dnaSeqs = acf.dnaSeqs;
- this.dnaToProt = acf.dnaToProt;
-
- for (SequenceI seq : acf.a_aaSeqs)
- {
- boolean found = false;
- // TODO may not correctly handle the case where the same sequence appears
- // twice in the source alignment i.e. same dataset sequence
- // the copy will reference the first aligned sequence for both
- // ?not solvable if realignment may reorder the sequences
- // or check on sequence name as well????
- for (SequenceI newseq : alignment)
- {
- if (seq.getDatasetSequence() == newseq.getDatasetSequence())
- {
- this.a_aaSeqs.add(newseq);
- found = true;
- break;
- }
- }
- if (!found)
- {
- throw new IllegalStateException("Copying codon mapping for"
- + seq.getSequenceAsString());
- }
- }
}
/**
- * ensure that codons array is at least as wide as aslen residues
- *
- * @param aslen
- * @return (possibly newly expanded) codon array
- */
- public int[][] checkCodonFrameWidth(int aslen)
- {
- // TODO why not codons.length < aslen ?
- // should codons expand if length is 2 or 3 and aslen==2 ?
- if (codons.length <= aslen + 1)
- {
- // probably never have to do this ?
- int[][] c = new int[codons.length + 10][];
- for (int i = 0; i < codons.length; i++)
- {
- c[i] = codons[i];
- codons[i] = null;
- }
- codons = c;
- }
- return codons;
- }
-
- /**
- * @return width of aligned translated amino acid residues
- */
- public int getaaWidth()
- {
- return aaWidth;
- }
-
- /**
- * increase aaWidth by one and insert a new aligned codon position space at
- * aspos.
- *
- * @param aspos
- */
- public void insertAAGap(int aspos, char gapCharacter)
- {
- // this aa appears before the aligned codons at aspos - so shift them in
- // each pair of mapped sequences
- aaWidth++;
- // we actually have to modify the aligned sequences here, so use the
- // a_aaSeqs vector
- for (SequenceI seq : a_aaSeqs)
- {
- seq.insertCharAt(aspos, gapCharacter);
- }
-
- if (aspos < aaWidth)
- {
- aaWidth++;
- System.arraycopy(codons, aspos, codons, aspos + 1, codons.length
- - aspos - 1);
- codons[aspos] = null; // clear so new codon position can be marked.
- }
- }
-
- public void setAaWidth(int aapos)
- {
- aaWidth = aapos;
- }
-
- /**
- * add a mapping between the dataset sequences for the associated dna and
+ * Adds a mapping between the dataset sequences for the associated dna and
* protein sequence objects
*
* @param dnaseq
@@ -224,7 +81,6 @@ public class AlignedCodonFrame
// aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
.getDatasetSequence();
- a_aaSeqs.add(aaseq);
dnaToProt[nlen] = mp;
}
@@ -267,13 +123,29 @@ public class AlignedCodonFrame
}
/**
+ * Returns the first mapping found which is to or from the given sequence, or
+ * null.
*
- * @param sequenceRef
- * @return null or corresponding aaSeq dataset sequence for dnaSeq entry
+ * @param seq sequence to look up (its dataset sequence is used if it has one)
+ * @return the first matching mapping, or null if there is none
*/
- public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
+ public Mapping getMappingForSequence(SequenceI seq)
{
- return getAaForDnaSeq(dnaSeqRef, true);
+ if (dnaSeqs == null)
+ {
+ return null;
+ }
+ SequenceI seqDs = seq.getDatasetSequence();
+ seqDs = seqDs != null ? seqDs : seq;
+
+ for (int ds = 0; ds < dnaSeqs.length; ds++)
+ {
+ if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs)
+ {
+ return dnaToProt[ds];
+ }
+ }
+ return null;
}
/**
@@ -281,11 +153,9 @@ public class AlignedCodonFrame
* sequence, null if not found.
*
* @param sequenceRef
- * @param returnDataset
- * if true, return the aa dataset, else the aligned sequence
* @return
*/
- public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset)
+ public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
{
if (dnaSeqs == null)
{
@@ -296,16 +166,7 @@ public class AlignedCodonFrame
{
if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
{
- if (returnDataset)
- {
- return dnaToProt[ds].to;
- }
- else
- {
- // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving
- // in parallel; revise data model to guarantee this
- return a_aaSeqs.get(ds);
- }
+ return dnaToProt[ds].to;
}
}
return null;
@@ -427,4 +288,92 @@ public class AlignedCodonFrame
}
return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
}
+
+ /**
+ * Convenience method to return the first aligned sequence in the given
+ * alignment whose dataset has a mapping with the given dataset sequence.
+ *
+ * @param seq dataset sequence, compared by identity with mapped sequences
+ *
+ * @param al alignment whose sequences are searched
+ * @return the first such aligned sequence, or null if none is found
+ */
+ public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al)
+ {
+ /*
+ * Where seq is a mapped 'from' sequence, look in al for its 'to' first.
+ */
+ if (this.dnaToProt != null)
+ {
+ for (int i = 0; i < dnaToProt.length; i++)
+ {
+ if (this.dnaSeqs[i] == seq)
+ {
+ for (SequenceI sourceAligned : al.getSequences())
+ {
+ if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence())
+ {
+ return sourceAligned;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Then, where seq is a mapped 'to' sequence, look in al for its 'from'.
+ */
+ if (this.dnaToProt != null)
+ {
+ for (int i = 0; i < dnaToProt.length; i++)
+ {
+ if (this.dnaToProt[i].to == seq)
+ {
+ for (SequenceI sourceAligned : al.getSequences())
+ {
+ if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence())
+ {
+ return sourceAligned;
+ }
+ }
+ }
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to
+ * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is
+ * a set of start/end position pairs.
+ *
+ * @param mappedFrom sequence (or its dataset) on the 'from' side of a mapping
+ * @param mappedTo sequence (or its dataset) on the 'to' side of a mapping
+ * @param pos position in mappedTo's dataset, passed to locateInFrom
+ * @return the first non-null located region, or null if no mapping matches
+ */
+ public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo,
+ int pos)
+ {
+ SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom
+ : mappedFrom.getDatasetSequence();
+ SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo
+ : mappedTo.getDatasetSequence();
+ if (targetDs == null || sourceDs == null || dnaToProt == null)
+ {
+ return null;
+ }
+ for (int mi = 0; mi < dnaToProt.length; mi++)
+ {
+ if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs)
+ {
+ int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos);
+ if (codon != null) {
+ return codon;
+ }
+ }
+ }
+ return null;
+ }
}