package jalview.datamodel;
import jalview.analysis.AlignmentUtils;
+import jalview.io.FastaFile;
import jalview.util.MessageManager;
import java.util.ArrayList;
import java.util.Enumeration;
+import java.util.HashSet;
import java.util.Hashtable;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.Vector;
/**
public Hashtable alignmentProperties;
+ private Set<AlignedCodonFrame> codonFrameList = new LinkedHashSet<AlignedCodonFrame>();
+
private void initAlignment(SequenceI[] seqs)
{
int i = 0;
}
/**
+  * Make a 'copy' alignment - sequences have new copies of features and
+  * annotations, but share the original dataset sequences. The codon frame
+  * mappings are shared with (not copied from) the source alignment, so a
+  * change made through either alignment is seen by both.
+  *
+  * @param al
+  *          the alignment to copy; any AlignmentI implementation is accepted
+  */
+ public Alignment(AlignmentI al)
+ {
+   SequenceI[] seqs = al.getSequencesArray();
+   for (int i = 0; i < seqs.length; i++)
+   {
+     seqs[i] = new Sequence(seqs[i]);
+   }
+
+   /*
+    * Share the same dataset sequence mappings (if any). They are read through
+    * the AlignmentI accessor rather than by casting to Alignment, so that
+    * other AlignmentI implementations do not fail with a ClassCastException.
+    * If the source reports no mapping set, the empty set created by the field
+    * initialiser is kept. TODO: find a better place for these to live
+    * (alignment dataset?).
+    */
+   Set<AlignedCodonFrame> sharedMappings = al.getCodonFrames();
+   if (sharedMappings != null)
+   {
+     this.codonFrameList = sharedMappings;
+   }
+
+   initAlignment(seqs);
+ }
+
+ /**
* Make an alignment from an array of Sequences.
*
* @param sequences
// this(compactAlignment.refCigars);
}
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
@Override
public List<SequenceI> getSequences()
{
@Override
public void setSequenceAt(int i, SequenceI snew)
{
- SequenceI oldseq = getSequenceAt(i);
- deleteSequence(i);
synchronized (sequences)
{
+ deleteSequence(i);
sequences.set(i, snew);
}
}
synchronized (sequences)
{
sequences.remove(i);
+ hiddenSequences.adjustHeightSequenceDeleted(i);
}
- hiddenSequences.adjustHeightSequenceDeleted(i);
}
}
return true;
}
+ /**
+  * Delete all annotations, including auto-calculated if the flag is set true.
+  * Returns true if at least one annotation was deleted, else false.
+  *
+  * @param includingAutoCalculated
+  *          if true, auto-calculated annotations are deleted as well;
+  *          otherwise they are left in place
+  * @return true if at least one annotation was passed to deleteAnnotation,
+  *         false if there was nothing to delete
+  */
+ @Override
+ public boolean deleteAllAnnotations(boolean includingAutoCalculated)
+ {
+   AlignmentAnnotation[] current = getAlignmentAnnotation();
+   if (current == null)
+   {
+     // NOTE(review): assumes getAlignmentAnnotation() can return null when
+     // no annotations have been added - confirm against its definition.
+     // Without this guard the loop below would throw NullPointerException.
+     return false;
+   }
+
+   boolean result = false;
+   for (AlignmentAnnotation alan : current)
+   {
+     if (!alan.autoCalculated || includingAutoCalculated)
+     {
+       deleteAnnotation(alan);
+       result = true;
+     }
+   }
+   return result;
+ }
+
/*
* (non-Javadoc)
*
return alignmentProperties;
}
- AlignedCodonFrame[] codonFrameList = null;
-
/*
* (non-Javadoc)
*
@Override
public void addCodonFrame(AlignedCodonFrame codons)
{
- if (codons == null)
- {
- return;
- }
- if (codonFrameList == null)
+ if (codons != null)
{
- codonFrameList = new AlignedCodonFrame[]
- { codons };
- return;
+ codonFrameList.add(codons);
}
- AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1];
- System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length);
- t[codonFrameList.length] = codons;
- codonFrameList = t;
- }
-
- /*
- * (non-Javadoc)
- *
- * @see jalview.datamodel.AlignmentI#getCodonFrame(int)
- */
- @Override
- public AlignedCodonFrame getCodonFrame(int index)
- {
- return codonFrameList[index];
}
/*
* jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
*/
@Override
- public AlignedCodonFrame[] getCodonFrame(SequenceI seq)
+ public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
{
- if (seq == null || codonFrameList == null)
+ if (seq == null)
{
return null;
}
- Vector cframes = new Vector();
- for (int f = 0; f < codonFrameList.length; f++)
+ List<AlignedCodonFrame> cframes = new ArrayList<AlignedCodonFrame>();
+ for (AlignedCodonFrame acf : codonFrameList)
{
- if (codonFrameList[f].involvesSequence(seq))
+ if (acf.involvesSequence(seq))
{
- cframes.addElement(codonFrameList[f]);
+ cframes.add(acf);
}
}
- if (cframes.size() == 0)
- {
- return null;
- }
- AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()];
- cframes.copyInto(cfr);
- return cfr;
+ return cframes;
}
- /*
- * (non-Javadoc)
+ /**
+  * Sets the codon frame mappings (replacing any existing mappings). The
+  * supplied set is stored by reference, not copied. A null argument is
+  * treated as 'no mappings' and replaced by a new empty set.
+  *
+  * @param acfs
+  *          the mappings to use from now on; may be null
+  * @see jalview.datamodel.AlignmentI#setCodonFrames(Set)
+  */
+ @Override
+ public void setCodonFrames(Set<AlignedCodonFrame> acfs)
+ {
+   /*
+    * The other codon frame methods of this class dereference codonFrameList
+    * without a null check, so never let the field become null.
+    */
+   this.codonFrameList = (acfs == null)
+           ? new LinkedHashSet<AlignedCodonFrame>()
+           : acfs;
+ }
+
+ /**
+ * Returns the set of codon frame mappings. Any changes to the returned set
+ * will affect the alignment.
*
* @see jalview.datamodel.AlignmentI#getCodonFrames()
*/
@Override
- public AlignedCodonFrame[] getCodonFrames()
+ public Set<AlignedCodonFrame> getCodonFrames()
{
return codonFrameList;
}
{
return false;
}
- boolean removed = false;
- int i = 0, iSize = codonFrameList.length;
- while (i < iSize)
- {
- if (codonFrameList[i] == codons)
- {
- removed = true;
- if (i + 1 < iSize)
- {
- System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize
- - i - 1);
- }
- iSize--;
- }
- else
- {
- i++;
- }
- }
- return removed;
+ return codonFrameList.remove(codons);
}
@Override
{
addAnnotation(alan[a]);
}
- AlignedCodonFrame[] acod = toappend.getCodonFrames();
- for (int a = 0; acod != null && a < acod.length; a++)
- {
- this.addCodonFrame(acod[a]);
- }
+
+ this.codonFrameList.addAll(toappend.getCodonFrames());
+
List<SequenceGroup> sg = toappend.getGroups();
if (sg != null)
{
}
}
+
+ private SequenceI seqrep=null;
+
+ /**
+  * Returns the representative sequence of this alignment.
+  *
+  * @return the representative sequence, or null if none has been set
+  */
+ public SequenceI getSeqrep()
+ {
+   return this.seqrep;
+ }
+
+ /**
+  * Sets the representative sequence of this alignment, replacing any
+  * previous one. Note - this affects the interpretation of the Hidereps
+  * attribute.
+  *
+  * @param seqrep
+  *          the new representative sequence; pass null to indicate that
+  *          there is no sequence representative
+  */
+ public void setSeqrep(SequenceI seqrep)
+ {
+   this.seqrep = seqrep;
+ }
+
+ /**
+  * Reports whether a representative sequence is currently set.
+  *
+  * @return true if the representative sequence is not null, else false
+  */
+ public boolean hasSeqrep()
+ {
+   if (this.seqrep == null)
+   {
+     return false;
+   }
+   return true;
+ }
+
@Override
public int getEndRes()
{
}
/**
+  * Align this alignment like the given (mapped) one. Delegates to
+  * alignAs(AlignmentI, boolean, boolean) with fixed gap-handling flags.
+  *
+  * @param al
+  *          the alignment whose layout is to be followed
+  * @return the value returned by the three-argument alignAs
+  */
+ @Override
+ public int alignAs(AlignmentI al)
+ {
+   /*
+    * Currently retains unmapped gaps (in introns), regaps mapped regions
+    * (exons)
+    */
+   final boolean keepMappedGaps = false;
+   final boolean keepUnmappedGaps = true;
+   return alignAs(al, keepMappedGaps, keepUnmappedGaps);
+ }
+
+ /**
* Align this alignment 'the same as' the given one. Mapped sequences only are
* realigned. If both of the same type (nucleotide/protein) then align both
* identically. If this is nucleotide and the other is protein, make 3 gaps
* nucleotide bases. Does nothing if alignment of protein from cDNA is
* requested (not yet implemented).
*
+ * Parameters control whether gaps in exon (mapped) and intron (unmapped)
+ * regions are preserved. Gaps that connect introns to exons are treated
+ * conservatively, i.e. only preserved if both intron and exon gaps are
+ * preserved.
+ *
* @param al
+ * @param preserveMappedGaps
+ * if true, gaps within and between mapped codons are preserved
+ * @param preserveUnmappedGaps
+ * if true, gaps within and between unmapped codons are preserved
*/
- @Override
- public int alignAs(AlignmentI al)
+// @Override
+ public int alignAs(AlignmentI al, boolean preserveMappedGaps,
+ boolean preserveUnmappedGaps)
{
+ // TODO should this method signature be the one in the interface?
int count = 0;
boolean thisIsNucleotide = this.isNucleotide();
boolean thatIsProtein = !al.isNucleotide();
if (!thatIsProtein && !thisIsNucleotide)
{
- System.err
- .println("Alignment of protein from cDNA not yet implemented");
- return 0;
- // todo: build it - a variant of Dna.CdnaTranslate()
+ return AlignmentUtils.alignProteinAsDna(this, al);
}
+
char thisGapChar = this.getGapCharacter();
- char thatGapChar = al.getGapCharacter();
String gap = thisIsNucleotide && thatIsProtein ? String
.valueOf(new char[]
{ thisGapChar, thisGapChar, thisGapChar }) : String
.valueOf(thisGapChar);
- int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
/*
* Get mappings from 'that' alignment's sequences to this.
*/
for (SequenceI alignTo : getSequences())
{
- AlignedCodonFrame[] mappings = al.getCodonFrame(alignTo);
- if (mappings != null)
- {
- for (AlignedCodonFrame mapping : mappings)
- {
- count += alignSequenceAs(alignTo, mapping, thatGapChar, gap,
- ratio) ? 1 : 0;
- }
- }
+ count += AlignmentUtils.alignSequenceAs(alignTo, al, gap, preserveMappedGaps,
+ preserveUnmappedGaps) ? 1 : 0;
}
return count;
}
/**
- * Align sequence 'seq' the same way as 'other'. Note this currently assumes
- * that we are aligned cDNA to match protein.
- *
- * @param seq
- * the sequence to be realigned
- * @param mapping
- * holds mapping from the sequence whose alignment is to be 'copied'
- * @param thatGapChar
- * gap character used in the 'other' sequence
- * @param gap
- * character string represent a gap in the realigned sequence
- * @param ratio
- * the number of positions in the realigned sequence corresponding to
- * one in the 'other'
- * @return true if the sequence was realigned, false if it could not be
+ * Returns the alignment in Fasta format. Behaviour of this method is not
+ * guaranteed between versions.
*/
- protected boolean alignSequenceAs(SequenceI seq,
- AlignedCodonFrame mapping,
- char thatGapChar,
- String gap, int ratio)
- {
- char myGapChar = gap.charAt(0);
- // TODO rework this to use the mapping to match 'this' to 'that' residue
- // position, to handle introns and exons correctly.
- // TODO generalise to work for Protein-Protein, dna-dna, dna-protein
- SequenceI alignFrom = mapping.getAaForDnaSeq(seq, false);
- if (alignFrom == null)
- {
- return false;
- }
- final char[] thisSeq = seq.getSequence();
- final char[] thisDs = seq.getDatasetSequence().getSequence();
- final char[] thatAligned = alignFrom.getSequence();
- StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
-
- /*
- * Find the DNA dataset position that corresponds to the first protein
- * residue (e.g. ignoring start codon in cDNA).
- */
- int[] dnaStart = mapping.getDnaPosition(seq.getDatasetSequence(), 1);
- int thisDsPosition = dnaStart == null ? 0 : dnaStart[0] - 1;
- int thisSeqPos = 0;
-
- /*
- * Copy aligned cDNA up to (excluding) the first mapped base.
- */
- int basesWritten = 0;
- while (basesWritten < thisDsPosition && thisSeqPos < thisSeq.length)
- {
- char c = thisSeq[thisSeqPos++];
- thisAligned.append(c);
- if (c != myGapChar)
- {
- basesWritten++;
- }
- }
+ @Override
+ public String toString()
+ {
+   // Hand all sequences of this alignment to a FastaFile and return
+   // whatever text its print method produces.
+   final FastaFile fastaWriter = new FastaFile();
+   final SequenceI[] allSequences = getSequencesArray();
+   return fastaWriter.print(allSequences);
+ }
- /*
- * Now traverse the aligned protein mirroring its gaps in cDNA.
- */
- for (char thatChar : thatAligned)
+ /**
+ * Returns the set of distinct sequence names. No ordering is guaranteed.
+ */
+ @Override
+ public Set<String> getSequenceNames()
+ {
+ Set<String> names = new HashSet<String>();
+ for (SequenceI seq : getSequences())
{
- if (thatChar == thatGapChar)
- {
- /*
- * Add (equivalent of) a gap
- */
- thisAligned.append(gap);
- }
- else
- {
- /*
- * Add (equivalent of) a residue
- */
- for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
- {
- thisAligned.append(thisDs[thisDsPosition++]);
-
- /*
- * Also advance over any gaps and the next residue in the old aligned
- * sequence
- */
- while (thisSeq[thisSeqPos] == myGapChar
- && thisSeqPos < thisSeq.length)
- {
- thisSeqPos++;
- }
- thisSeqPos++;
- }
- }
+ names.add(seq.getName());
}
+ return names;
+ }
- /*
- * Finally copy any 'extra' aligned cDNA (e.g. stop codon, introns).
- */
- while (thisSeqPos < thisSeq.length)
- {
- thisAligned.append(thisSeq[thisSeqPos++]);
- }
- seq.setSequence(new String(thisAligned));
- return true;
+ /**
+ * Returns a (possibly empty) alignment whose sequences are aligned to match
+ * the current alignment, as mapped by the given codon mappings.
+ *
+ * @param codonFrames
+ * @return
+ */
+ @Override
+ public AlignmentI getAlignedComplement(Set<AlignedCodonFrame> codonFrames)
+ {
+ // Note: passing codonFrames as a parameter rather than using
+ // this.codonFrameList as more flexible. Specifically, mappings are held
+ // on the protein alignment but we might want to act on dna.
+
+ // TODO we want the gap character of the mapped alignment, not this one!
+ List<SequenceI> alignedSeqs = AlignmentUtils.getAlignedTranslation(
+ getSequences(), getGapCharacter(), codonFrames);
+ final SequenceI[] seqsAsArray = alignedSeqs
+ .toArray(new SequenceI[alignedSeqs.size()]);
+ AlignmentI al = new Alignment(seqsAsArray);
+ al.padGaps();
+ al.setDataset(null);
+ return al;
}
}