*/
package jalview.datamodel;
+import jalview.analysis.AlignmentUtils;
+import jalview.io.FastaFile;
import jalview.util.MessageManager;
import java.util.ArrayList;
import java.util.Enumeration;
+import java.util.HashSet;
import java.util.Hashtable;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.Vector;
/**
public Hashtable alignmentProperties;
+ private Set<AlignedCodonFrame> codonFrameList = new LinkedHashSet<AlignedCodonFrame>();
+
private void initAlignment(SequenceI[] seqs)
{
int i = 0;
}
/**
+  * Make a 'copy' alignment - sequences have new copies of features and
+  * annotations, but share the original dataset sequences. The codon frame
+  * mappings are shared with (not copied from) the given alignment.
+  *
+  * @param al
+  *          the alignment to copy; any AlignmentI implementation is accepted
+  */
+ public Alignment(AlignmentI al)
+ {
+   SequenceI[] seqs = al.getSequencesArray();
+   for (int i = 0; i < seqs.length; i++)
+   {
+     seqs[i] = new Sequence(seqs[i]);
+   }
+
+   /*
+    * Share the same dataset sequence mappings (if any). They are read through
+    * the AlignmentI interface rather than by casting to Alignment, so that
+    * other implementations do not cause a ClassCastException. If the source
+    * supplies no set, this alignment keeps its own (empty) one. TODO: find a
+    * better place for these to live (alignment dataset?).
+    */
+   Set<AlignedCodonFrame> sharedMappings = al.getCodonFrames();
+   if (sharedMappings != null)
+   {
+     this.codonFrameList = sharedMappings;
+   }
+
+   initAlignment(seqs);
+ }
+
+ /**
* Make an alignment from an array of Sequences.
*
* @param sequences
// this(compactAlignment.refCigars);
}
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
@Override
public List<SequenceI> getSequences()
{
}
/**
+  * Groups the sequences of this alignment by name, delegating to
+  * AlignmentUtils.getSequencesByName.
+  *
+  * @return a map of lists of sequences keyed by sequence name
+  */
+ @Override
+ public Map<String, List<SequenceI>> getSequencesByName()
+ {
+   final Map<String, List<SequenceI>> sequencesByName = AlignmentUtils
+           .getSequencesByName(this);
+   return sequencesByName;
+ }
+
+ /**
* DOCUMENT ME!
*
* @param i
@Override
public void setSequenceAt(int i, SequenceI snew)
{
- SequenceI oldseq = getSequenceAt(i);
- deleteSequence(i);
synchronized (sequences)
{
+ /*
+ * Replace the sequence at index i in place. Calling deleteSequence(i)
+ * first removes that element and shifts the later sequences down, so the
+ * set() below would then overwrite the following sequence, or throw
+ * IndexOutOfBoundsException when i was the last index.
+ */
sequences.set(i, snew);
}
}
synchronized (sequences)
{
sequences.remove(i);
+ hiddenSequences.adjustHeightSequenceDeleted(i);
}
- hiddenSequences.adjustHeightSequenceDeleted(i);
}
}
return true;
}
+ /**
+  * Deletes the annotations of this alignment, optionally sparing the
+  * auto-calculated ones.
+  *
+  * @param includingAutoCalculated
+  *          if true, auto-calculated annotations are deleted as well; if
+  *          false, they are left in place
+  * @return true if at least one annotation was deleted, else false
+  */
+ @Override
+ public boolean deleteAllAnnotations(boolean includingAutoCalculated)
+ {
+   boolean anyDeleted = false;
+   for (AlignmentAnnotation annotation : getAlignmentAnnotation())
+   {
+     // an auto-calculated annotation is kept unless the flag asks for it
+     if (annotation.autoCalculated && !includingAutoCalculated)
+     {
+       continue;
+     }
+     deleteAnnotation(annotation);
+     anyDeleted = true;
+   }
+   return anyDeleted;
+ }
+
/*
* (non-Javadoc)
*
return alignmentProperties;
}
- AlignedCodonFrame[] codonFrameList = null;
-
/*
* (non-Javadoc)
*
@Override
public void addCodonFrame(AlignedCodonFrame codons)
{
- if (codons == null)
+ if (codons != null) // a null mapping is ignored
{
- return;
- }
- if (codonFrameList == null)
- {
- codonFrameList = new AlignedCodonFrame[]
- { codons };
- return;
+ codonFrameList.add(codons); // no effect if an equal mapping is already in the set
}
- AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1];
- System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length);
- t[codonFrameList.length] = codons;
- codonFrameList = t;
- }
-
- /*
- * (non-Javadoc)
- *
- * @see jalview.datamodel.AlignmentI#getCodonFrame(int)
- */
- @Override
- public AlignedCodonFrame getCodonFrame(int index)
- {
- return codonFrameList[index];
}
/*
* jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
*/
@Override
- public AlignedCodonFrame[] getCodonFrame(SequenceI seq)
+ public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
{
- if (seq == null || codonFrameList == null)
+ if (seq == null) // no sequence given: the result is null, not an empty list
{
return null;
}
- Vector cframes = new Vector();
- for (int f = 0; f < codonFrameList.length; f++)
+ List<AlignedCodonFrame> cframes = new ArrayList<AlignedCodonFrame>();
+ for (AlignedCodonFrame acf : codonFrameList)
{
- if (codonFrameList[f].involvesSequence(seq))
+ if (acf.involvesSequence(seq))
{
- cframes.addElement(codonFrameList[f]);
+ cframes.add(acf);
}
}
- if (cframes.size() == 0)
- {
- return null;
- }
- AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()];
- cframes.copyInto(cfr);
- return cfr;
+ return cframes; // a new list; empty when no mapping involves seq
}
- /*
- * (non-Javadoc)
+ /**
+  * Sets the codon frame mappings (replacing any existing mappings). The
+  * supplied set is stored by reference, not copied. A null argument is
+  * treated as 'no mappings' and leaves an empty set in place, because the
+  * other methods of this class use the set without checking it for null.
+  *
+  * @param acfs
+  *          the new mappings; may be null
+  * @see jalview.datamodel.AlignmentI#setCodonFrames()
+  */
+ @Override
+ public void setCodonFrames(Set<AlignedCodonFrame> acfs)
+ {
+   if (acfs == null)
+   {
+     this.codonFrameList = new LinkedHashSet<AlignedCodonFrame>();
+   }
+   else
+   {
+     this.codonFrameList = acfs;
+   }
+ }
+
+ /**
+ * Returns the set of codon frame mappings. Any changes to the returned set
+ * will affect the alignment, because the internal set itself is returned
+ * rather than a copy. The same set may also be held by another alignment
+ * that was created from this one with the copy constructor.
*
+ * @return the set of mappings held by this alignment
* @see jalview.datamodel.AlignmentI#getCodonFrames()
*/
@Override
- public AlignedCodonFrame[] getCodonFrames()
+ public Set<AlignedCodonFrame> getCodonFrames()
{
return codonFrameList;
}
{
return false;
}
- boolean removed = false;
- int i = 0, iSize = codonFrameList.length;
- while (i < iSize)
- {
- if (codonFrameList[i] == codons)
- {
- removed = true;
- if (i + 1 < iSize)
- {
- System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize
- - i - 1);
- }
- iSize--;
- }
- else
- {
- i++;
- }
- }
- return removed;
+ return codonFrameList.remove(codons);
}
@Override
{
addAnnotation(alan[a]);
}
- AlignedCodonFrame[] acod = toappend.getCodonFrames();
- for (int a = 0; acod != null && a < acod.length; a++)
- {
- this.addCodonFrame(acod[a]);
- }
+
+ this.codonFrameList.addAll(toappend.getCodonFrames());
+
List<SequenceGroup> sg = toappend.getGroups();
if (sg != null)
{
}
}
+
+ private SequenceI seqrep=null;
+
+ /**
+  * Returns the sequence that represents this alignment, if one has been set.
+  *
+  * @return the representative sequence, or null if there is none
+  */
+ public SequenceI getSeqrep()
+ {
+   return this.seqrep;
+ }
+
+ /**
+  * Sets the representative sequence for this alignment. Note - this affects
+  * the interpretation of the Hidereps attribute.
+  *
+  * NOTE(review): the wording above previously referred to 'this group' and
+  * looks carried over from a sequence group class - confirm that the
+  * Hidereps remark applies to alignments too.
+  *
+  * @param seqrep
+  *          the seqrep to set (null means no sequence representative)
+  */
+ public void setSeqrep(SequenceI seqrep)
+ {
+   this.seqrep = seqrep;
+ }
+
+ /**
+  * Tests whether a representative sequence has been set.
+  *
+  * @return true if this alignment has a sequence representative
+  */
+ public boolean hasSeqrep()
+ {
+   return this.seqrep != null;
+ }
+
@Override
public int getEndRes()
{
}
/**
- * Answers true if the supplied alignment has the same number of sequences,
- * and they are of equivalent length, ignoring gaps. Alignments should be of
- * the same type (protein/nucleotide) or different types with 3:1 length
- * scaling.
- *
- * @param al
+ * Align this alignment like the given (mapped) one. Equivalent to calling
+ * alignAs(al, false, true).
+ *
+ * @param al
+ * the alignment whose layout is to be followed
+ * @return the value returned by alignAs(al, false, true)
*/
@Override
- public boolean isMappableTo(AlignmentI al)
+ public int alignAs(AlignmentI al)
{
- int thisCodonScale = this.isNucleotide() ? 1 : 3;
- int thatCodonScale = al.isNucleotide() ? 1 : 3;
- if (this == al || this.getHeight() != al.getHeight())
- {
- return false;
- }
- int i = 0;
- for (SequenceI seq : this.getSequences())
- {
- final int thisSequenceDnaLength = seq.getDatasetSequence()
- .getLength() * thisCodonScale;
- final int thatSequenceDnaLength = al.getSequenceAt(i)
- .getDatasetSequence().getLength()
- * thatCodonScale;
- if (thisSequenceDnaLength != thatSequenceDnaLength)
- {
- return false;
- }
- i++;
- }
- return true;
+ /*
+ * Currently retains unmapped gaps (in introns), regaps mapped regions
+ * (exons): preserveMappedGaps is false, preserveUnmappedGaps is true.
+ */
+ return alignAs(al, false, true);
}
/**
- * Align this alignment the same as the given one. If both of the same type
- * (nucleotide/protein) then align both identically. If this is nucleotide and
- * the other is protein, make 3 gaps for each gap in the protein sequences. If
- * this is protein and the other is nucleotide, insert a gap for each 3 gaps
- * (or part thereof) between nucleotide bases. The two alignments should be
- * compatible in height and lengths, but if not, then discrepancies will be
- * ignored with unpredictable results.
+ * Align this alignment 'the same as' the given one. Mapped sequences only are
+ * realigned. If both of the same type (nucleotide/protein) then align both
+ * identically. If this is nucleotide and the other is protein, make 3 gaps
+ * for each gap in the protein sequences. If this is protein and the other is
+ * nucleotide, insert a gap for each 3 gaps (or part thereof) between
+ * nucleotide bases. When this is protein and the other is nucleotide, the
+ * work is delegated to AlignmentUtils.alignProteinAsDna.
+ *
+ * Parameters control whether gaps in exon (mapped) and intron (unmapped)
+ * regions are preserved. Gaps that connect introns to exons are treated
+ * conservatively, i.e. only preserved if both intron and exon gaps are
+ * preserved.
*
* @param al
- * @throws UnsupportedOperation
- * if alignment of protein from cDNA is requested (not yet
- * implemented)
+ * @param preserveMappedGaps
+ * if true, gaps within and between mapped codons are preserved
+ * @param preserveUnmappedGaps
+ * if true, gaps within and between unmapped codons are preserved
+ * @return the number of sequences of this alignment for which
+ * AlignmentUtils.alignSequenceAs returned true, or the value
+ * returned by AlignmentUtils.alignProteinAsDna when that method is
+ * used instead
*/
- @Override
- public void alignAs(AlignmentI al)
+// @Override
+ public int alignAs(AlignmentI al, boolean preserveMappedGaps,
+ boolean preserveUnmappedGaps)
{
+ // TODO should this method signature be the one in the interface?
+ int count = 0;
boolean thisIsNucleotide = this.isNucleotide();
boolean thatIsProtein = !al.isNucleotide();
if (!thatIsProtein && !thisIsNucleotide)
{
- throw new UnsupportedOperationException(
- "Alignment of protein from cDNA not implemented");
+ return AlignmentUtils.alignProteinAsDna(this, al);
}
+
char thisGapChar = this.getGapCharacter();
- char thatGapChar = al.getGapCharacter();
String gap = thisIsNucleotide && thatIsProtein ? String
.valueOf(new char[]
{ thisGapChar, thisGapChar, thisGapChar }) : String
.valueOf(thisGapChar);
- int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
- int i = 0;
- for (SequenceI seq : this.getSequences())
+
+ /*
+ * Get mappings from 'that' alignment's sequences to this. Each sequence of
+ * this alignment is passed to alignSequenceAs in turn; a true result adds
+ * one to the count.
+ */
+ for (SequenceI alignTo : getSequences())
{
- SequenceI other = al.getSequenceAt(i++);
- if (other == null)
- {
- continue;
- }
- char[] thisDs = seq.getDatasetSequence().getSequence();
- char[] thatDs = other.getSequence();
- StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
- int thisDsPosition = 0;
- for (char thatChar : thatDs)
- {
- if (thatChar == thatGapChar)
- {
- /*
- * Add (equivalent of) a gap
- */
- thisAligned.append(gap);
- }
- else
- {
- /*
- * Add (equivalent of) a residue
- */
- for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
- {
- thisAligned.append(thisDs[thisDsPosition++]);
- }
- }
- }
- /*
- * Include any 'extra' residues (there shouldn't be).
- */
- while (thisDsPosition < thisDs.length)
- {
- thisAligned.append(thisDs[thisDsPosition++]);
- }
- seq.setSequence(new String(thisAligned));
+ count += AlignmentUtils.alignSequenceAs(alignTo, al, gap, preserveMappedGaps,
+ preserveUnmappedGaps) ? 1 : 0;
}
+ return count;
+ }
+
+ /**
+  * Renders the sequences of this alignment as Fasta text, using
+  * FastaFile.print. Behaviour of this method is not guaranteed between
+  * versions.
+  *
+  * @return the alignment in Fasta format
+  */
+ @Override
+ public String toString()
+ {
+   final FastaFile fastaWriter = new FastaFile();
+   final SequenceI[] allSequences = getSequencesArray();
+   return fastaWriter.print(allSequences);
+ }
+
+ /**
+  * Collects the names of all sequences in this alignment, without
+  * duplicates. No ordering is guaranteed.
+  *
+  * @return a new set holding the distinct sequence names
+  */
+ @Override
+ public Set<String> getSequenceNames()
+ {
+   final Set<String> distinctNames = new HashSet<String>();
+   for (SequenceI sequence : getSequences())
+   {
+     final String sequenceName = sequence.getName();
+     distinctNames.add(sequenceName);
+   }
+   return distinctNames;
+ }
+
+ /**
+  * Returns a (possibly empty) alignment whose sequences are aligned to match
+  * the current alignment, as mapped by the given codon mappings. The
+  * sequences come from AlignmentUtils.getAlignedTranslation; the new
+  * alignment then has padGaps() and setDataset(null) applied to it.
+  *
+  * @param codonFrames
+  *          the mappings to follow from this alignment's sequences
+  * @return a new alignment built from the mapped sequences
+  */
+ @Override
+ public AlignmentI getAlignedComplement(Set<AlignedCodonFrame> codonFrames)
+ {
+   // Note: the mappings are passed in, rather than read from
+   // this.codonFrameList, as that is more flexible. Specifically, mappings
+   // are held on the protein alignment but we might want to act on dna.
+   final List<SequenceI> ownSequences = getSequences();
+
+   // TODO we want the gap character of the mapped alignment, not this one!
+   final char gapChar = getGapCharacter();
+
+   final List<SequenceI> mappedSequences = AlignmentUtils
+           .getAlignedTranslation(ownSequences, gapChar, codonFrames);
+   final int mappedCount = mappedSequences.size();
+   final SequenceI[] mappedArray = mappedSequences
+           .toArray(new SequenceI[mappedCount]);
+
+   final AlignmentI complement = new Alignment(mappedArray);
+   complement.padGaps();
+   complement.setDataset(null);
+   return complement;
+ }
}