import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
* Answers true if the mappings include one between the given (dataset)
* sequences.
*/
- public static boolean mappingExists(Set<AlignedCodonFrame> set,
+ public static boolean mappingExists(List<AlignedCodonFrame> mappings,
SequenceI aaSeq, SequenceI cdnaSeq)
{
- if (set != null)
+ if (mappings != null)
{
- for (AlignedCodonFrame acf : set)
+ for (AlignedCodonFrame acf : mappings)
{
if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))
{
/*
* Locate the aligned source sequence whose dataset sequence is mapped. We
- * just take the first match here (as we can't align cDNA like more than one
- * protein sequence).
+ * just take the first match here (as a sequence can't be aligned like more
+ * than one other sequence).
*/
SequenceI alignFrom = null;
AlignedCodonFrame mapping = null;
/**
* Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to
* match residues and codons. Flags control whether existing gaps in unmapped
- * (intron) and mapped (exon) regions are preserved or not. Gaps linking intro
- * and exon are only retained if both flags are set.
+ * (intron) and mapped (exon) regions are preserved or not. Gaps between
+ * intron and exon are only retained if both flags are set.
*
* @param alignTo
* @param alignFrom
boolean preserveUnmappedGaps)
{
// TODO generalise to work for Protein-Protein, dna-dna, dna-protein
- final char[] thisSeq = alignTo.getSequence();
- final char[] thatAligned = alignFrom.getSequence();
- StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length);
// aligned and dataset sequence positions, all base zero
int thisSeqPos = 0;
char myGapChar = myGap.charAt(0);
int ratio = myGap.length();
- /*
- * Traverse the aligned protein sequence.
- */
int fromOffset = alignFrom.getStart() - 1;
int toOffset = alignTo.getStart() - 1;
int sourceGapMappedLength = 0;
boolean inExon = false;
+ final char[] thisSeq = alignTo.getSequence();
+ final char[] thatAligned = alignFrom.getSequence();
+ StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length);
+
+ /*
+ * Traverse the 'model' aligned sequence
+ */
for (char sourceChar : thatAligned)
{
if (sourceChar == sourceGap)
}
/*
- * Found a residue. Locate its mapped codon (start) position.
+ * Found a non-gap character. Locate its mapped region if any.
*/
sourceDsPos++;
// Note mapping positions are base 1, our sequence positions base 0
if (mappedPos == null)
{
/*
- * Abort realignment if unmapped protein. Or could ignore it??
+ * unmapped position; treat like a gap
*/
- System.err.println("Can't align: no codon mapping to residue "
- + sourceDsPos + "(" + sourceChar + ")");
- return;
+ sourceGapMappedLength += ratio;
+ // (previously this aborted realignment with a "Can't align" message)
+ continue;
}
int mappedCodonStart = mappedPos[0]; // position (1...) of codon start
}
/*
- * At end of protein sequence. Copy any remaining dna sequence, optionally
- * including (intron) gaps. We do not copy trailing gaps in protein.
+ * At end of model aligned sequence. Copy any remaining target sequence, optionally
+ * including (intron) gaps.
*/
while (thisSeqPos < thisSeq.length)
{
{
thisAligned.append(c);
}
+ sourceGapMappedLength--;
+ }
+
+ /*
+ * finally add gaps to pad for any trailing source gaps or
+ * unmapped characters
+ */
+ if (preserveUnmappedGaps)
+ {
+ while (sourceGapMappedLength > 0)
+ {
+ thisAligned.append(myGapChar);
+ sourceGapMappedLength--;
+ }
}
/*
List<SequenceI> unmappedProtein = new ArrayList<SequenceI>();
unmappedProtein.addAll(protein.getSequences());
- Set<AlignedCodonFrame> mappings = protein.getCodonFrames();
+ List<AlignedCodonFrame> mappings = protein.getCodonFrames();
/*
* Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of
}
AlignmentI dna = al1.isNucleotide() ? al1 : al2;
AlignmentI protein = dna == al1 ? al2 : al1;
- Set<AlignedCodonFrame> mappings = protein.getCodonFrames();
+ List<AlignedCodonFrame> mappings = protein.getCodonFrames();
for (SequenceI dnaSeq : dna.getSequences())
{
for (SequenceI proteinSeq : protein.getSequences())
* @return
*/
protected static boolean isMappable(SequenceI dnaSeq,
- SequenceI proteinSeq, Set<AlignedCodonFrame> mappings)
+ SequenceI proteinSeq, List<AlignedCodonFrame> mappings)
{
if (dnaSeq == null || proteinSeq == null)
{
* sequences (or null if no exons are found)
*/
public static AlignmentI makeExonAlignment(SequenceI[] dna,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
- Set<AlignedCodonFrame> newMappings = new LinkedHashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> newMappings = new ArrayList<AlignedCodonFrame>();
List<SequenceI> exonSequences = new ArrayList<SequenceI>();
for (SequenceI dnaSeq : dna)
import java.awt.event.KeyListener;
import java.beans.PropertyVetoException;
import java.util.Map.Entry;
-import java.util.Set;
import javax.swing.AbstractAction;
import javax.swing.InputMap;
import javax.swing.JComponent;
import javax.swing.JMenuItem;
import javax.swing.KeyStroke;
-import javax.swing.UIDefaults;
-import javax.swing.UIManager;
import javax.swing.event.InternalFrameAdapter;
import javax.swing.event.InternalFrameEvent;
* estimate width and height of SplitFrame; this.getInsets() doesn't seem to
* give the full additional size (a few pixels short)
*/
- UIDefaults defaults = UIManager.getDefaults();
- Set<Object> keySet = defaults.keySet();
- for (Object key : keySet)
- {
- System.out.println(key.toString() + " = "
- + UIManager.get(key).toString());
- }
int widthFudge = Platform.isAMac() ? 28 : 28; // Windows tbc
int heightFudge = Platform.isAMac() ? 50 : 50; // tbc
int width = ((AlignFrame) getTopFrame()).getWidth() + widthFudge;
import jalview.datamodel.SequenceI;
import java.util.ArrayList;
-import java.util.Collections;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
/**
* Helper methods for manipulations involving sequence mappings.
*/
protected static void mapCutOrPaste(Edit edit, boolean undo,
List<SequenceI> targetSeqs, EditCommand result,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
Action action = edit.getAction();
if (undo)
*/
public static EditCommand mapEditCommand(EditCommand command,
boolean undo, final AlignmentI mapTo, char gapChar,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
/*
* For now, only support mapping from protein edits to cDna
Map<SequenceI, SequenceI> originalSequences,
final List<SequenceI> targetSeqs,
Map<SequenceI, SequenceI> targetCopies, char gapChar,
- EditCommand result, Set<AlignedCodonFrame> mappings)
+ EditCommand result, List<AlignedCodonFrame> mappings)
{
Action action = edit.getAction();
* @return
*/
public static SearchResults buildSearchResults(SequenceI seq, int index,
- Set<AlignedCodonFrame> seqmappings)
+ List<AlignedCodonFrame> seqmappings)
{
SearchResults results = new SearchResults();
addSearchResults(results, seq, index, seqmappings);
* @param seqmappings
*/
public static void addSearchResults(SearchResults results, SequenceI seq,
- int index, Set<AlignedCodonFrame> seqmappings)
+ int index, List<AlignedCodonFrame> seqmappings)
{
if (index >= seq.getStart() && index <= seq.getEnd())
{
*/
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
- Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+ List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
/*
* Copy group name, colours etc, but not sequences or sequence colour scheme
/*
* Found a sequence mapping. Locate the start/end mapped residues.
*/
+ List<AlignedCodonFrame> mapping = Arrays.asList(new AlignedCodonFrame[] { acf });
SearchResults sr = buildSearchResults(selected,
- startResiduePos, Collections.singleton(acf));
+ startResiduePos, mapping);
for (Match m : sr.getResults())
{
mappedStartResidue = m.getStart();
mappedEndResidue = m.getEnd();
}
- sr = buildSearchResults(selected, endResiduePos,
- Collections.singleton(acf));
+ sr = buildSearchResults(selected, endResiduePos, mapping);
for (Match m : sr.getResults())
{
mappedStartResidue = Math.min(mappedStartResidue,
* @return
*/
public static CommandI mapOrderCommand(OrderCommand command,
- boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
+ boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
{
SequenceI[] sortOrder = command.getSequenceOrder(undo);
List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
{
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
- Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+ List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
ColumnSelection mappedColumns = new ColumnSelection();
}
/**
- * Returns the mapped codon for a given aligned sequence column position (base
- * 0).
+ * Returns the mapped codon or codons for a given aligned sequence column
+ * position (base 0).
*
* @param seq
* an aligned peptide sequence
* an aligned column position (base 0)
* @param mappings
- * a set of codon mappings
+ * a list of codon mappings
- * @return the bases of the mapped codon in the cDNA dataset sequence, or null
- * if not found
+ * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
+ * or an empty list if none found, or if seq or mappings is null
*/
- public static char[] findCodonFor(SequenceI seq, int col,
- Set<AlignedCodonFrame> mappings)
+ public static List<char[]> findCodonsFor(SequenceI seq, int col,
+ List<AlignedCodonFrame> mappings)
{
+ List<char[]> result = new ArrayList<char[]>();
+ /*
+ * nothing to look up without a sequence or mappings; same null check as
+ * findMappingsForSequence
+ */
+ if (seq == null || mappings == null)
+ {
+ return result;
+ }
int dsPos = seq.findPosition(col);
for (AlignedCodonFrame mapping : mappings)
{
if (mapping.involvesSequence(seq))
{
- return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
+ List<char[]> codons = mapping.getMappedCodons(
+ seq.getDatasetSequence(), dsPos);
+ if (codons != null)
+ {
+ result.addAll(codons);
+ }
}
}
- return null;
+ return result;
}
/**
* @return
*/
public static List<AlignedCodonFrame> findMappingsForSequence(
- SequenceI sequence, Set<AlignedCodonFrame> mappings)
+ SequenceI sequence, List<AlignedCodonFrame> mappings)
{
List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
if (sequence == null || mappings == null)
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@Test(groups = { "Functional" })
public void testAlignSequenceAs_withMapping_withUnmappedProtein()
{
-
/*
* Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
*/
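- * Expect alignment does nothing (aborts realignment). Change this test
- * first if different behaviour wanted.
+ * Expect the unmapped residue (L) to be treated like a gap, so a gap of
+ * one codon's width is inserted in the aligned dna. Change this test
+ * first if different behaviour wanted.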
*/
- checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false, false, map,
- "GGGAAACCCTTTGGG");
+ checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
+ "gggAAAccc---TTTggg");
}
/**
* Helper method that performs and verifies the method under test.
+ * Creates aligned sequences (each with a dataset sequence) from the two
+ * strings, adds the given map between their dataset sequences, calls
+ * alignSequenceAs with "---" and '-' as the gap arguments, then asserts the
+ * resulting sequence string of the alignee.
*
- * @param dnaSeq
- * @param proteinSeq
+ * @param alignee
+ * the sequence to be realigned
+ * @param alignModel
+ * the sequence whose alignment is to be copied
* @param preserveMappedGaps
+ * passed through unchanged to alignSequenceAs
* @param preserveUnmappedGaps
+ * passed through unchanged to alignSequenceAs
* @param map
+ * the mapping added between the dataset sequences of alignee and
+ * alignModel
* @param expected
+ * the expected sequence string of alignee after realignment
*/
- protected void checkAlignSequenceAs(final String dnaSeq,
- final String proteinSeq, final boolean preserveMappedGaps,
+ protected void checkAlignSequenceAs(final String alignee,
+ final String alignModel, final boolean preserveMappedGaps,
final boolean preserveUnmappedGaps, MapList map,
final String expected)
{
- SequenceI dna = new Sequence("Seq1", dnaSeq);
- dna.createDatasetSequence();
- SequenceI protein = new Sequence("Seq1", proteinSeq);
- protein.createDatasetSequence();
+ SequenceI alignMe = new Sequence("Seq1", alignee);
+ alignMe.createDatasetSequence();
+ SequenceI alignFrom = new Sequence("Seq2", alignModel);
+ alignFrom.createDatasetSequence();
AlignedCodonFrame acf = new AlignedCodonFrame();
- acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
+ acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map);
- AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-',
+ AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
preserveMappedGaps, preserveUnmappedGaps);
- assertEquals(expected, dna.getSequenceAsString());
+ assertEquals(expected, alignMe.getSequenceAsString());
}
/**
acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
- protein.setCodonFrames(Collections.singleton(acf));
+ // register the mapping built above (in a mutable list); passing an empty
+ // list would leave the protein alignment with no codon mappings
+ protein.setCodonFrames(new ArrayList<AlignedCodonFrame>(
+ Arrays.asList(acf)));
/*
* Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
* convenience so results are in the input order. There is no assertion that
* the generated exon sequences are in any particular order.
*/
- Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
// map ...GGG...TTT to GF
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
assertEquals(40, map.getFromLowest());
assertEquals(48, map.getFromHighest());
}
+
+ /**
+ * Checks alignSequenceAs for a protein sequence mapped to part of another
+ * protein sequence
+ */
+ @Test(groups = { "Functional" })
+ public void testAlignSequenceAs_mappedProteinProtein()
+ {
+ SequenceI target = new Sequence("Match", "MGAASEV");
+ target.createDatasetSequence();
+ SequenceI model = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
+ model.createDatasetSequence();
+
+ /*
+ * ranges [6, 12] and [1, 7] with ratio 1:1; this is like a domain or motif
+ * match of part of a peptide sequence
+ */
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ final int[] fromRange = new int[] { 6, 12 };
+ final int[] toRange = new int[] { 1, 7 };
+ MapList map = new MapList(fromRange, toRange, 1, 1);
+ acf.addMap(model.getDatasetSequence(), target.getDatasetSequence(), map);
+
+ final boolean preserveMappedGaps = true;
+ final boolean preserveUnmappedGaps = true;
+ AlignmentUtils.alignSequenceAs(target, model, acf, "-", '-',
+ preserveMappedGaps, preserveUnmappedGaps);
+
+ String realigned = target.getSequenceAsString();
+ assertEquals("-----MGAASEV-------", realigned);
+ }
+
+ /**
+ * Checks alignSequenceAs when the model sequence ends with residues that
+ * have no mapping
+ */
+ @Test(groups = { "Functional" })
+ public void testAlignSequenceAs_withTrailingPeptide()
+ {
+ /*
+ * the first 3 codons map to KPF, leaving G as a trailing unmapped residue
+ */
+ final int[] dnaRange = new int[] { 1, 9 };
+ final int[] peptideRange = new int[] { 1, 3 };
+ MapList map = new MapList(dnaRange, peptideRange, 3, 1);
+
+ final String dna = "AAACCCTTT";
+ final String peptide = "K-PFG";
+ final String expected = "AAA---CCCTTT---";
+ checkAlignSequenceAs(dna, peptide, true, true, map, expected);
+ }
}
import jalview.datamodel.SequenceI;
import jalview.io.FormatAdapter;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
AlignedCodonFrame acf2 = new AlignedCodonFrame();
ssm.registerMapping(acf1);
- assertEquals(1, ssm.seqmappings.size());
- assertTrue(ssm.seqmappings.contains(acf1));
+ assertEquals(1, ssm.getSequenceMappings().size());
+ assertTrue(ssm.getSequenceMappings().contains(acf1));
ssm.registerMapping(acf2);
- assertEquals(2, ssm.seqmappings.size());
- assertTrue(ssm.seqmappings.contains(acf1));
- assertTrue(ssm.seqmappings.contains(acf2));
+ assertEquals(2, ssm.getSequenceMappings().size());
+ assertTrue(ssm.getSequenceMappings().contains(acf1));
+ assertTrue(ssm.getSequenceMappings().contains(acf2));
/*
* Re-adding the first mapping does nothing
*/
ssm.registerMapping(acf1);
- assertEquals(2, ssm.seqmappings.size());
- assertTrue(ssm.seqmappings.contains(acf1));
- assertTrue(ssm.seqmappings.contains(acf2));
+ assertEquals(2, ssm.getSequenceMappings().size());
+ assertTrue(ssm.getSequenceMappings().contains(acf1));
+ assertTrue(ssm.getSequenceMappings().contains(acf2));
}
@Test(groups = { "Functional" })
AlignedCodonFrame acf2 = new AlignedCodonFrame();
AlignedCodonFrame acf3 = new AlignedCodonFrame();
- Set<AlignedCodonFrame> set1 = new HashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> set1 = new ArrayList<AlignedCodonFrame>();
set1.add(acf1);
set1.add(acf2);
- Set<AlignedCodonFrame> set2 = new HashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> set2 = new ArrayList<AlignedCodonFrame>();
set2.add(acf2);
set2.add(acf3);
ssm.registerMappings(set2);
ssm.registerMappings(set2);
- assertEquals(3, ssm.seqmappings.size());
- assertTrue(ssm.seqmappings.contains(acf1));
- assertTrue(ssm.seqmappings.contains(acf2));
- assertTrue(ssm.seqmappings.contains(acf3));
+ assertEquals(3, ssm.getSequenceMappings().size());
+ assertTrue(ssm.getSequenceMappings().contains(acf1));
+ assertTrue(ssm.getSequenceMappings().contains(acf2));
+ assertTrue(ssm.getSequenceMappings().contains(acf3));
}
/**
import java.awt.Color;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
import java.util.List;
-import java.util.Set;
import org.testng.annotations.Test;
MapList map = new MapList(new int[] { 5, 10 }, new int[] { 12, 13 }, 3,
1);
acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
- Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+ List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+ { acf });
/*
* Check protein residue 12 maps to codon 5-7, 13 to codon 8-10
MapList map = new MapList(new int[] { 6, 6, 8, 9, 11, 11, 13, 13, 15,
15 }, new int[] { 8, 9 }, 3, 1);
acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
- Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+ List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+ { acf });
/*
* Check protein residue 8 maps to [6, 8, 9]
acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
.getSequenceAt(seq).getDatasetSequence(), map);
}
- Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+ List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+ { acf });
AlignViewportI dnaView = new AlignViewport(cdna);
AlignViewportI proteinView = new AlignViewport(protein);
61 }, 3, 1);
acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein
.getSequenceAt(2).getDatasetSequence(), map);
- Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+ List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+ { acf });
dnaView = new AlignViewport(cdna);
proteinView = new AlignViewport(protein);
acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
.getSequenceAt(seq).getDatasetSequence(), map);
}
- Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+ List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+ { acf });
AlignViewportI dnaView = new AlignViewport(cdna);
AlignViewportI proteinView = new AlignViewport(protein);
acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
.getSequenceAt(seq).getDatasetSequence(), map);
}
- Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+ List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+ { acf });
AlignViewportI dnaView = new AlignViewport(cdna);
AlignViewportI proteinView = new AlignViewport(protein);
AlignedCodonFrame acf3 = new AlignedCodonFrame();
acf3.addMap(seq3.getDatasetSequence(), seq1.getDatasetSequence(), map);
- Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
mappings.add(acf1);
mappings.add(acf2);
mappings.add(acf3);
AlignedCodonFrame acf = new AlignedCodonFrame();
MapList map = new MapList(new int[] { 8, 16 }, new int[] { 5, 7 }, 3, 1);
acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
- Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+ List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
mappings.add(acf);
AlignmentI prot = new Alignment(new SequenceI[] { protein });