* Constructs an alignment consisting of the mapped (CDS) regions in the given
* nucleotide sequences, and updates mappings to match. The CDS sequences are
* added to the original alignment's dataset, which is shared by the new
- * alignment.
+ * alignment. Mappings from nucleotide to CDS, and from CDS to protein, are
+ * added to the alignment dataset.
*
* @param dna
* aligned dna sequences
* @param mappings
- * from dna to protein; these are replaced with new mappings
+ * from dna to protein
* @param al
* @return an alignment whose sequences are the cds-only parts of the dna
* sequences (or null if no mappings are found)
Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
/*
- * report any sequences that have no mapping so can't be realigned
+ * report any sequences that have no mapping so can't be realigned
*/
unmapped.addAll(unaligned.getSequences());
SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);
if (fromSeq != null)
{
- Mapping seqMap = mapping.getMappingForSequence(seq);
+ Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);
if (addMappedPositions(seq, fromSeq, seqMap, map))
{
unmapped.remove(seq);
static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,
Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)
{
+ if (seqMap == null)
+ {
+ return false;
+ }
+
char[] fromChars = fromSeq.getSequence();
int toStart = seq.getStart();
char[] toChars = seq.getSequence();
}
return true;
}
+
+ // Strictly temporary hack until proper criteria for aligning protein to cds
+ // are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot ... (NOTE(review): the original note stops mid-sentence here - confirm the intended ending)
+ public static boolean looksLikeEnsembl(AlignmentI alignment)
+ {
+ for (SequenceI seq : alignment.getSequences())
+ {
+ String name = seq.getName();
+ if (!name.startsWith("ENSG") && !name.startsWith("ENST"))
+ {
+ return false; // a single name without an ENSG/ENST prefix rules the alignment out
+ }
+ }
+ return true; // no name failed the prefix check (also the result when the loop body never runs)
+ }
}
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
import org.testng.annotations.Test;
*/
dna.addCodonFrame(acf);
AlignmentUtils.alignAs(cds, dna);
- assertEquals("---GGGTTT---", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
assertEquals("CCC------AAA", cds.getSequenceAt(1).getSequenceAsString());
}
+
+ @Test(groups = { "Functional" })
+ public void testAddMappedPositions()
+ {
+ SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
+ SequenceI seq1 = new Sequence("cds", "AAATTT");
+ from.createDatasetSequence();
+ seq1.createDatasetSequence();
+ Mapping mapping = new Mapping(seq1, new MapList(
+ new int[] { 3, 6, 9, 10 }, // presumably 'from' ranges 3-6 and 9-10 given as start/end pairs - confirm against MapList
+ new int[] { 1, 6 }, 1, 1)); // presumably 'to' range 1-6 with a 1:1 ratio - confirm against MapList
+ Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
+ AlignmentUtils.addMappedPositions(seq1, from, mapping, map); // the boolean result is not checked here
+
+ /*
+ * verify map has seq1 residues in columns 3,4,6,7,11,12, i.e. the 1-based
+ * columns of the upper-case residues in the gapped 'from' string; the map
+ * is keyed by column, then by sequence
+ */
+ assertEquals(6, map.size());
+ assertEquals('A', map.get(3).get(seq1).charValue());
+ assertEquals('A', map.get(4).get(seq1).charValue());
+ assertEquals('A', map.get(6).get(seq1).charValue());
+ assertEquals('T', map.get(7).get(seq1).charValue());
+ assertEquals('T', map.get(11).get(seq1).charValue());
+ assertEquals('T', map.get(12).get(seq1).charValue());
+
+ /*
+ * NOTE(review): this comment was left empty - remove it or add the
+ * checks it was meant to introduce
+ */
+ }
+
+ /**
+ * Test case where the mapping 'from' range includes a stop codon which is
+ * absent in the 'to' range
+ * <p>
+ * NOTE(review): the sequences, mapping ranges and assertions below are the
+ * same as those in testAddMappedPositions; the 'from' ranges (3-6 and 9-10)
+ * appear to cover six positions, the same number as the 'to' range (1-6), so
+ * no additional stop codon seems to be included - confirm whether the
+ * fixture should be extended to match the description above
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMappedPositions_withStopCodon()
+ {
+ SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
+ SequenceI seq1 = new Sequence("cds", "AAATTT");
+ from.createDatasetSequence();
+ seq1.createDatasetSequence();
+ Mapping mapping = new Mapping(seq1, new MapList(
+ new int[] { 3, 6, 9, 10 }, // presumably 'from' ranges 3-6 and 9-10 given as start/end pairs - confirm against MapList
+ new int[] { 1, 6 }, 1, 1)); // presumably 'to' range 1-6 with a 1:1 ratio - confirm against MapList
+ Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
+ AlignmentUtils.addMappedPositions(seq1, from, mapping, map); // the boolean result is not checked here
+
+ /*
+ * verify map has seq1 residues in columns 3,4,6,7,11,12, i.e. the 1-based
+ * columns of the upper-case residues in the gapped 'from' string
+ */
+ assertEquals(6, map.size());
+ assertEquals('A', map.get(3).get(seq1).charValue());
+ assertEquals('A', map.get(4).get(seq1).charValue());
+ assertEquals('A', map.get(6).get(seq1).charValue());
+ assertEquals('T', map.get(7).get(seq1).charValue());
+ assertEquals('T', map.get(11).get(seq1).charValue());
+ assertEquals('T', map.get(12).get(seq1).charValue());
+ }
}