+ assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString());
+ assertEquals("GGGT", uas2.getSequenceAsString());
+ assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString());
+ }
+
+ /**
+  * Tests that gene loci held on one sequence are transferred, via a mapping,
+  * to another sequence: nothing is set when the source has no loci, the loci
+  * are re-mapped to the target's coordinates when present, and loci already
+  * set on the target are left untouched.
+  */
+ @Test(groups = { "Functional" })
+ public void testTransferGeneLoci()
+ {
+   SequenceI from = new Sequence("transcript",
+           "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
+   SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
+   // CDS 1-12 corresponds to transcript 10-21
+   MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
+           1);
+
+   /*
+    * first with nothing to transfer
+    */
+   AlignmentUtils.transferGeneLoci(from, map, to);
+   assertNull(to.getGeneLoci());
+
+   /*
+    * next with gene loci set on 'from' sequence
+    */
+   int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
+   MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
+   from.setGeneLoci("human", "GRCh38", "7", geneMap);
+   AlignmentUtils.transferGeneLoci(from, map, to);
+
+   GeneLociI toLoci = to.getGeneLoci();
+   assertNotNull(toLoci);
+   // DBRefEntry constructor upper-cases 'source'
+   assertEquals("HUMAN", toLoci.getSpeciesId());
+   assertEquals("GRCh38", toLoci.getAssemblyId());
+   assertEquals("7", toLoci.getChromosomeId());
+
+   /*
+    * transcript 'exons' are 1-6, 7-16, 17-36
+    * CDS 1:12 is transcript 10-21
+    * transcript 'CDS' is 10-16, 17-21
+    * which is 'gene' 158-164, 210-214
+    */
+   MapList toMap = toLoci.getMapping();
+   assertEquals(1, toMap.getFromRanges().size());
+   assertEquals(2, toMap.getFromRanges().get(0).length);
+   assertEquals(1, toMap.getFromRanges().get(0)[0]);
+   assertEquals(12, toMap.getFromRanges().get(0)[1]);
+   assertEquals(2, toMap.getToRanges().size());
+   assertEquals(2, toMap.getToRanges().get(0).length);
+   assertEquals(158, toMap.getToRanges().get(0)[0]);
+   assertEquals(164, toMap.getToRanges().get(0)[1]);
+   assertEquals(210, toMap.getToRanges().get(1)[0]);
+   assertEquals(214, toMap.getToRanges().get(1)[1]);
+   // or summarised as (but toString might change in future):
+   assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
+           toMap.toString());
+
+   /*
+    * an existing value is not overridden; the loci are read again from the
+    * target sequence so that a replaced (rather than retained) value would
+    * be detected
+    */
+   geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
+   from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
+   AlignmentUtils.transferGeneLoci(from, map, to);
+   toLoci = to.getGeneLoci();
+   assertNotNull(toLoci);
+   assertEquals("HUMAN", toLoci.getSpeciesId());
+   assertEquals("GRCh38", toLoci.getAssemblyId());
+   assertEquals("7", toLoci.getChromosomeId());
+   toMap = toLoci.getMapping();
+   assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
+           toMap.toString());
+ }
+
+ /**
+ * Tests for the method that maps nucleotide to protein based on CDS features
+ */
+ @Test(groups = "Functional")
+ public void testMapCdsToProtein()
+ {
+ SequenceI peptide = new Sequence("pep", "KLQ");
+
+ /*
+ * Case 1: CDS 3 times length of peptide
+ * NB method only checks lengths match, not translation
+ */
+ SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
+ MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 2: CDS 3 times length of peptide + stop codon
+ * (note code does not currently check trailing codon is a stop codon)
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTCCC");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertNull(ml);
+
+ /*
+ * Case 4: CDS shorter than 3 * peptide - no mapping is made
+ */
+ dna = new Sequence("dna", "AACGacgtCTCC");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertNull(ml);
+
+ /*
+ * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTG");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 6: incomplete start codon corresponding to X in peptide
+ */
+ dna = new Sequence("dna", "ACGacgtCTCCTTGG");
+ dna.createDatasetSequence();
+ SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
+ sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
+ dna.addSequenceFeature(sf);
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
+ peptide = new Sequence("pep", "XLQ");
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals("[[2, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[3, 3], [8, 12]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+ }
+
+ /**
+  * Tests for the method that locates the CDS sequence that has a mapping to
+  * the given protein. That is, given a transcript-to-peptide mapping, find the
+  * cds-to-peptide mapping that relates to both, and return the CDS sequence.
+  */
+ @Test(groups = "Functional")
+ public void testFindCdsForProtein()
+ {
+   List<AlignedCodonFrame> mappings = new ArrayList<>();
+   AlignedCodonFrame acf1 = new AlignedCodonFrame();
+   mappings.add(acf1);
+
+   SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");
+   dna1.createDatasetSequence();
+
+   // NB we currently exclude STOP codon from CDS sequences
+   // the test would need to change if this changes in future
+   SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
+   cds1.createDatasetSequence();
+
+   SequenceI pep1 = new Sequence("pep1", "MLS");
+   pep1.createDatasetSequence();
+   List<AlignedCodonFrame> seqMappings = new ArrayList<>();
+   MapList mapList = new MapList(new int[] { 5, 6, 9, 15 },
+           new int[]
+           { 1, 3 }, 3, 1);
+   Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
+
+   // add dna to peptide mapping
+   seqMappings.add(acf1);
+   acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+           mapList);
+
+   /*
+    * first case - no dna-to-CDS mapping exists - search fails
+    */
+   SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
+           seqMappings, dnaToPeptide);
+   assertNull(seq);
+
+   /*
+    * second case - CDS-to-peptide mapping exists but no dna-to-CDS
+    * - search fails
+    */
+   // todo this test fails if the mapping is added to acf1, not acf2
+   // need to tidy up use of lists of mappings in AlignedCodonFrame
+   AlignedCodonFrame acf2 = new AlignedCodonFrame();
+   mappings.add(acf2);
+   MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
+           new int[]
+           { 1, 3 }, 3, 1);
+   acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
+           cdsToPeptideMapping);
+   assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
+           dnaToPeptide));
+
+   /*
+    * third case - add dna-to-CDS mapping - CDS is now found!
+    */
+   MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },
+           new int[]
+           { 1, 9 }, 1, 1);
+   acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
+           dnaToCdsMapping);
+   seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
+           dnaToPeptide);
+   // expected value first, so a failure reports expected/actual correctly
+   assertSame(cds1.getDatasetSequence(), seq);
+ }
+
+ /**
+  * Tests for the method that locates the CDS sequence that has a mapping to
+  * the given protein. That is, given a transcript-to-peptide mapping, find the
+  * cds-to-peptide mapping that relates to both, and return the CDS sequence.
+  * This test is for the case where transcript and CDS are the same length.
+  */
+ @Test(groups = "Functional")
+ public void testFindCdsForProtein_noUTR()
+ {
+   List<AlignedCodonFrame> mappings = new ArrayList<>();
+   AlignedCodonFrame acf1 = new AlignedCodonFrame();
+   mappings.add(acf1);
+
+   SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");
+   dna1.createDatasetSequence();
+
+   // NB we currently exclude STOP codon from CDS sequences
+   // the test would need to change if this changes in future
+   SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
+   cds1.createDatasetSequence();
+
+   SequenceI pep1 = new Sequence("pep1", "MLS");
+   pep1.createDatasetSequence();
+   List<AlignedCodonFrame> seqMappings = new ArrayList<>();
+   MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3,
+           1);
+   Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
+
+   // add dna to peptide mapping
+   seqMappings.add(acf1);
+   acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+           mapList);
+
+   /*
+    * first case - transcript lacks CDS features - it appears to be
+    * the CDS sequence and is returned
+    */
+   SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
+           seqMappings, dnaToPeptide);
+   // expected value first, so a failure reports expected/actual correctly
+   assertSame(dna1.getDatasetSequence(), seq);
+
+   /*
+    * second case - transcript has CDS feature - this means it is
+    * not returned as a match for CDS (CDS sequences don't have CDS features)
+    */
+   dna1.addSequenceFeature(
+           new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));
+   seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
+           dnaToPeptide);
+   assertNull(seq);
+
+   /*
+    * third case - CDS-to-peptide mapping exists but no dna-to-CDS
+    * - search fails
+    */
+   // todo this test fails if the mapping is added to acf1, not acf2
+   // need to tidy up use of lists of mappings in AlignedCodonFrame
+   AlignedCodonFrame acf2 = new AlignedCodonFrame();
+   mappings.add(acf2);
+   MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
+           new int[]
+           { 1, 3 }, 3, 1);
+   acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
+           cdsToPeptideMapping);
+   assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
+           dnaToPeptide));
+
+   /*
+    * fourth case - add dna-to-CDS mapping - CDS is now found!
+    */
+   MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },
+           new int[]
+           { 1, 9 }, 1, 1);
+   acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
+           dnaToCdsMapping);
+   seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
+           dnaToPeptide);
+   assertSame(cds1.getDatasetSequence(), seq);
+ }
+
+ /**
+  * Tests that annotation added to a sequence is transferred, as reference
+  * annotation, to alignment sequences taken from sub-ranges of it, and that
+  * each annotation value stays on the residue position it was created for.
+  * The check is repeated after simulating a copy and paste of the sequences
+  * to a new alignment (JAL-4182).
+  */
+ @Test(groups = "Functional")
+ public void testAddReferenceAnnotations()
+ {
+   SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA");
+   Annotation[] aa = new Annotation[longseq.getLength()];
+
+   // annotation value at each column is its 1-based position
+   for (int p = 0; p < aa.length; p++)
+   {
+     aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0,
+             (float) p + 1);
+   }
+   AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot",
+           "Annotations", aa);
+   refAnnot.setCalcId("Test");
+   longseq.addAlignmentAnnotation(refAnnot);
+   verifyExpectedSequenceAnnotation(refAnnot);
+
+   Alignment ourAl = new Alignment(
+           new SequenceI[]
+           { longseq.getSubSequence(5, 10),
+               longseq.getSubSequence(7, 12) });
+   ourAl.createDatasetAlignment();
+
+   // transfer annotation
+   SortedMap<String, String> tipEntries = new TreeMap<>();
+   Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
+
+   AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(),
+           tipEntries, candidates, ourAl);
+   AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null);
+
+   assertNotNull(ourAl.getAlignmentAnnotation());
+   // expected value first, consistent with the other assertions in this file
+   assertEquals(2, ourAl.getAlignmentAnnotation().length);
+
+   for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation())
+   {
+     verifyExpectedSequenceAnnotation(alan);
+   }
+   // Everything above works for 2.11.3 and 2.11.2.x.
+   // now simulate copy/paste to new alignment
+   SequenceI[] newSeqAl = new SequenceI[2];
+   // copy sequences, passing their annotation to the copy constructor
+   newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0),
+           ourAl.getSequenceAt(0).getAnnotation());
+   newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1),
+           ourAl.getSequenceAt(1).getAnnotation());
+
+   Alignment newAl = new Alignment(newSeqAl);
+   // delete annotation from the copies
+   for (SequenceI sq : newAl.getSequences())
+   {
+     sq.setAlignmentAnnotation(new AlignmentAnnotation[0]);
+   }
+   // JAL-4182 scenario test
+   SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl));
+   sg.setStartRes(0);
+   sg.setEndRes(newAl.getWidth());
+   AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0],
+           newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg);
+   AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1],
+           newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg);
+   // fail with an assertion, not a NullPointerException, if nothing was added
+   assertNotNull(newAl.getAlignmentAnnotation());
+   for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation())
+   {
+     verifyExpectedSequenceAnnotation(alan);
+   }
+ }
+
+ /**
+ * helper - tests annotation is mapped to position it was originally created
+ * for
+ *
+ * @param alan
+ */
+ private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan)
+ {
+ for (int c = 0; c < alan.annotations.length; c++)
+ {
+ Annotation a = alan.annotations[c];
+ if (a != null)
+ {
+ assertEquals("Misaligned annotation at " + c,
+ (float) alan.sequenceRef.findPosition(c), a.value);
+ }
+ else
+ {
+ assertTrue("Unexpected Null at position " + c,
+ c >= alan.sequenceRef.getLength()
+ || Comparison.isGap(alan.sequenceRef.getCharAt(c)));
+ }
+ }
+ }
+
+ @Test(groups = "Functional")
+ public void testAddReferenceContactMap()
+ {
+ SequenceI sq = new Sequence("a", "SSSQ");
+ ContactMatrixI cm = new SeqDistanceContactMatrix(4);
+ AlignmentAnnotation cm_aan = sq.addContactList(cm);
+ cm_aan.description = cm_aan.description + " cm1";
+ SequenceI dssq = sq.createDatasetSequence();
+
+ // remove annotation on our non-dataset sequence
+ sq.removeAlignmentAnnotation(sq.getAnnotation()[0]);
+ // test transfer
+ Alignment al = new Alignment(new SequenceI[] { sq });
+ SortedMap<String, String> tipEntries = new TreeMap<>();
+ Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
+
+ AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
+ tipEntries, candidates, al);
+ AlignmentUtils.addReferenceAnnotations(candidates, al, null);
+ assertTrue("No contact map annotation transferred",
+ al.getAlignmentAnnotation() != null
+ && al.getAlignmentAnnotation().length == 1);
+ AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label)
+ .iterator().next();
+ ContactMatrixI t_cm = al.getContactMatrixFor(alan);
+ assertNotNull("No contact map for the transferred annotation row.",
+ t_cm);
+ assertTrue(t_cm instanceof SeqDistanceContactMatrix);
+ assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq());
+
+ ContactListI cl = al.getContactListFor(alan, 1);
+ assertNotNull(
+ "No contact matrix recovered after reference annotation transfer",
+ cl);
+ // semantics of sequence associated contact list is slightly tricky - column
+ // 3 in alignment should have data
+ cl = al.getContactListFor(alan, 3);
+ assertNotNull(
+ "Contact matrix should have data for last position in sequence",
+ cl);
+
+ ContactMatrixI cm2 = new SeqDistanceContactMatrix(4);
+ dssq.addContactList(cm2);
+ tipEntries = new TreeMap<>();
+ candidates = new LinkedHashMap<>();
+
+ AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
+ tipEntries, candidates, al);
+ AlignmentUtils.addReferenceAnnotations(candidates, al, null);
+ assertTrue("Expected two contact map annotation transferred",
+ al.getAlignmentAnnotation() != null
+ && al.getAlignmentAnnotation().length == 2);
+