From 3e115aff96aa9c7bf5d1c393e8e89fb4367c23ab Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 21 Jan 2016 15:23:20 +0000 Subject: [PATCH] JAL-1705 features transferred to CDS sequence; makeExon renamed makeCds --- src/jalview/analysis/AlignmentUtils.java | 232 ++++++++++++++++++------ test/jalview/analysis/AlignmentUtilsTests.java | 190 +++++++++++++------ 2 files changed, 305 insertions(+), 117 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 0e30d8c..d8cb9a2 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -31,6 +31,7 @@ import jalview.datamodel.FeatureProperties; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResults; import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; @@ -1317,21 +1318,21 @@ public class AlignmentUtils } /** - * Constructs an alignment consisting of the mapped exon regions in the given + * Constructs an alignment consisting of the mapped cds regions in the given * nucleotide sequences, and updates mappings to match. * * @param dna * aligned dna sequences * @param mappings * from dna to protein; these are replaced with new mappings - * @return an alignment whose sequences are the exon-only parts of the dna - * sequences (or null if no exons are found) + * @return an alignment whose sequences are the cds-only parts of the dna + * sequences (or null if no cds are found) */ - public static AlignmentI makeExonAlignment(SequenceI[] dna, + public static AlignmentI makeCdsAlignment(SequenceI[] dna, List mappings) { List newMappings = new ArrayList(); - List exonSequences = new ArrayList(); + List cdsSequences = new ArrayList(); for (SequenceI dnaSeq : dna) { @@ -1341,17 +1342,17 @@ public class AlignmentUtils for (AlignedCodonFrame acf : seqMappings) { AlignedCodonFrame newMapping = new AlignedCodonFrame(); - final List mappedExons = makeExonSequences(ds, acf, + final List mappedCds = makeCdsSequences(ds, acf, newMapping); - if (!mappedExons.isEmpty()) + if (!mappedCds.isEmpty()) { - exonSequences.addAll(mappedExons); + cdsSequences.addAll(mappedCds); newMappings.add(newMapping); } } } AlignmentI al = new Alignment( - exonSequences.toArray(new SequenceI[exonSequences.size()])); + cdsSequences.toArray(new SequenceI[cdsSequences.size()])); al.setDataset(null); /* @@ -1364,86 +1365,203 @@ public class AlignmentUtils } /** - * Helper method to make exon-only sequences and populate their mappings to + * Helper method to make cds-only sequences and populate their mappings to * protein products *

* For example, if ggCCaTTcGAg has mappings [3, 4, 6, 7, 9, 10] to protein * then generate a sequence CCTTGA with mapping [1, 6] to the same protein * residues *

- * Typically eukaryotic dna will include exons encoding for a single peptide + * Typically eukaryotic dna will include cds encoding for a single peptide * sequence i.e. return a single result. Bacterial dna may have overlapping - * exon mappings coding for multiple peptides so return multiple results + * cds mappings coding for multiple peptides so return multiple results * (example EMBL KF591215). * * @param dnaSeq * a dna dataset sequence * @param mapping * containing one or more mappings of the sequence to protein - * @param newMapping - * the new mapping to populate, from the exon-only sequences to their + * @param newMappings + * the new mapping to populate, from the cds-only sequences to their * mapped protein sequences * @return */ - protected static List makeExonSequences(SequenceI dnaSeq, - AlignedCodonFrame mapping, AlignedCodonFrame newMapping) + protected static List makeCdsSequences(SequenceI dnaSeq, + AlignedCodonFrame mapping, AlignedCodonFrame newMappings) { - List exonSequences = new ArrayList(); + List cdsSequences = new ArrayList(); List seqMappings = mapping.getMappingsForSequence(dnaSeq); - final char[] dna = dnaSeq.getSequence(); + for (Mapping seqMapping : seqMappings) { - StringBuilder newSequence = new StringBuilder(dnaSeq.getLength()); + SequenceI cds = makeCdsSequence(dnaSeq, seqMapping, newMappings); + cdsSequences.add(cds); + + /* + * add new mappings, from dna to cds, and from cds to peptide + */ + MapList dnaToCds = addCdsMappings(dnaSeq, cds, seqMapping, + newMappings); /* - * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } + * transfer any features on dna that overlap the CDS */ - final List dnaExonRanges = seqMapping.getMap().getFromRanges(); - for (int[] range : dnaExonRanges) + transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */); + } + return cdsSequences; + } + + /** + * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the + * feature start/end ranges, optionally omitting specified feature types. + * + * @param fromSeq + * @param toSeq + * @param mapping + * the mapping from 'fromSeq' to 'toSeq' + * @param omitting + */ + protected static void transferFeatures(SequenceI fromSeq, + SequenceI toSeq, MapList mapping, String... omitting) + { + SequenceFeature[] sfs = fromSeq.getSequenceFeatures(); + if (sfs != null) + { + for (SequenceFeature sf : sfs) { - for (int pos = range[0]; pos <= range[1]; pos++) + String type = sf.getType(); + boolean omit = false; + for (String toOmit : omitting) + { + if (type.equals(toOmit)) + { + omit = true; + } + } + if (omit) { - newSequence.append(dna[pos - 1]); + continue; + } + + /* + * locate the mapped range - null if either start or end is + * not mapped (no partial overlaps are calculated) + */ + int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd()); + if (mappedTo != null) + { + SequenceFeature copy = new SequenceFeature(sf); + copy.setBegin(Math.min(mappedTo[0], mappedTo[1])); + copy.setEnd(Math.max(mappedTo[0], mappedTo[1])); + toSeq.addSequenceFeature(copy); } } + } + } - SequenceI exon = new Sequence(dnaSeq.getName(), - newSequence.toString()); + /** + * Creates and adds mappings + *

    + *
  • from cds to peptide
  • + *
  • from dna to cds
  • + *
+ * and returns the dna-to-cds mapping + * + * @param dnaSeq + * @param cdsSeq + * @param dnaMapping + * @param newMappings + * @return + */ + protected static MapList addCdsMappings(SequenceI dnaSeq, + SequenceI cdsSeq, + Mapping dnaMapping, AlignedCodonFrame newMappings) + { + cdsSeq.createDatasetSequence(); - /* - * Locate any xrefs to CDS database on the protein product and attach to - * the CDS sequence. Also add as a sub-token of the sequence name. - */ - // default to "CDS" if we can't locate an actual gene id - String cdsAccId = FeatureProperties - .getCodingFeature(DBRefSource.EMBL); - DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(seqMapping.getTo() - .getDBRef(), DBRefSource.CODINGDBS); - if (cdsRefs != null) + /* + * CDS to peptide is just a contiguous 3:1 mapping, with + * the peptide ranges taken unchanged from the dna mapping + */ + List cdsRanges = new ArrayList(); + cdsRanges.add(new int[] { 1, cdsSeq.getLength() }); + MapList cdsToPeptide = new MapList(cdsRanges, dnaMapping.getMap() + .getToRanges(), 3, 1); + newMappings.addMap(cdsSeq.getDatasetSequence(), dnaMapping.getTo(), + cdsToPeptide); + + /* + * dna 'from' ranges map 1:1 to the contiguous extracted CDS + */ + MapList dnaToCds = new MapList( + dnaMapping.getMap().getFromRanges(), cdsRanges, 1, 1); + newMappings.addMap(dnaSeq, cdsSeq.getDatasetSequence(), dnaToCds); + return dnaToCds; + } + + /** + * Makes and returns a CDS-only sequence, where the CDS regions are identified + * as the 'from' ranges of the mapping on the dna. Any sequence features on + * the dna which overlap the CDS regions are copied to the new sequence. + * + * @param dnaSeq + * nucleotide sequence + * @param seqMapping + * mappings from CDS regions of nucleotide + * @param exonMappings + * CDS-to-peptide mapping (to add to) + * @return + */ + protected static SequenceI makeCdsSequence(SequenceI dnaSeq, + Mapping seqMapping, AlignedCodonFrame exonMappings) + { + StringBuilder newSequence = new StringBuilder(dnaSeq.getLength()); + final char[] dna = dnaSeq.getSequence(); + int offset = dnaSeq.getStart() - 1; + + /* + * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } + */ + final List dnaCdsRanges = seqMapping.getMap().getFromRanges(); + for (int[] range : dnaCdsRanges) + { + // TODO handle reverse mapping as well (range[1] < range[0]) + for (int pos = range[0]; pos <= range[1]; pos++) { - for (DBRefEntry cdsRef : cdsRefs) - { - exon.addDBRef(new DBRefEntry(cdsRef)); - cdsAccId = cdsRef.getAccessionId(); - } + newSequence.append(dna[pos - offset - 1]); } - exon.setName(exon.getName() + "|" + cdsAccId); - exon.createDatasetSequence(); + } - /* - * Build new mappings - from the same protein regions, but now to - * contiguous exons - */ - List exonRange = new ArrayList(); - exonRange.add(new int[] { 1, newSequence.length() }); - MapList map = new MapList(exonRange, seqMapping.getMap() - .getToRanges(), 3, 1); - newMapping.addMap(exon.getDatasetSequence(), seqMapping.getTo(), map); - MapList cdsToDnaMap = new MapList(dnaExonRanges, exonRange, 1, 1); - newMapping.addMap(dnaSeq, exon.getDatasetSequence(), cdsToDnaMap); - - exonSequences.add(exon); + SequenceI cds = new Sequence(dnaSeq.getName(), + newSequence.toString()); + + transferDbRefs(seqMapping.getTo(), cds); + return cds; + } + + /** + * Locate any xrefs to CDS databases on the protein product and attach to the + * CDS sequence. Also add as a sub-token of the sequence name. + * + * @param from + * @param to + */ + protected static void transferDbRefs(SequenceI from, SequenceI to) + { + String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL); + DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRef(), + DBRefSource.CODINGDBS); + if (cdsRefs != null) + { + for (DBRefEntry cdsRef : cdsRefs) + { + to.addDBRef(new DBRefEntry(cdsRef)); + cdsAccId = cdsRef.getAccessionId(); + } + } + if (!to.getName().contains(cdsAccId)) + { + to.setName(to.getName() + "|" + cdsAccId); } - return exonSequences; } } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 1815eb7..a48db4b 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -36,6 +36,7 @@ import jalview.datamodel.Mapping; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResults.Match; import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.AppletFormatAdapter; import jalview.io.FormatAdapter; @@ -1000,10 +1001,10 @@ public class AlignmentUtilsTests } /** - * Test the method that extracts the exon-only part of a dna alignment. + * Test the method that extracts the cds-only part of a dna alignment. */ @Test(groups = { "Functional" }) - public void testMakeExonAlignment() + public void testMakeCdsAlignment() { SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); @@ -1026,11 +1027,11 @@ public class AlignmentUtilsTests acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); mappings.add(acf); - AlignmentI exons = AlignmentUtils.makeExonAlignment(new SequenceI[] { + AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { dna1, dna2 }, mappings); - assertEquals(2, exons.getSequences().size()); - assertEquals("GGGTTT", exons.getSequenceAt(0).getSequenceAsString()); - assertEquals("GGGTTTCCC", exons.getSequenceAt(1).getSequenceAsString()); + assertEquals(2, cds.getSequences().size()); + assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); /* * Verify updated mappings @@ -1047,14 +1048,14 @@ public class AlignmentUtilsTests SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(0).getDatasetSequence(), + assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT sr = MappingUtils.buildSearchResults(pep1, 2, mappings); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(0).getDatasetSequence(), + assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); @@ -1069,34 +1070,34 @@ public class AlignmentUtilsTests sr = MappingUtils.buildSearchResults(pep2, 1, mappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(1).getDatasetSequence(), + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT sr = MappingUtils.buildSearchResults(pep2, 2, mappings); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(1).getDatasetSequence(), + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC sr = MappingUtils.buildSearchResults(pep2, 3, mappings); m = sr.getResults().get(0); - assertEquals(exons.getSequenceAt(1).getDatasetSequence(), + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); } /** - * Test the method that makes an exon-only sequence from a DNA sequence and - * its product mapping. Test includes the expected case that the DNA sequence + * Test the method that makes a cds-only sequence from a DNA sequence and its + * product mapping. Test includes the expected case that the DNA sequence * already has a protein product (Uniprot translation) which in turn has an * x-ref to the EMBLCDS record. */ @Test(groups = { "Functional" }) - public void testMakeExonSequences() + public void testMakeCdsSequences() { SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI pep1 = new Sequence("pep1", "GF"); @@ -1117,27 +1118,27 @@ public class AlignmentUtilsTests mappings.add(acf); AlignedCodonFrame newMapping = new AlignedCodonFrame(); - List exons = AlignmentUtils.makeExonSequences(dna1, acf, + List cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf, newMapping); - assertEquals(1, exons.size()); - SequenceI exon = exons.get(0); + assertEquals(1, cdsSeqs.size()); + SequenceI cdsSeq = cdsSeqs.get(0); - assertEquals("GGGTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12345", exon.getName()); - assertEquals(1, exon.getDBRef().length); - DBRefEntry cdsRef = exon.getDBRef()[0]; + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12345", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRef().length); + DBRefEntry cdsRef = cdsSeq.getDBRef()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("2", cdsRef.getVersion()); assertEquals("A12345", cdsRef.getAccessionId()); } /** - * Test the method that makes an exon-only alignment from a DNA sequence and - * its product mappings, for the case where there are multiple exon mappings - * to different protein products. + * Test the method that makes a cds-only alignment from a DNA sequence and its + * product mappings, for the case where there are multiple exon mappings to + * different protein products. */ @Test(groups = { "Functional" }) - public void testMakeExonAlignment_multipleProteins() + public void testMakeCdsAlignment_multipleProteins() { SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT @@ -1183,82 +1184,82 @@ public class AlignmentUtilsTests * Create the Exon alignment; also replaces the dna-to-protein mappings with * exon-to-protein and exon-to-dna mappings */ - AlignmentI exal = AlignmentUtils.makeExonAlignment( + AlignmentI exal = AlignmentUtils.makeCdsAlignment( new SequenceI[] { dna1 }, mappings); /* - * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively + * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively */ - List exons = exal.getSequences(); - assertEquals(3, exons.size()); - - SequenceI exon = exons.get(0); - assertEquals("GGGTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12345", exon.getName()); - assertEquals(1, exon.getDBRef().length); - DBRefEntry cdsRef = exon.getDBRef()[0]; + List cds = exal.getSequences(); + assertEquals(3, cds.size()); + + SequenceI cdsSeq = cds.get(0); + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12345", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRef().length); + DBRefEntry cdsRef = cdsSeq.getDBRef()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("2", cdsRef.getVersion()); assertEquals("A12345", cdsRef.getAccessionId()); - exon = exons.get(1); - assertEquals("aaaccc", exon.getSequenceAsString()); - assertEquals("dna1|A12346", exon.getName()); - assertEquals(1, exon.getDBRef().length); - cdsRef = exon.getDBRef()[0]; + cdsSeq = cds.get(1); + assertEquals("aaaccc", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12346", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRef().length); + cdsRef = cdsSeq.getDBRef()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("3", cdsRef.getVersion()); assertEquals("A12346", cdsRef.getAccessionId()); - exon = exons.get(2); - assertEquals("aaaTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12347", exon.getName()); - assertEquals(1, exon.getDBRef().length); - cdsRef = exon.getDBRef()[0]; + cdsSeq = cds.get(2); + assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12347", cdsSeq.getName()); + assertEquals(1, cdsSeq.getDBRef().length); + cdsRef = cdsSeq.getDBRef()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("4", cdsRef.getVersion()); assertEquals("A12347", cdsRef.getAccessionId()); /* - * Verify there are mappings from each exon sequence to its protein product + * Verify there are mappings from each cds sequence to its protein product * and also to its dna source */ Iterator newMappingsIterator = mappings.iterator(); // mappings for dna1 - exon1 - pep1 - AlignedCodonFrame exonMapping = newMappingsIterator.next(); - List dnaMappings = exonMapping.getMappingsForSequence(dna1); + AlignedCodonFrame cdsMapping = newMappingsIterator.next(); + List dnaMappings = cdsMapping.getMappingsForSequence(dna1); assertEquals(1, dnaMappings.size()); - assertSame(exons.get(0).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings .get(0).getMap().getToPosition(1)); - List peptideMappings = exonMapping + List peptideMappings = cdsMapping .getMappingsForSequence(pep1); assertEquals(1, peptideMappings.size()); assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo()); - // mappings for dna1 - exon2 - pep2 - exonMapping = newMappingsIterator.next(); - dnaMappings = exonMapping.getMappingsForSequence(dna1); + // mappings for dna1 - cds2 - pep2 + cdsMapping = newMappingsIterator.next(); + dnaMappings = cdsMapping.getMappingsForSequence(dna1); assertEquals(1, dnaMappings.size()); - assertSame(exons.get(1).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings .get(0).getMap().getToPosition(4)); - peptideMappings = exonMapping.getMappingsForSequence(pep2); + peptideMappings = cdsMapping.getMappingsForSequence(pep2); assertEquals(1, peptideMappings.size()); assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo()); - // mappings for dna1 - exon3 - pep3 - exonMapping = newMappingsIterator.next(); - dnaMappings = exonMapping.getMappingsForSequence(dna1); + // mappings for dna1 - cds3 - pep3 + cdsMapping = newMappingsIterator.next(); + dnaMappings = cdsMapping.getMappingsForSequence(dna1); assertEquals(1, dnaMappings.size()); - assertSame(exons.get(2).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings .get(0).getMap().getToPosition(4)); - peptideMappings = exonMapping.getMappingsForSequence(pep3); + peptideMappings = cdsMapping.getMappingsForSequence(pep3); assertEquals(1, peptideMappings.size()); assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo()); } @@ -1340,4 +1341,73 @@ public class AlignmentUtilsTests checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map, "AAA---CCCTTT---"); } + + @Test(groups = { "Functional" }) + public void testTransferFeatures() + { + SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); + SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); + + // no overlap + dna.addSequenceFeature(new SequenceFeature("type1", "desc1", 1, 2, 1f, + null)); + // partial overlap - to [1, 1] + dna.addSequenceFeature(new SequenceFeature("type2", "desc2", 3, 4, 2f, + null)); + // exact overlap - to [1, 3] + dna.addSequenceFeature(new SequenceFeature("type3", "desc3", 4, 6, 3f, + null)); + // spanning overlap - to [2, 5] + dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f, + null)); + // exactly overlaps whole mapped range [1, 6] + dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f, + null)); + // no overlap (internal) + dna.addSequenceFeature(new SequenceFeature("type6", "desc6", 7, 9, 6f, + null)); + // no overlap (3' end) + dna.addSequenceFeature(new SequenceFeature("type7", "desc7", 13, 15, + 7f, null)); + // overlap (3' end) - to [6, 6] + dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12, + 8f, null)); + // extended overlap - to [6, +] + dna.addSequenceFeature(new SequenceFeature("type9", "desc9", 12, 13, + 9f, null)); + + MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + + /* + * behaviour of transferFeatures depends on MapList.locateInTo() + * if start and end positions are mapped, returns the mapped region + * if either is not mapped, does _not_ search for overlapped region + */ + AlignmentUtils.transferFeatures(dna, cds, map); + SequenceFeature[] sfs = cds.getSequenceFeatures(); + assertEquals(4, sfs.length); + + SequenceFeature sf = sfs[0]; + assertEquals("type3", sf.getType()); + assertEquals("desc3", sf.getDescription()); + assertEquals(3f, sf.getScore()); + assertEquals(1, sf.getBegin()); + assertEquals(3, sf.getEnd()); + + sf = sfs[1]; + assertEquals("type4", sf.getType()); + assertEquals(2, sf.getBegin()); + assertEquals(5, sf.getEnd()); + + sf = sfs[2]; + assertEquals("type5", sf.getType()); + assertEquals(1, sf.getBegin()); + assertEquals(6, sf.getEnd()); + + sf = sfs[3]; + assertEquals("type8", sf.getType()); + assertEquals(6, sf.getBegin()); + assertEquals(6, sf.getEnd()); + } } -- 1.7.10.2