From: gmungoc Date: Tue, 1 Mar 2016 12:09:47 +0000 (+0000) Subject: JAL-1705 align CDS and peptide products to transcripts X-Git-Tag: Release_2_10_0~296^2~19 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=1e8c7a9ab9f5da589d0aa2482fd2e3361c320d57;p=jalview.git JAL-1705 align CDS and peptide products to transcripts --- diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 7b05649..3ea510b 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -611,17 +611,15 @@ public class Sequence extends ASequence implements SequenceI } /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! + * Returns the character of the aligned sequence at the given position (base + * zero), or space if the position is not within the sequence's bounds * - * @return DOCUMENT ME! + * @return */ @Override public char getCharAt(int i) { - if (i < sequence.length) + if (i >= 0 && i < sequence.length) { return sequence[i]; } diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index df4e45a..6507ff5 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -1,5 +1,6 @@ package jalview.ext.ensembl; +import jalview.api.FeatureColourI; import jalview.api.FeatureSettingsI; import jalview.datamodel.AlignmentI; import jalview.datamodel.Sequence; @@ -7,10 +8,12 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; -import jalview.schemes.FeatureColourScheme; +import jalview.schemes.FeatureColourAdapter; +import jalview.schemes.FeatureSettingsAdapter; import jalview.util.MapList; import jalview.util.StringUtils; +import java.awt.Color; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -117,6 +120,9 @@ public class EnsemblGene extends EnsemblSeqProxy return getSequenceRecords(theIds); } + /* + * fetch the gene sequence(s) with features and xrefs + */ AlignmentI al = super.getSequenceRecords(query); /* @@ -173,6 +179,36 @@ public class EnsemblGene extends EnsemblSeqProxy { makeTranscript(transcriptFeature, al, gene); } + + clearGeneFeatures(gene); + } + + /** + * Remove unwanted features (transcript, exon, CDS) from the gene sequence + * after we have used them to derive transcripts and transfer features + * + * @param gene + */ + protected void clearGeneFeatures(SequenceI gene) + { + SequenceFeature[] sfs = gene.getSequenceFeatures(); + if (sfs != null) + { + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + List filtered = new ArrayList(); + for (SequenceFeature sf : sfs) + { + String type = sf.getType(); + if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON) + && !so.isA(type, SequenceOntologyI.CDS)) + { + filtered.add(sf); + } + } + gene.setSequenceFeatures(filtered + .toArray(new SequenceFeature[filtered + .size()])); + } } /** @@ -362,13 +398,13 @@ public class EnsemblGene extends EnsemblSeqProxy @Override protected boolean retainFeature(SequenceFeature sf, String accessionId) { - if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - SequenceOntologyI.GENE)) + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + String type = sf.getType(); + if (so.isA(type, SequenceOntologyI.GENE)) { return false; } - - if (isTranscript(sf.getType())) + if (isTranscript(type)) { String parent = (String) sf.getValue(PARENT); if (!(GENE_PREFIX + accessionId).equals(parent)) @@ -417,7 +453,70 @@ public class EnsemblGene extends EnsemblSeqProxy @Override public FeatureSettingsI getFeatureColourScheme() { - return FeatureColourScheme.EnsemblVariants; + return new FeatureSettingsAdapter() + { + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + @Override + public boolean isFeatureDisplayed(String type) + { + return (so.isA(type, SequenceOntologyI.EXON) || so.isA(type, + SequenceOntologyI.SEQUENCE_VARIANT)); + } + + @Override + public FeatureColourI getFeatureColour(String type) + { + if (so.isA(type, SequenceOntologyI.EXON)) + { + return new FeatureColourAdapter() + { + @Override + public boolean isColourByLabel() + { + return true; + } + }; + } + if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT)) + { + return new FeatureColourAdapter() + { + + @Override + public Color getColour() + { + return Color.RED; + } + }; + } + return null; + } + + /** + * order to render sequence_variant after exon after the rest + */ + @Override + public int compare(String feature1, String feature2) + { + if (so.isA(feature1, SequenceOntologyI.SEQUENCE_VARIANT)) + { + return +1; + } + if (so.isA(feature2, SequenceOntologyI.SEQUENCE_VARIANT)) + { + return -1; + } + if (so.isA(feature1, SequenceOntologyI.EXON)) + { + return +1; + } + if (so.isA(feature2, SequenceOntologyI.EXON)) + { + return -1; + } + return 0; + } + }; } } diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 77263ff..869a702 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -343,6 +343,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient int mappedDnaLength = getCdsRanges(dnaSeq, ranges); int proteinLength = proteinSeq.getLength(); + int proteinEnd = proteinLength; int proteinStart = 1; /* @@ -367,7 +368,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } if (codesForResidues == proteinLength) { - proteinRange.add(new int[] { proteinStart, proteinLength }); + proteinRange.add(new int[] { proteinStart, proteinEnd }); return new MapList(ranges, proteinRange, 3, 1); } return null; diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 7777324..8d54f08 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4780,11 +4780,13 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, AlignmentI copyAlignment = null; final SequenceI[] sequenceSelection = AlignFrame.this.viewport .getSequenceSelection(); + final char gapChar = AlignFrame.this.viewport + .getGapCharacter(); List cf = xrefs.getCodonFrames(); if (dna) { copyAlignment = AlignmentUtils.makeCdsAlignment( - sequenceSelection, cf); + sequenceSelection, cf, gapChar); al.getCodonFrames().clear(); al.getCodonFrames().addAll(cf); } @@ -4793,6 +4795,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, copyAlignment = new Alignment(new Alignment( sequenceSelection)); copyAlignment.getCodonFrames().addAll(cf); + copyAlignment.setGapCharacter(gapChar); } StructureSelectionManager ssm = StructureSelectionManager .getStructureSelectionManager(Desktop.instance); @@ -4810,15 +4813,14 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, /* * align protein to dna */ - // TODO needs debugging - // if (dna) - // { - // al.alignAs(copyAlignment); - // } - // else - // { - // copyAlignment.alignAs(al); - // } + if (dna) + { + al.alignAs(copyAlignment); + } + else + { + copyAlignment.alignAs(al); + } AlignFrame copyThis = new AlignFrame(copyAlignment, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); diff --git a/src/jalview/schemes/FeatureColourScheme.java b/src/jalview/schemes/FeatureColourScheme.java deleted file mode 100644 index 308495c..0000000 --- a/src/jalview/schemes/FeatureColourScheme.java +++ /dev/null @@ -1,109 +0,0 @@ -package jalview.schemes; - -import jalview.api.FeatureColourI; -import jalview.api.FeatureSettingsI; - -import java.awt.Color; - -/** - * Pre-set configurations for feature settings - * - * @author gmcarstairs - * - */ -public enum FeatureColourScheme implements FeatureSettingsI -{ - /** - * Show sequence variants in red, on top of exons coloured by label - */ - EnsemblVariants - { - - @Override - public boolean isFeatureDisplayed(String type) - { - // TODO accept SO sub-types of these features - // if (SequenceOntologyFactory.getInstance().isA(SequenceOntologyI.EXON... - return (EXON.equals(type) || SEQUENCE_VARIANT.equals(type)); - } - - @Override - public boolean isGroupDisplayed(String group) - { - return true; - } - - @Override - public FeatureColourI getFeatureColour(String type) - { - if (EXON.equals(type)) - { - return new FeatureColourAdapter() - { - @Override - public boolean isColourByLabel() - { - return true; - } - }; - } - if (SEQUENCE_VARIANT.equals(type)) - { - return new FeatureColourAdapter() - { - - @Override - public Color getColour() - { - return Color.RED; - } - }; - } - return null; - } - - @Override - public float getTransparency() - { - return 1f; - } - - /** - * order to render sequence_variant after exon after the rest - */ - @Override - public int compare(String feature1, String feature2) - { - if (SEQUENCE_VARIANT.equals(feature1)) - { - return +1; - } - if (SEQUENCE_VARIANT.equals(feature2)) - { - return -1; - } - if (EXON.equals(feature1)) - { - return +1; - } - if (EXON.equals(feature2)) - { - return -1; - } - return 0; - } - - @Override - public boolean optimiseOrder() - { - return false; - }; - - }; - - // SequenceOntologyI.SEQUENCE_VARIANT - private static final String SEQUENCE_VARIANT = "sequence_variant"; - - // SequenceOntologyI.EXON - private static final String EXON = "exon"; -} diff --git a/src/jalview/schemes/FeatureSettingsAdapter.java b/src/jalview/schemes/FeatureSettingsAdapter.java new file mode 100644 index 0000000..52b01ce --- /dev/null +++ b/src/jalview/schemes/FeatureSettingsAdapter.java @@ -0,0 +1,48 @@ +package jalview.schemes; + +import jalview.api.FeatureColourI; +import jalview.api.FeatureSettingsI; + +/** + * An adapter class that may be extended to instantiate feature colour schemes + */ +public class FeatureSettingsAdapter implements FeatureSettingsI +{ + + @Override + public boolean isFeatureDisplayed(String type) + { + return false; + } + + @Override + public boolean isGroupDisplayed(String group) + { + return true; + } + + @Override + public FeatureColourI getFeatureColour(String type) + { + return null; + } + + @Override + public float getTransparency() + { + return 1f; + } + + @Override + public int compare(String feature1, String feature2) + { + return 0; + } + + @Override + public boolean optimiseOrder() + { + return false; + } + +} diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index bf66b91..34a8926 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -322,12 +322,14 @@ public class MapList } /** - * Consolidates a list of ranges so that any contiguous ranges are merged + * Consolidates a list of ranges so that any contiguous ranges are merged. + * This assumes the ranges are already in start order (does not sort them). * * @param ranges - * @return + * @return the same list (if unchanged), else a new merged list, leaving the + * input list unchanged */ - public static List coalesceRanges(List ranges) + public static List coalesceRanges(final List ranges) { if (ranges == null || ranges.size() < 2) { return ranges; @@ -337,31 +339,56 @@ public class MapList List merged = new ArrayList(); int[] lastRange = ranges.get(0); int lastDirection = lastRange[1] >= lastRange[0] ? 1 : -1; + lastRange = new int[] { lastRange[0], lastRange[1] }; merged.add(lastRange); + boolean first = true; - for (int[] range : ranges) + for (final int[] range : ranges) { - if (range == lastRange) + if (first) { + first = false; continue; } + if (range[0] == lastRange[0] && range[1] == lastRange[1]) + { + // drop duplicate range + changed = true; + continue; + } + + /* + * drop this range if it lies within the last range + */ + if ((lastDirection == 1 && range[0] >= lastRange[0] + && range[0] <= lastRange[1] && range[1] >= lastRange[0] && range[1] <= lastRange[1]) + || (lastDirection == -1 && range[0] <= lastRange[0] + && range[0] >= lastRange[1] + && range[1] <= lastRange[0] && range[1] >= lastRange[1])) + { + changed = true; + continue; + } + int direction = range[1] >= range[0] ? 1 : -1; /* * if next range is in the same direction as last and contiguous, * just update the end position of the last range */ - if ((range[1] == range[0] || direction == lastDirection) - && (range[0] == lastRange[1] || range[0] == lastRange[1] - + lastDirection)) + boolean sameDirection = range[1] == range[0] || direction == lastDirection; + boolean extending = range[0] == lastRange[1] + lastDirection; + boolean overlapping = (lastDirection == 1 && range[0] >= lastRange[0] && range[0] <= lastRange[1]) + || (lastDirection == -1 && range[0] <= lastRange[0] && range[0] >= lastRange[1]); + if (sameDirection && (overlapping || extending)) { lastRange[1] = range[1]; changed = true; } else { - merged.add(range); - lastRange = range; + lastRange = new int[] { range[0], range[1] }; + merged.add(lastRange); // careful: merging [5, 5] after [7, 6] should keep negative direction lastDirection = (range[1] == range[0]) ? lastDirection : direction; } diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 267e871..16db13a 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -819,4 +819,57 @@ public final class MappingUtils it.remove(); } } + + /** + * Returns the total length of the supplied ranges + * + * @param ranges + * @return + */ + public static int getLength(List ranges) + { + if (ranges == null) + { + return 0; + } + int length = 0; + for (int[] range : ranges) + { + length += Math.abs(range[1] - range[0]) + 1; + } + return length; + } + + /** + * Answers true if any range includes the given value + * + * @param ranges + * @param value + * @return + */ + public static boolean contains(List ranges, int value) + { + if (ranges == null) + { + return false; + } + for (int[] range : ranges) + { + if (range[1] >= range[0] && value >= range[0] && value <= range[1]) + { + /* + * value within ascending range + */ + return true; + } + if (range[1] < range[0] && value <= range[0] && value >= range[1]) + { + /* + * value within descending range + */ + return true; + } + } + return false; + } } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index a82a881..818267d 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -1014,6 +1014,16 @@ public class AlignmentUtilsTests dna2.createDatasetSequence(); pep1.createDatasetSequence(); pep2.createDatasetSequence(); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f, + null)); + dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f, + null)); + dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f, + null)); + dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f, + null)); List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 6, 10, 12 }, @@ -1028,10 +1038,12 @@ public class AlignmentUtilsTests mappings.add(acf); AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2 }, mappings); + dna1, dna2 }, mappings, '-'); assertEquals(2, cds.getSequences().size()); - assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); - assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); + assertEquals("---GGG---TTT---", cds.getSequenceAt(0) + .getSequenceAsString()); + assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1) + .getSequenceAsString()); /* * Verify updated mappings @@ -1048,14 +1060,14 @@ public class AlignmentUtilsTests SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); - assertEquals(cds.getSequenceAt(0).getDatasetSequence(), + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT sr = MappingUtils.buildSearchResults(pep1, 2, mappings); m = sr.getResults().get(0); - assertEquals(cds.getSequenceAt(0).getDatasetSequence(), + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); @@ -1070,21 +1082,21 @@ public class AlignmentUtilsTests sr = MappingUtils.buildSearchResults(pep2, 1, mappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); - assertEquals(cds.getSequenceAt(1).getDatasetSequence(), + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT sr = MappingUtils.buildSearchResults(pep2, 2, mappings); m = sr.getResults().get(0); - assertEquals(cds.getSequenceAt(1).getDatasetSequence(), + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC sr = MappingUtils.buildSearchResults(pep2, 3, mappings); m = sr.getResults().get(0); - assertEquals(cds.getSequenceAt(1).getDatasetSequence(), + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); @@ -1118,8 +1130,12 @@ public class AlignmentUtilsTests mappings.add(acf); AlignedCodonFrame newMapping = new AlignedCodonFrame(); + List ungappedColumns = new ArrayList(); + ungappedColumns.add(new int[] { 4, 6 }); + ungappedColumns.add(new int[] { 10, 12 }); List cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf, - newMapping); + ungappedColumns, + newMapping, '-'); assertEquals(1, cdsSeqs.size()); SequenceI cdsSeq = cdsSeqs.get(0); @@ -1148,6 +1164,18 @@ public class AlignmentUtilsTests pep1.createDatasetSequence(); pep2.createDatasetSequence(); pep3.createDatasetSequence(); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f, + null)); pep1.getDatasetSequence().addDBRef( new DBRefEntry("EMBLCDS", "2", "A12345")); pep2.getDatasetSequence().addDBRef( @@ -1156,9 +1184,7 @@ public class AlignmentUtilsTests new DBRefEntry("EMBLCDS", "4", "A12347")); /* - * Make the mappings from dna to protein. Using LinkedHashset is a - * convenience so results are in the input order. There is no assertion that - * the generated exon sequences are in any particular order. + * Make the mappings from dna to protein */ List mappings = new ArrayList(); // map ...GGG...TTT to GF @@ -1185,7 +1211,7 @@ public class AlignmentUtilsTests * exon-to-protein and exon-to-dna mappings */ AlignmentI exal = AlignmentUtils.makeCdsAlignment( - new SequenceI[] { dna1 }, mappings); + new SequenceI[] { dna1 }, mappings, '-'); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively @@ -1194,7 +1220,7 @@ public class AlignmentUtilsTests assertEquals(3, cds.size()); SequenceI cdsSeq = cds.get(0); - assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString()); assertEquals("dna1|A12345", cdsSeq.getName()); assertEquals(1, cdsSeq.getDBRefs().length); DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; @@ -1203,7 +1229,7 @@ public class AlignmentUtilsTests assertEquals("A12345", cdsRef.getAccessionId()); cdsSeq = cds.get(1); - assertEquals("aaaccc", cdsSeq.getSequenceAsString()); + assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString()); assertEquals("dna1|A12346", cdsSeq.getName()); assertEquals(1, cdsSeq.getDBRefs().length); cdsRef = cdsSeq.getDBRefs()[0]; @@ -1212,7 +1238,7 @@ public class AlignmentUtilsTests assertEquals("A12346", cdsRef.getAccessionId()); cdsSeq = cds.get(2); - assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); + assertEquals("aaa------TTT", cdsSeq.getSequenceAsString()); assertEquals("dna1|A12347", cdsSeq.getName()); assertEquals(1, cdsSeq.getDBRefs().length); cdsRef = cdsSeq.getDBRefs()[0]; @@ -1490,4 +1516,184 @@ public class AlignmentUtilsTests assertEquals(1, sf.getBegin()); assertEquals(6, sf.getEnd()); } + + /** + * Test the method that extracts the cds-only part of a dna alignment, for the + * case where the cds should be aligned to match its nucleotide sequence. + */ + @Test(groups = { "Functional" }) + public void testMakeCdsAlignment_alternativeTranscripts() + { + SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG"); + // alternative transcript of same dna skips CCC codon + SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG"); + // dna3 has no mapping (protein product) so should be ignored here + SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG"); + SequenceI pep1 = new Sequence("pep1", "GPFG"); + SequenceI pep2 = new Sequence("pep2", "GPG"); + dna1.createDatasetSequence(); + dna2.createDatasetSequence(); + dna3.createDatasetSequence(); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f, + null)); + dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f, + null)); + dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f, + null)); + dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f, + null)); + dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f, + null)); + + List mappings = new ArrayList(); + MapList map = new MapList(new int[] { 4, 12, 16, 18 }, + new int[] { 1, 4 }, 3, 1); + AlignedCodonFrame acf = new AlignedCodonFrame(); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); + mappings.add(acf); + map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 }, + new int[] { 1, 3 }, + 3, 1); + acf = new AlignedCodonFrame(); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); + mappings.add(acf); + + AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { + dna1, dna2, dna3 }, mappings, '-'); + assertEquals(2, cds.getSequences().size()); + assertEquals("GGGCCCTTTGGG", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGCC---TGGG", cds.getSequenceAt(1).getSequenceAsString()); + + /* + * Verify updated mappings + */ + assertEquals(2, mappings.size()); + + /* + * Mapping from pep1 to GGGTTT in first new CDS sequence + */ + List pep1Mapping = MappingUtils + .findMappingsForSequence(pep1, mappings); + assertEquals(1, pep1Mapping.size()); + /* + * maps GPFG to 1-3,4-6,7-9,10-12 + */ + SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); + assertEquals(1, sr.getResults().size()); + Match m = sr.getResults().get(0); + assertEquals(cds.getSequenceAt(0).getDatasetSequence(), + m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + sr = MappingUtils.buildSearchResults(pep1, 2, mappings); + m = sr.getResults().get(0); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + sr = MappingUtils.buildSearchResults(pep1, 3, mappings); + m = sr.getResults().get(0); + assertEquals(7, m.getStart()); + assertEquals(9, m.getEnd()); + sr = MappingUtils.buildSearchResults(pep1, 4, mappings); + m = sr.getResults().get(0); + assertEquals(10, m.getStart()); + assertEquals(12, m.getEnd()); + + /* + * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence + */ + List pep2Mapping = MappingUtils + .findMappingsForSequence(pep2, mappings); + assertEquals(1, pep2Mapping.size()); + sr = MappingUtils.buildSearchResults(pep2, 1, mappings); + assertEquals(1, sr.getResults().size()); + m = sr.getResults().get(0); + assertEquals(cds.getSequenceAt(1).getDatasetSequence(), + m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + sr = MappingUtils.buildSearchResults(pep2, 2, mappings); + m = sr.getResults().get(0); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + sr = MappingUtils.buildSearchResults(pep2, 3, mappings); + m = sr.getResults().get(0); + assertEquals(7, m.getStart()); + assertEquals(9, m.getEnd()); + } + + /** + * Tests for gapped column in sequences + */ + @Test(groups = { "Functional" }) + public void testIsGappedColumn() + { + SequenceI seq1 = new Sequence("Seq1", "a--c.tc-a-g"); + SequenceI seq2 = new Sequence("Seq2", "aa---t--a-g"); + SequenceI seq3 = new Sequence("Seq3", "ag-c t-g-"); + List seqs = Arrays + .asList(new SequenceI[] { seq1, seq2, seq3 }); + // the column number is base 1 + assertFalse(AlignmentUtils.isGappedColumn(seqs, 1)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 2)); + assertTrue(AlignmentUtils.isGappedColumn(seqs, 3)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 4)); + assertTrue(AlignmentUtils.isGappedColumn(seqs, 5)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 6)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 7)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 8)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 9)); + assertTrue(AlignmentUtils.isGappedColumn(seqs, 10)); + assertFalse(AlignmentUtils.isGappedColumn(seqs, 11)); + // out of bounds: + assertTrue(AlignmentUtils.isGappedColumn(seqs, 0)); + assertTrue(AlignmentUtils.isGappedColumn(seqs, 100)); + assertTrue(AlignmentUtils.isGappedColumn(seqs, -100)); + assertTrue(AlignmentUtils.isGappedColumn(null, 0)); + } + + @Test(groups = { "Functional" }) + public void testFindCdsColumns() + { + // TODO target method belongs in a general-purpose alignment + // analysis method to find columns for feature + + /* + * NB this method assumes CDS ranges are contiguous (no introns) + */ + SequenceI gene = new Sequence("gene", "aaacccgggtttaaacccgggttt"); + SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-"); + SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt"); + seq1.createDatasetSequence(); + seq2.createDatasetSequence(); + seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f, + null)); + seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f, + null)); + seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f, + null)); + seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f, + null)); + seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f, + null)); + seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f, + null)); + seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f, + null)); + seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f, + null)); + seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f, + null)); + + List cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] { + seq1, seq2 }); + assertEquals(4, cdsColumns.size()); + assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0))); + assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1))); + assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2))); + assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3))); + } } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 0d40037..b8116f5 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -523,4 +523,14 @@ public class SequenceTest assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0)); assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0))); } + + @Test(groups = "Functional") + public void testGetCharAt() + { + SequenceI sq = new Sequence("", "abcde"); + assertEquals('a', sq.getCharAt(0)); + assertEquals('e', sq.getCharAt(4)); + assertEquals(' ', sq.getCharAt(5)); + assertEquals(' ', sq.getCharAt(-1)); + } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index 2520de0..c442b6f 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -697,6 +697,12 @@ public class MapListTest List merged = MapList.coalesceRanges(ranges); assertEquals(1, merged.size()); assertArrayEquals(new int[] { 1, 7 }, merged.get(0)); + // verify input list is unchanged + assertEquals(4, ranges.size()); + assertArrayEquals(new int[] { 1, 3 }, ranges.get(0)); + assertArrayEquals(new int[] { 4, 5 }, ranges.get(1)); + assertArrayEquals(new int[] { 5, 5 }, ranges.get(2)); + assertArrayEquals(new int[] { 5, 7 }, ranges.get(3)); // merging in reverse direction: ranges.clear(); @@ -723,4 +729,62 @@ public class MapListTest assertArrayEquals(new int[] { 1, 6 }, merged.get(0)); assertArrayEquals(new int[] { 12, 7 }, merged.get(1)); } + + /** + * Test the method that merges a list of ranges where possible + */ + @Test(groups = { "Functional" }) + public void testCoalesceRanges_withOverlap() + { + List ranges = new ArrayList(); + ranges.add(new int[] { 1, 3 }); + ranges.add(new int[] { 2, 5 }); + + /* + * [2, 5] should extend [1, 3] + */ + List merged = MapList.coalesceRanges(ranges); + assertEquals(1, merged.size()); + assertArrayEquals(new int[] { 1, 5 }, merged.get(0)); + + /* + * a subsumed interval should be dropped + */ + ranges.clear(); + ranges.add(new int[] { 1, 6 }); + ranges.add(new int[] { 2, 4 }); + merged = MapList.coalesceRanges(ranges); + assertEquals(1, merged.size()); + assertArrayEquals(new int[] { 1, 6 }, merged.get(0)); + + ranges.clear(); + ranges.add(new int[] { 1, 5 }); + ranges.add(new int[] { 1, 6 }); + merged = MapList.coalesceRanges(ranges); + assertEquals(1, merged.size()); + assertArrayEquals(new int[] { 1, 6 }, merged.get(0)); + + /* + * merge duplicate ranges + */ + ranges.clear(); + ranges.add(new int[] { 1, 3 }); + ranges.add(new int[] { 1, 3 }); + merged = MapList.coalesceRanges(ranges); + assertEquals(1, merged.size()); + assertArrayEquals(new int[] { 1, 3 }, merged.get(0)); + + /* + * reverse direction + */ + ranges.clear(); + ranges.add(new int[] { 9, 5 }); + ranges.add(new int[] { 9, 4 }); + ranges.add(new int[] { 8, 3 }); + ranges.add(new int[] { 3, 2 }); + ranges.add(new int[] { 1, 0 }); + merged = MapList.coalesceRanges(ranges); + assertEquals(1, merged.size()); + assertArrayEquals(new int[] { 9, 0 }, merged.get(0)); + } } diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 095ab1b..3c4d4f8 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -21,6 +21,7 @@ package jalview.util; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; @@ -911,4 +912,58 @@ public class MappingUtilsTest assertEquals(1, ranges.size()); assertArrayEquals(new int[] { 12, 7 }, ranges.get(0)); } + + @Test(groups = { "Functional" }) + public void testGetLength() + { + assertEquals(0, MappingUtils.getLength(null)); + List ranges = new ArrayList(); + assertEquals(0, MappingUtils.getLength(ranges)); + ranges.add(new int[] { 1, 1 }); + assertEquals(1, MappingUtils.getLength(ranges)); + ranges.add(new int[] { 2, 10 }); + assertEquals(10, MappingUtils.getLength(ranges)); + ranges.add(new int[] { 20, 10 }); + assertEquals(21, MappingUtils.getLength(ranges)); + } + + @Test(groups = { "Functional" }) + public void testContains() + { + assertFalse(MappingUtils.contains(null, 1)); + List ranges = new ArrayList(); + assertFalse(MappingUtils.contains(ranges, 1)); + + ranges.add(new int[] { 1, 4 }); + ranges.add(new int[] { 6, 6 }); + ranges.add(new int[] { 8, 10 }); + ranges.add(new int[] { 30, 20 }); + ranges.add(new int[] { -16, -44 }); + + assertFalse(MappingUtils.contains(ranges, 0)); + assertTrue(MappingUtils.contains(ranges, 1)); + assertTrue(MappingUtils.contains(ranges, 2)); + assertTrue(MappingUtils.contains(ranges, 3)); + assertTrue(MappingUtils.contains(ranges, 4)); + assertFalse(MappingUtils.contains(ranges, 5)); + + assertTrue(MappingUtils.contains(ranges, 6)); + assertFalse(MappingUtils.contains(ranges, 7)); + + assertTrue(MappingUtils.contains(ranges, 8)); + assertTrue(MappingUtils.contains(ranges, 9)); + assertTrue(MappingUtils.contains(ranges, 10)); + + assertFalse(MappingUtils.contains(ranges, 31)); + assertTrue(MappingUtils.contains(ranges, 30)); + assertTrue(MappingUtils.contains(ranges, 29)); + assertTrue(MappingUtils.contains(ranges, 20)); + assertFalse(MappingUtils.contains(ranges, 19)); + + assertFalse(MappingUtils.contains(ranges, -15)); + assertTrue(MappingUtils.contains(ranges, -16)); + assertTrue(MappingUtils.contains(ranges, -44)); + assertFalse(MappingUtils.contains(ranges, -45)); + } + }