From a57f7818a994eaabd11db26e83bc23832cc7b359 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 15 Mar 2016 14:26:45 +0000 Subject: [PATCH] JAL-1705 compute reverse complement of variants on reverse strand --- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 133 +++++++++++++++++---- test/jalview/ext/ensembl/EnsemblSeqProxyTest.java | 66 +++++++++- 2 files changed, 172 insertions(+), 27 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 4af6525..b4c708d 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -1,6 +1,7 @@ package jalview.ext.ensembl; import jalview.analysis.AlignmentUtils; +import jalview.analysis.Dna; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -33,6 +34,8 @@ import java.util.List; */ public abstract class EnsemblSeqProxy extends EnsemblRestClient { + private static final String ALLELES = "alleles"; + private static final List CROSS_REFERENCES = Arrays .asList(new String[] { "CCDS", "Uniprot/SWISSPROT", "Uniprot/SPTREMBL" }); @@ -611,9 +614,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * @param mapping * mapping from the sequence feature's coordinates to the target * sequence + * @param forwardStrand */ protected void transferFeature(SequenceFeature sf, - SequenceI targetSequence, MapList mapping) + SequenceI targetSequence, MapList mapping, boolean forwardStrand) { int start = sf.getBegin(); int end = sf.getEnd(); @@ -627,20 +631,77 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient targetSequence.addSequenceFeature(copy); /* - * for sequence_variant, make an additional feature with consequence + * for sequence_variant on reverse strand, have to convert the allele + * values to their complements */ - // if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - // SequenceOntologyI.SEQUENCE_VARIANT)) - // { - // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE); - // if (consequence != null) - // { - // SequenceFeature sf2 = new SequenceFeature("consequence", - // consequence, copy.getBegin(), copy.getEnd(), 0f, - // null); - // targetSequence.addSequenceFeature(sf2); - // } - // } + if (!forwardStrand + && SequenceOntologyFactory.getInstance().isA(sf.getType(), + SequenceOntologyI.SEQUENCE_VARIANT)) + { + reverseComplementAlleles(copy); + } + } + } + + /** + * Change the 'alleles' value of a feature by converting to complementary + * bases, and also update the feature description to match + * + * @param sf + */ + static void reverseComplementAlleles(SequenceFeature sf) + { + final String alleles = (String) sf.getValue(ALLELES); + if (alleles == null) + { + return; + } + StringBuilder complement = new StringBuilder(alleles.length()); + for (String allele : alleles.split(",")) + { + reverseComplementAllele(complement, allele); + } + String comp = complement.toString(); + sf.setValue(ALLELES, comp); + sf.setDescription(comp); + + /* + * replace value of "alleles=" in sf.ATTRIBUTES as well + * so 'output as GFF' shows reverse complement alleles + */ + String atts = sf.getAttributes(); + if (atts != null) + { + atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp); + sf.setAttributes(atts); + } + } + + /** + * Makes the 'reverse complement' of the given allele and appends it to the + * buffer, after a comma separator if not the first + * + * @param complement + * @param allele + */ + static void reverseComplementAllele(StringBuilder complement, + String allele) + { + if (complement.length() > 0) + { + complement.append(","); + } + if ("HGMD_MUTATION".equalsIgnoreCase(allele)) + { + complement.append(allele); + } + else + { + char[] alleles = allele.toCharArray(); + for (int i = alleles.length - 1; i >= 0; i--) + { + complement.append(Dna.getComplement(alleles[i])); + } } } @@ -695,25 +756,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient final boolean forwardStrand = mapping.isFromForwardStrand(); /* - * sort features by start position (descending if reverse strand) - * before transferring (in forwards order) to the target sequence + * sort features by start position (which corresponds to end + * position descending if reverse strand) so as to add them in + * 'forwards' order to the target sequence */ - Arrays.sort(features, new Comparator() - { - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) - { - int c = Integer.compare(o1.getBegin(), o2.getBegin()); - return forwardStrand ? c : -c; - } - }); + sortFeatures(features, forwardStrand); boolean transferred = false; for (SequenceFeature sf : features) { if (retainFeature(sf, parentId)) { - transferFeature(sf, targetSequence, mapping); + transferFeature(sf, targetSequence, mapping, forwardStrand); transferred = true; } } @@ -721,6 +775,33 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** + * Sort features by start position ascending (if on forward strand), or end + * position descending (if on reverse strand) + * + * @param features + * @param forwardStrand + */ + protected static void sortFeatures(SequenceFeature[] features, + final boolean forwardStrand) + { + Arrays.sort(features, new Comparator() + { + @Override + public int compare(SequenceFeature o1, SequenceFeature o2) + { + if (forwardStrand) + { + return Integer.compare(o1.getBegin(), o2.getBegin()); + } + else + { + return Integer.compare(o2.getEnd(), o1.getEnd()); + } + } + }); + } + + /** * Answers true if the feature type is one we want to keep for the sequence. * Some features are only retrieved in order to identify the sequence range, * and may then be discarded as redundant information (e.g. "CDS" feature for diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index f9c2c4b..6df479c 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -3,10 +3,11 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; -import jalview.analysis.AlignmentUtils; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.AppletFormatAdapter; import jalview.io.FastaFile; @@ -247,4 +248,67 @@ public class EnsemblSeqProxyTest assertFalse(testee.isGeneIdentifier("ENSG000000123456")); assertFalse(testee.isGeneIdentifier("ENSG0000001234")); } + + /** + * Test the method that appends a single allele's reverse complement to a + * string buffer + */ + @Test(groups = "Functional") + public void testReverseComplementAllele() + { + StringBuilder sb = new StringBuilder(); + EnsemblSeqProxy.reverseComplementAllele(sb, "G"); // comp=C + EnsemblSeqProxy.reverseComplementAllele(sb, "g"); // comp=c + EnsemblSeqProxy.reverseComplementAllele(sb, "C"); // comp=G + EnsemblSeqProxy.reverseComplementAllele(sb, "T"); // comp=A + EnsemblSeqProxy.reverseComplementAllele(sb, "A"); // comp=T + assertEquals("C,c,G,A,T", sb.toString()); + + sb = new StringBuilder(); + EnsemblSeqProxy.reverseComplementAllele(sb, "-GATt"); // revcomp=aATC- + EnsemblSeqProxy.reverseComplementAllele(sb, "hgmd_mutation"); + assertEquals("aATC-,hgmd_mutation", sb.toString()); + } + + /** + * Test the method that computes the reverse complement of the alleles in a + * sequence_variant feature + */ + @Test(groups = "Functional") + public void testReverseComplementAlleles() + { + String alleles = "C,G,-TAC,HGMD_MUTATION,gac"; + SequenceFeature sf = new SequenceFeature("sequence_variant", alleles, + 1, 2, 0f, null); + sf.setValue("alleles", alleles); + sf.setAttributes("x=y,z;alleles=" + alleles + ";a=b,c"); + + EnsemblSeqProxy.reverseComplementAlleles(sf); + String revcomp = "G,C,GTA-,HGMD_MUTATION,gtc"; + // verify description is updated with reverse complement + assertEquals(revcomp, sf.getDescription()); + // verify alleles attribute is updated with reverse complement + assertEquals(revcomp, sf.getValue("alleles")); + // verify attributes string is updated with reverse complement + assertEquals("x=y,z;alleles=" + revcomp + ";a=b,c", sf.getAttributes()); + } + + @Test(groups = "Functional") + public void testSortFeatures() + { + SequenceFeature sf1 = new SequenceFeature("", "", 10, 15, 0f, null); + SequenceFeature sf2 = new SequenceFeature("", "", 8, 12, 0f, null); + SequenceFeature sf3 = new SequenceFeature("", "", 8, 13, 0f, null); + SequenceFeature sf4 = new SequenceFeature("", "", 11, 11, 0f, null); + SequenceFeature[] sfs = new SequenceFeature[] { sf1, sf2, sf3, sf4 }; + + // sort by start position ascending (forward strand) + // sf2 and sf3 tie and should not be reordered by sorting + EnsemblSeqProxy.sortFeatures(sfs, true); + assertArrayEquals(new SequenceFeature[] { sf2, sf3, sf1, sf4 }, sfs); + + // sort by end position descending (reverse strand) + EnsemblSeqProxy.sortFeatures(sfs, false); + assertArrayEquals(new SequenceFeature[] { sf1, sf3, sf2, sf4 }, sfs); + } } \ No newline at end of file -- 1.7.10.2