From 0ae70dbd95d7eb6932c1ec1252628f58f0989668 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 14 Apr 2016 12:04:15 +0100 Subject: [PATCH] JAL-1705 refactored/utility methods to detect e.g. 'PhenCode_variation' --- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 14 +++-- src/jalview/util/Comparison.java | 64 ++++++++++++++++++--- test/jalview/ext/ensembl/EnsemblSeqProxyTest.java | 3 +- test/jalview/util/ComparisonTest.java | 32 ++++++++++- 4 files changed, 99 insertions(+), 14 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 8fb668a..fb81e66 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -14,6 +14,7 @@ import jalview.io.FastaFile; import jalview.io.FileParse; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; +import jalview.util.Comparison; import jalview.util.DBRefUtils; import jalview.util.MapList; @@ -679,16 +680,21 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { complement.append(","); } - if ("HGMD_MUTATION".equalsIgnoreCase(allele)) + + /* + * some 'alleles' are actually descriptive terms + * e.g. HGMD_MUTATION, PhenCode_variation + * - we don't want to 'reverse complement' these + */ + if (!Comparison.isNucleotideSequence(allele, true)) { complement.append(allele); } else { - char[] alleles = allele.toCharArray(); - for (int i = alleles.length - 1; i >= 0; i--) + for (int i = allele.length() - 1; i >= 0; i--) { - complement.append(Dna.getComplement(alleles[i])); + complement.append(Dna.getComplement(allele.charAt(i))); } } } diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 8902e2c..5605a53 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -286,7 +286,7 @@ public class Comparison * @param letters * @return */ - public static final boolean areNucleotide(char[][] letters) + static final boolean areNucleotide(char[][] letters) { int ntCount = 0; int aaCount = 0; @@ -300,16 +300,11 @@ public class Comparison // to save a lengthy calculation for (char c : seq) { - if ('a' <= c && c <= 'z') - { - c -= TO_UPPER_CASE; - } - - if (c == 'A' || c == 'G' || c == 'C' || c == 'T' || c == 'U') + if (isNucleotide(c)) { ntCount++; } - else if (!Comparison.isGap(c)) + else if (!isGap(c)) { aaCount++; } @@ -332,6 +327,59 @@ public class Comparison } /** + * Answers true if the character is one of aAcCgGtTuU + * + * @param c + * @return + */ + public static boolean isNucleotide(char c) + { + if ('a' <= c && c <= 'z') + { + c -= TO_UPPER_CASE; + } + + switch (c) + { + case 'A': + case 'C': + case 'G': + case 'T': + case 'U': + return true; + } + return false; + } + + /** + * Answers true if every character in the string is one of aAcCgGtTuU, or + * (optionally) a gap character (dot, dash, space), else false + * + * @param s + * @param allowGaps + * @return + */ + public static boolean isNucleotideSequence(String s, boolean allowGaps) + { + if (s == null) + { + return false; + } + for (int i = 0; i < s.length(); i++) + { + char c = s.charAt(i); + if (!isNucleotide(c)) + { + if (!allowGaps || !isGap(c)) + { + return false; + } + } + } + return true; + } + + /** * Convenience overload of isNucleotide * * @param seqs diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index 6df479c..71f0212 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -267,7 +267,8 @@ public class EnsemblSeqProxyTest sb = new StringBuilder(); EnsemblSeqProxy.reverseComplementAllele(sb, "-GATt"); // revcomp=aATC- EnsemblSeqProxy.reverseComplementAllele(sb, "hgmd_mutation"); - assertEquals("aATC-,hgmd_mutation", sb.toString()); + EnsemblSeqProxy.reverseComplementAllele(sb, "PhenCode_variation"); + assertEquals("aATC-,hgmd_mutation,PhenCode_variation", sb.toString()); } /** diff --git a/test/jalview/util/ComparisonTest.java b/test/jalview/util/ComparisonTest.java index 0c2c998..9aab66c 100644 --- a/test/jalview/util/ComparisonTest.java +++ b/test/jalview/util/ComparisonTest.java @@ -49,7 +49,7 @@ public class ComparisonTest * AGCTU. Test is not case-sensitive and ignores gaps. */ @Test(groups = { "Functional" }) - public void testIsNucleotide() + public void testIsNucleotide_sequences() { SequenceI seq = new Sequence("eightypercent", "agctuAGCPV"); assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); @@ -130,6 +130,23 @@ public class ComparisonTest 0.001f); } + @Test(groups = { "Functional" }) + public void testIsNucleotide() + { + assertTrue(Comparison.isNucleotide('a')); + assertTrue(Comparison.isNucleotide('A')); + assertTrue(Comparison.isNucleotide('c')); + assertTrue(Comparison.isNucleotide('C')); + assertTrue(Comparison.isNucleotide('g')); + assertTrue(Comparison.isNucleotide('G')); + assertTrue(Comparison.isNucleotide('t')); + assertTrue(Comparison.isNucleotide('T')); + assertTrue(Comparison.isNucleotide('u')); + assertTrue(Comparison.isNucleotide('U')); + assertFalse(Comparison.isNucleotide('-')); + assertFalse(Comparison.isNucleotide('P')); + } + /** * Test the percentage identity calculation for two sequences */ @@ -158,4 +175,17 @@ public class ComparisonTest assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true), 0.001f); } + + @Test(groups = { "Functional" }) + public void testIsNucleotideSequence() + { + assertFalse(Comparison.isNucleotideSequence(null, true)); + assertTrue(Comparison.isNucleotideSequence("", true)); + assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", true)); + assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", false)); + assertFalse(Comparison.isNucleotideSequence("xAgGcCtTuU", false)); + assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false)); + assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true)); + assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false)); + } } -- 1.7.10.2