import jalview.io.FileParse;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
+import jalview.util.Comparison;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
{
complement.append(",");
}
- if ("HGMD_MUTATION".equalsIgnoreCase(allele))
+
+ /*
+ * some 'alleles' are actually descriptive terms
+ * e.g. HGMD_MUTATION, PhenCode_variation
+ * - we don't want to 'reverse complement' these
+ */
+ if (!Comparison.isNucleotideSequence(allele, true))
{
complement.append(allele);
}
else
{
- char[] alleles = allele.toCharArray();
- for (int i = alleles.length - 1; i >= 0; i--)
+ for (int i = allele.length() - 1; i >= 0; i--)
{
- complement.append(Dna.getComplement(alleles[i]));
+ complement.append(Dna.getComplement(allele.charAt(i)));
}
}
}
* @param letters
* @return
*/
- public static final boolean areNucleotide(char[][] letters)
+ static final boolean areNucleotide(char[][] letters)
{
int ntCount = 0;
int aaCount = 0;
// to save a lengthy calculation
for (char c : seq)
{
- if ('a' <= c && c <= 'z')
- {
- c -= TO_UPPER_CASE;
- }
-
- if (c == 'A' || c == 'G' || c == 'C' || c == 'T' || c == 'U')
+ if (isNucleotide(c))
{
ntCount++;
}
- else if (!Comparison.isGap(c))
+ else if (!isGap(c))
{
aaCount++;
}
}
/**
+ * Answers true if the character is one of aAcCgGtTuU, else false (so gap
+ * characters and all other letters are not counted as nucleotide)
+ *
+ * @param c
+ *          the character to test
+ * @return true if c is A, C, G, T or U in either case, otherwise false
+ */
+ public static boolean isNucleotide(char c)
+ {
+ if ('a' <= c && c <= 'z')
+ {
+ c -= TO_UPPER_CASE; // NOTE(review): constant is declared outside this view; presumably the 'a' - 'A' offset - confirm
+ }
+
+ switch (c)
+ {
+ case 'A':
+ case 'C':
+ case 'G':
+ case 'T':
+ case 'U':
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Answers true if every character in the string is one of aAcCgGtTuU, or
+ * (optionally) a gap character (dot, dash, space), else false. A null string
+ * gives false; an empty string gives true, since it contains no character
+ * that fails the test.
+ *
+ * @param s
+ *          the string to check (may be null)
+ * @param allowGaps
+ *          if true, characters accepted by isGap are allowed as well as
+ *          nucleotide letters; if false, any non-nucleotide character
+ *          (including a gap) gives a false result
+ * @return true if all characters pass the test, else false
+ */
+ public static boolean isNucleotideSequence(String s, boolean allowGaps)
+ {
+ if (s == null)
+ {
+ return false;
+ }
+ for (int i = 0; i < s.length(); i++)
+ {
+ char c = s.charAt(i);
+ if (!isNucleotide(c))
+ {
+ if (!allowGaps || !isGap(c))
+ {
+ return false; // the first disallowed character decides the result
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
* Convenience overload of isNucleotide
*
* @param seqs
sb = new StringBuilder();
EnsemblSeqProxy.reverseComplementAllele(sb, "-GATt"); // revcomp=aATC-
EnsemblSeqProxy.reverseComplementAllele(sb, "hgmd_mutation");
- assertEquals("aATC-,hgmd_mutation", sb.toString());
+ EnsemblSeqProxy.reverseComplementAllele(sb, "PhenCode_variation");
+ assertEquals("aATC-,hgmd_mutation,PhenCode_variation", sb.toString());
}
/**
* AGCTU. Test is not case-sensitive and ignores gaps.
*/
@Test(groups = { "Functional" })
- public void testIsNucleotide()
+ public void testIsNucleotide_sequences()
{
SequenceI seq = new Sequence("eightypercent", "agctuAGCPV");
assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
0.001f);
}
+ @Test(groups = { "Functional" })
+ public void testIsNucleotide()
+ {
+ assertTrue(Comparison.isNucleotide('a')); // each of A, C, G, T, U is accepted in lower and upper case
+ assertTrue(Comparison.isNucleotide('A'));
+ assertTrue(Comparison.isNucleotide('c'));
+ assertTrue(Comparison.isNucleotide('C'));
+ assertTrue(Comparison.isNucleotide('g'));
+ assertTrue(Comparison.isNucleotide('G'));
+ assertTrue(Comparison.isNucleotide('t'));
+ assertTrue(Comparison.isNucleotide('T'));
+ assertTrue(Comparison.isNucleotide('u'));
+ assertTrue(Comparison.isNucleotide('U'));
+ assertFalse(Comparison.isNucleotide('-')); // a gap character is not a nucleotide
+ assertFalse(Comparison.isNucleotide('P')); // nor is a letter outside ACGTU
+ }
+
/**
* Test the percentage identity calculation for two sequences
*/
assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true),
0.001f);
}
+
+ @Test(groups = { "Functional" })
+ public void testIsNucleotideSequence()
+ {
+ assertFalse(Comparison.isNucleotideSequence(null, true)); // null is never a nucleotide sequence
+ assertTrue(Comparison.isNucleotideSequence("", true)); // empty string has no failing character
+ assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", true)); // pure nucleotide passes with or without gaps allowed
+ assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", false));
+ assertFalse(Comparison.isNucleotideSequence("xAgGcCtTuU", false)); // non-nucleotide in first position
+ assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false)); // non-nucleotide in last position
+ assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true)); // space, dash and dot pass only if gaps are allowed
+ assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false));
+ }
}