Add method to SequenceUtil to clean the protein sequence
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
index b6e74ae..3e351bf 100644 (file)
@@ -83,6 +83,31 @@ public class SequenceUtilTester {
        }\r
 \r
        @Test()\r
+       public void testCleanProteinSequence() {\r
+               String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+               assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
+               // Even after cleaning, when only valid letters remain, this is still NOT\r
+               // a protein sequence: every remaining letter is also a nucleotide code.\r
+               assertFalse(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(dirtySeq)));\r
+\r
+               String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
+               assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(notaSeq)));\r
+\r
+               String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+               assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+               AAseq += "XU";\r
+\r
+               assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+       }\r
+\r
+       @Test()\r
        public void testReadWriteFasta() {\r
 \r
                try {\r