Add method to SequenceUtil to clean the protein sequence
author pvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Thu, 28 Apr 2011 15:17:21 +0000 (15:17 +0000)
committer pvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Thu, 28 Apr 2011 15:17:21 +0000 (15:17 +0000)
git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@4047 e3abac25-378b-4346-85de-24260fe3988d

build.xml
datamodel/compbio/data/sequence/SequenceUtil.java
testsrc/compbio/data/sequence/SequenceUtilTester.java
website/archive/datamodel-1.2.jar
website/archive/datamodel-src-1.2.jar

index fc99421..3d4535e 100644 (file)
--- a/build.xml
+++ b/build.xml
                        <compilerarg value="-Xlint:-unchecked" />\r
                        <classpath refid="project.classpath" />\r
                </javac>\r
-               <!-- Complile the engines -->\r
-               <javac srcdir="${basedir}/engine" destdir="${classes}" target="1.6" source="1.6" debug="off" optimize="on" encoding="UTF-8" verbose="false" nowarn="true">\r
-                       <compilerarg value="-Xlint:-all" />\r
-                       <compilerarg value="-Xlint:-unchecked" />\r
-                       <classpath refid="project.classpath" />\r
-               </javac>\r
-               <!-- Complile the runners -->\r
-               <javac srcdir="${basedir}/runner" destdir="${classes}" target="1.6" source="1.6" debug="off" optimize="on" encoding="UTF-8" verbose="false" nowarn="true">\r
-                       <compilerarg value="-Xlint:-unchecked" />\r
-                       <classpath refid="project.classpath" />\r
-               </javac>\r
-               <!-- Complile the webservices -->\r
-               <javac srcdir="${basedir}/webservices" destdir="${classes}" target="1.6" source="1.6" debug="off" optimize="on" encoding="UTF-8" verbose="false" nowarn="true">\r
-                       <compilerarg value="-Xlint:-unchecked" />\r
-                       <classpath refid="project.classpath" />\r
-               </javac>\r
-\r
+       \r
        </target>\r
 \r
 \r
index d0a6cd7..4d61d2a 100644 (file)
@@ -170,6 +170,17 @@ public final class SequenceUtil {
        }\r
 \r
        /**\r
+        * Removes all non amino acid (AA) characters from the sequence.\r
+        * <p>\r
+        * Every part of the input matched by {@code SequenceUtil.NON_AA} is\r
+        * replaced with the empty string; everything else is returned unchanged\r
+        * and in its original order.\r
+        * <p>\r
+        * NOTE(review): a {@code null} sequence is presumably not supported\r
+        * (assuming {@code NON_AA} is a {@code java.util.regex.Pattern}, the\r
+        * {@code matcher} call would throw) - confirm against callers.\r
+        * \r
+        * @param sequence\r
+        *            the sequence to clean\r
+        * @return the sequence with every {@code NON_AA} match deleted\r
+        */\r
+       public static String cleanProteinSequence(String sequence) {\r
+               return SequenceUtil.NON_AA.matcher(sequence).replaceAll("");\r
+       }\r
+\r
+       /**\r
         * @param sequence\r
         * @return true if the sequence is a protein sequence, false otherwise\r
         */\r
index b6e74ae..3e351bf 100644 (file)
@@ -83,6 +83,31 @@ public class SequenceUtilTester {
        }\r
 \r
        @Test()\r
+       public void testCleanProteinSequence() {\r
+               // Case 1: mixed-case a/t/g/c letters interleaved with tab, newline,\r
+               // carriage return and space characters.\r
+               String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+               assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
+               // The cleaned sequence is still NOT a protein sequence: although only\r
+               // valid letters remain, they all match the nucleotide alphabet.\r
+               assertFalse(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(dirtySeq)));\r
+\r
+               // Case 2: contains a digit ('1') and an 'm' among the a/t/g/c letters.\r
+               // Rejected as-is, accepted after cleaning - presumably the digit is\r
+               // stripped and the remaining 'm' keeps the result from being read as a\r
+               // nucleotide sequence (confirm against isProteinSequence).\r
+               String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
+               assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(notaSeq)));\r
+\r
+               // Case 3: a sequence that is already accepted as protein must still be\r
+               // accepted after cleaning.\r
+               String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+               assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+               // Case 4: appending "XU" makes the sequence rejected; cleaning makes it\r
+               // accepted again (presumably 'X' and 'U' are matched by NON_AA).\r
+               AAseq += "XU";\r
+\r
+               assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+       }\r
+\r
+       @Test()\r
        public void testReadWriteFasta() {\r
 \r
                try {\r
index 4c7cffc..ecbf161 100644 (file)
Binary files a/website/archive/datamodel-1.2.jar and b/website/archive/datamodel-1.2.jar differ
index 6b324de..06610ed 100644 (file)
Binary files a/website/archive/datamodel-src-1.2.jar and b/website/archive/datamodel-src-1.2.jar differ