Merge branch 'JWS-29' into JABAWS_Release_2_0
author jprocter <jprocter@compbio.dundee.ac.uk>
Fri, 1 Jun 2012 11:56:14 +0000 (12:56 +0100)
committer jprocter <jprocter@compbio.dundee.ac.uk>
Fri, 1 Jun 2012 11:56:14 +0000 (12:56 +0100)
datamodel/compbio/data/sequence/FastaReader.java
testsrc/compbio/data/sequence/FastaReaderTester.java
testsrc/testdata/complicated.fasta

index 1641d34..6c3e943 100644 (file)
@@ -22,6 +22,9 @@ import java.io.FileNotFoundException;
 import java.io.InputStream;\r
 import java.util.Iterator;\r
 import java.util.Scanner;\r
+import java.util.regex.MatchResult;\r
+\r
+import javax.vecmath.MismatchedSizeException;\r
 \r
 import compbio.util.Util;\r
 \r
@@ -67,7 +70,10 @@ import compbio.util.Util;
 public class FastaReader implements Iterator<FastaSequence> {\r
 \r
        private final Scanner input;\r
-\r
+       /**\r
+        * Delimiter for the scanner\r
+        */\r
+       private final String DELIM=">";\r
        /**\r
         * Header data can contain non-ASCII symbols and read in UTF8\r
         * \r
@@ -82,7 +88,7 @@ public class FastaReader implements Iterator<FastaSequence> {
         */\r
        public FastaReader(final String inputFile) throws FileNotFoundException {\r
                input = new Scanner(new File(inputFile), "UTF8");\r
-               input.useDelimiter("\\s*>");\r
+               input.useDelimiter(DELIM);\r
                Runtime.getRuntime().addShutdownHook(new Thread() {\r
 \r
                        @Override\r
@@ -104,7 +110,7 @@ public class FastaReader implements Iterator<FastaSequence> {
        public FastaReader(final InputStream inputStream)\r
                        throws FileNotFoundException {\r
                input = new Scanner(inputStream);\r
-               input.useDelimiter("\\s*>");\r
+               input.useDelimiter(DELIM);\r
        }\r
        /**\r
         * {@inheritDoc}\r
@@ -124,10 +130,17 @@ public class FastaReader implements Iterator<FastaSequence> {
         *             if the header or the sequence is missing\r
         * @throws IllegalStateException\r
         *             if the close method was called on this instance\r
+        * @throws java.util.NoSuchElementException if there are no more FastaSequences.\r
         */\r
        @Override\r
        public FastaSequence next() {\r
-               return FastaReader.toFastaSequence(input.next());\r
+               String fastaHeader=input.next();\r
+               while (fastaHeader.indexOf("\n")<0 && input.hasNext())\r
+               {\r
+                       fastaHeader = fastaHeader.concat(">");\r
+                       fastaHeader = fastaHeader.concat(input.next());\r
+               }\r
+               return FastaReader.toFastaSequence(fastaHeader);\r
        }\r
 \r
        /**\r
index d57bbe5..3a3b747 100644 (file)
@@ -34,6 +34,9 @@ public class FastaReaderTester {
        static FastaSequence s3 = new FastaSequence(" 12 d t y wi               k       jbke    ",\r
                        "  KLSHHDCD" + "   N" + "    H" + "    HSKCTEPHCGNSHQML\n\rHRDP"\r
                                        + "    CCDQCQSWEAENWCASMRKAILF");\r
+       static FastaSequence s4 = new FastaSequence(" 12 d t>y wi->foo          k       jbke    ",\r
+                       "  KLSHHDCD" + "   N" + "    H" + "    HSKCTEPHCGNSHQML\n\rHRDP"\r
+                                       + "    CCDQCQSWEAENWCASMRKAILF");\r
        @Test()\r
        public void test() {\r
 \r
@@ -68,6 +71,7 @@ public class FastaReaderTester {
                Assert.assertEquals(FastaReaderTester.s1, list.get(1));\r
                Assert.assertEquals(FastaReaderTester.s2, list.get(2));\r
                Assert.assertEquals(FastaReaderTester.s3, list.get(3));\r
+               Assert.assertEquals(FastaReaderTester.s4, list.get(4));\r
 \r
        }\r
 }\r
index 3c891d6..e0a71d5 100644 (file)
@@ -20,4 +20,11 @@ EFITEA       WWGRWGAITFFHAH  ENKNEIQECSDQNLKE        SRTTCEIID   TCHLFTRHLDGW
    N\r
     H\r
     HSKCTEPHCGNSHQMLHRDP\r
-    CCDQCQSWEAENWCASMRKAILF
\ No newline at end of file
+    CCDQCQSWEAENWCASMRKAILF\r
+   > 12 d t>y wi->foo          k       jbke    \r
+  KLSHHDCD\r
+   N\r
+    H\r
+    HSKCTEPHCGNSHQMLHRDP\r
+    CCDQCQSWEAENWCASMRKAILF\r
+    
\ No newline at end of file