X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datamodel%2Fcompbio%2Fdata%2Fsequence%2FFastaReader.java;h=6c3e9432fb416ae9abd1c4c5c59ac92df6f41f64;hb=8ce6db56df6c0a1c2baca2fc2d33bcb10067c6cd;hp=ed91e937253ba899a8a4c710cca91682deff5246;hpb=2c9b93f44b1837859552b14ab82d265ec7959d1b;p=jabaws.git diff --git a/datamodel/compbio/data/sequence/FastaReader.java b/datamodel/compbio/data/sequence/FastaReader.java index ed91e93..6c3e943 100644 --- a/datamodel/compbio/data/sequence/FastaReader.java +++ b/datamodel/compbio/data/sequence/FastaReader.java @@ -19,8 +19,12 @@ package compbio.data.sequence; import java.io.File; import java.io.FileNotFoundException; +import java.io.InputStream; import java.util.Iterator; import java.util.Scanner; +import java.util.regex.MatchResult; + +import javax.vecmath.MismatchedSizeException; import compbio.util.Util; @@ -66,11 +70,14 @@ import compbio.util.Util; public class FastaReader implements Iterator { private final Scanner input; - + /** + * Delimiter for the scanner + */ + private final String DELIM=">"; /** * Header data can contain non-ASCII symbols and read in UTF8 * - * @param input + * @param inputFile * the file containing the list of FASTA formatted sequences to * read from * @throws FileNotFoundException @@ -81,7 +88,7 @@ public class FastaReader implements Iterator { */ public FastaReader(final String inputFile) throws FileNotFoundException { input = new Scanner(new File(inputFile), "UTF8"); - input.useDelimiter("\\s*>"); + input.useDelimiter(DELIM); Runtime.getRuntime().addShutdownHook(new Thread() { @Override @@ -92,6 +99,19 @@ public class FastaReader implements Iterator { } }); } + + /** + * This class will not close the incoming stream! So the client should do + * so. + * + * @param inputStream + * @throws FileNotFoundException + */ + public FastaReader(final InputStream inputStream) + throws FileNotFoundException { + input = new Scanner(inputStream); + input.useDelimiter(DELIM); + } /** * {@inheritDoc} * @@ -110,10 +130,17 @@ public class FastaReader implements Iterator { * if the header or the sequence is missing * @throws IllegalStateException * if the close method was called on this instance + * @throws MismatchException - if there were no more FastaSequence's. */ @Override public FastaSequence next() { - return FastaReader.toFastaSequence(input.next()); + String fastaHeader=input.next(); + while (fastaHeader.indexOf("\n")<0 && input.hasNext()) + { + fastaHeader = fastaHeader.concat(">"); + fastaHeader = fastaHeader.concat(input.next()); + } + return FastaReader.toFastaSequence(fastaHeader); } /** @@ -135,26 +162,24 @@ public class FastaReader implements Iterator { } private static FastaSequence toFastaSequence(final String singleFastaEntry) { - final Scanner sc = new Scanner(singleFastaEntry); - // Use new line delimiter - sc.useDelimiter("\n"); - if (!sc.hasNext()) { + + assert !Util.isEmpty(singleFastaEntry) : "Empty String where FASTA sequence is expected!"; + + int nlineidx = singleFastaEntry.indexOf("\n"); + if (nlineidx < 0) { throw new AssertionError( "The FASTA sequence must contain the header information" + " separated by the new line from the sequence. Given sequence does not appear to " + "contain the header! Given data:\n " + singleFastaEntry); } - String header = sc.next(); + String header = singleFastaEntry.substring(0, nlineidx); + // Get rid of the new line chars (should cover common cases) header = header.replaceAll("\r", ""); - sc.useDelimiter("\\s*"); - final StringBuilder sb = new StringBuilder(); - while (sc.hasNext()) { - sb.append(sc.next()); - } - final String sequence = sb.toString(); + String sequence = singleFastaEntry.substring(nlineidx); + if (Util.isEmpty(sequence)) { throw new AssertionError( "Empty sequences are not allowed! Please make sure the "