X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datamodel%2Fcompbio%2Fdata%2Fsequence%2FFastaReader.java;h=6c3e9432fb416ae9abd1c4c5c59ac92df6f41f64;hb=8ce6db56df6c0a1c2baca2fc2d33bcb10067c6cd;hp=4b6fd66414628e98eb067a4a0d5247a573c7bf58;hpb=f55e379f8941bc5589a5fa24f77dc6b4e69635d6;p=jabaws.git diff --git a/datamodel/compbio/data/sequence/FastaReader.java b/datamodel/compbio/data/sequence/FastaReader.java index 4b6fd66..6c3e943 100644 --- a/datamodel/compbio/data/sequence/FastaReader.java +++ b/datamodel/compbio/data/sequence/FastaReader.java @@ -1,9 +1,30 @@ +/* Copyright (c) 2011 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. + */ package compbio.data.sequence; import java.io.File; import java.io.FileNotFoundException; +import java.io.InputStream; import java.util.Iterator; import java.util.Scanner; +import java.util.regex.MatchResult; + +import javax.vecmath.MismatchedSizeException; import compbio.util.Util; @@ -49,11 +70,14 @@ import compbio.util.Util; public class FastaReader implements Iterator { private final Scanner input; - + /** + * Delimiter for the scanner + */ + private final String DELIM=">"; /** * Header data can contain non-ASCII symbols and read in UTF8 * - * @param input + * @param inputFile * the file containing the list of FASTA formatted sequences to * read from * @throws FileNotFoundException @@ -64,7 +88,7 @@ public class FastaReader implements Iterator { */ public FastaReader(final String inputFile) throws FileNotFoundException { input = new Scanner(new File(inputFile), "UTF8"); - input.useDelimiter("\\s*>"); + input.useDelimiter(DELIM); Runtime.getRuntime().addShutdownHook(new Thread() { @Override @@ -75,6 +99,19 @@ public class FastaReader implements Iterator { } }); } + + /** + * This class will not close the incoming stream! So the client should do + * so. + * + * @param inputStream + * @throws FileNotFoundException + */ + public FastaReader(final InputStream inputStream) + throws FileNotFoundException { + input = new Scanner(inputStream); + input.useDelimiter(DELIM); + } /** * {@inheritDoc} * @@ -93,10 +130,17 @@ public class FastaReader implements Iterator { * if the header or the sequence is missing * @throws IllegalStateException * if the close method was called on this instance + * @throws MismatchException - if there were no more FastaSequence's. */ @Override public FastaSequence next() { - return FastaReader.toFastaSequence(input.next()); + String fastaHeader=input.next(); + while (fastaHeader.indexOf("\n")<0 && input.hasNext()) + { + fastaHeader = fastaHeader.concat(">"); + fastaHeader = fastaHeader.concat(input.next()); + } + return FastaReader.toFastaSequence(fastaHeader); } /** @@ -118,26 +162,24 @@ public class FastaReader implements Iterator { } private static FastaSequence toFastaSequence(final String singleFastaEntry) { - final Scanner sc = new Scanner(singleFastaEntry); - // Use new line delimiter - sc.useDelimiter("\n"); - if (!sc.hasNext()) { + + assert !Util.isEmpty(singleFastaEntry) : "Empty String where FASTA sequence is expected!"; + + int nlineidx = singleFastaEntry.indexOf("\n"); + if (nlineidx < 0) { throw new AssertionError( "The FASTA sequence must contain the header information" + " separated by the new line from the sequence. Given sequence does not appear to " + "contain the header! Given data:\n " + singleFastaEntry); } - String header = sc.next(); + String header = singleFastaEntry.substring(0, nlineidx); + // Get rid of the new line chars (should cover common cases) - header = header.replaceAll("\n", "").replaceAll("\r", ""); + header = header.replaceAll("\r", ""); + + String sequence = singleFastaEntry.substring(nlineidx); - sc.useDelimiter("\\s*"); - final StringBuilder sb = new StringBuilder(); - while (sc.hasNext()) { - sb.append(sc.next().trim()); - } - final String sequence = sb.toString(); if (Util.isEmpty(sequence)) { throw new AssertionError( "Empty sequences are not allowed! Please make sure the "