X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datamodel%2Fcompbio%2Fdata%2Fsequence%2FFastaReader.java;h=6c3e9432fb416ae9abd1c4c5c59ac92df6f41f64;hb=8ce6db56df6c0a1c2baca2fc2d33bcb10067c6cd;hp=10eec8f60c50377ce548fb4b61dfb398bd8167c7;hpb=26166c1ca1dd3761305024cef6ee6ae67c6c135f;p=jabaws.git diff --git a/datamodel/compbio/data/sequence/FastaReader.java b/datamodel/compbio/data/sequence/FastaReader.java index 10eec8f..6c3e943 100644 --- a/datamodel/compbio/data/sequence/FastaReader.java +++ b/datamodel/compbio/data/sequence/FastaReader.java @@ -1,9 +1,30 @@ +/* Copyright (c) 2011 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. + */ package compbio.data.sequence; import java.io.File; import java.io.FileNotFoundException; +import java.io.InputStream; import java.util.Iterator; import java.util.Scanner; +import java.util.regex.MatchResult; + +import javax.vecmath.MismatchedSizeException; import compbio.util.Util; @@ -49,23 +70,53 @@ import compbio.util.Util; public class FastaReader implements Iterator { private final Scanner input; - + /** + * Delimiter for the scanner + */ + private final String DELIM=">"; /** * Header data can contain non-ASCII symbols and read in UTF8 * - * @param input + * @param inputFile * the file containing the list of FASTA formatted sequences to * read from * @throws FileNotFoundException * if the input file is not found + * @throws IllegalStateException + * if the close method was called on this instance + * */ - public FastaReader(final String input) throws FileNotFoundException { - this.input = new Scanner(new File(input), "UTF8"); - this.input.useDelimiter("\\s*>"); + public FastaReader(final String inputFile) throws FileNotFoundException { + input = new Scanner(new File(inputFile), "UTF8"); + input.useDelimiter(DELIM); + Runtime.getRuntime().addShutdownHook(new Thread() { + + @Override + public void run() { + if (input != null) { + input.close(); + } + } + }); } /** + * This class will not close the incoming stream! So the client should do + * so. + * + * @param inputStream + * @throws FileNotFoundException + */ + public FastaReader(final InputStream inputStream) + throws FileNotFoundException { + input = new Scanner(inputStream); + input.useDelimiter(DELIM); + } + /** * {@inheritDoc} + * + * @throws IllegalStateException + * if the close method was called on this instance */ @Override public boolean hasNext() { @@ -77,10 +128,19 @@ public class FastaReader implements Iterator { * * @throws AssertionError * if the header or the sequence is missing + * @throws IllegalStateException + * if the close method was called on this instance + * @throws MismatchException - if there were no more FastaSequence's. */ @Override public FastaSequence next() { - return FastaReader.toFastaSequence(input.next()); + String fastaHeader=input.next(); + while (fastaHeader.indexOf("\n")<0 && input.hasNext()) + { + fastaHeader = fastaHeader.concat(">"); + fastaHeader = fastaHeader.concat(input.next()); + } + return FastaReader.toFastaSequence(fastaHeader); } /** @@ -91,27 +151,35 @@ public class FastaReader implements Iterator { throw new UnsupportedOperationException(); } + /** + * Call this method to close the connection to the input file if you want to + * free up the resources. The connection will be closed on the JVM shutdown + * if this method was not called explicitly. No further reading on this + * instance of the FastaReader will be possible after calling this method. + */ + public void close() { + input.close(); + } + private static FastaSequence toFastaSequence(final String singleFastaEntry) { - final Scanner sc = new Scanner(singleFastaEntry); - // Use new line delimiter - sc.useDelimiter("\n"); - if (!sc.hasNext()) { + + assert !Util.isEmpty(singleFastaEntry) : "Empty String where FASTA sequence is expected!"; + + int nlineidx = singleFastaEntry.indexOf("\n"); + if (nlineidx < 0) { throw new AssertionError( "The FASTA sequence must contain the header information" + " separated by the new line from the sequence. Given sequence does not appear to " + "contain the header! Given data:\n " + singleFastaEntry); } - String header = sc.next(); + String header = singleFastaEntry.substring(0, nlineidx); + // Get rid of the new line chars (should cover common cases) - header = header.replaceAll("\n", "").replaceAll("\r", ""); + header = header.replaceAll("\r", ""); + + String sequence = singleFastaEntry.substring(nlineidx); - sc.useDelimiter("\\s*"); - final StringBuilder sb = new StringBuilder(); - while (sc.hasNext()) { - sb.append(sc.next().trim()); - } - final String sequence = sb.toString(); if (Util.isEmpty(sequence)) { throw new AssertionError( "Empty sequences are not allowed! Please make sure the "