X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datamodel%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.java;h=6e20988d0b4c1bdd2ab4751e672402ac2779e7d9;hb=91dc99b9b18e403c97b1c4e0ead8f754991714a5;hp=f65ec9eee36a84c916a904adaaa1da450ded670f;hpb=3b7b9e0a5fe1f6f3d57874a5ae63aaedf6a35ef3;p=jabaws.git diff --git a/datamodel/compbio/data/sequence/SequenceUtil.java b/datamodel/compbio/data/sequence/SequenceUtil.java index f65ec9e..6e20988 100644 --- a/datamodel/compbio/data/sequence/SequenceUtil.java +++ b/datamodel/compbio/data/sequence/SequenceUtil.java @@ -1,15 +1,19 @@ -/* - * @(#)SequenceUtil.java 1.0 September 2009 Copyright (c) 2009 Peter Troshin - * Jalview Web Services version: 2.0 This library is free software; you can - * redistribute it and/or modify it under the terms of the Apache License - * version 2 as published by the Apache Software Foundation This library is - * distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A - * PARTICULAR PURPOSE. See the Apache License for more details. A copy of the - * license is in apache_license.txt. It is also available here: see: - * http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or derived - * work distributed in source code form must include this copyright and license - * notice. +/* Copyright (c) 2011 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. */ package compbio.data.sequence; @@ -41,8 +45,9 @@ import compbio.util.Util; /** * Utility class for operations on sequences * - * @author Petr Troshin - * @version 1.0 + * @author Peter Troshin + * @since 1.0 + * @version 2.0 June 2011 */ public final class SequenceUtil { @@ -99,19 +104,6 @@ public final class SequenceUtil { private SequenceUtil() { } // utility class, no instantiation - /* - * public static void write_PirSeq(OutputStream os, FastaSequence seq) - * throws IOException { BufferedWriter pir_out = new BufferedWriter(new - * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() + - * SysPrefs.newlinechar); pir_out.write(seq.getSequence() + - * SysPrefs.newlinechar); pir_out.close(); } public static void - * write_FastaSeq(OutputStream os, FastaSequence seq) throws IOException { - * BufferedWriter fasta_out = new BufferedWriter( new - * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() + - * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() + - * SysPrefs.newlinechar); fasta_out.close(); } - */ - /** * @return true is the sequence contains only letters a,c, t, g, u */ @@ -268,28 +260,11 @@ public final class SequenceUtil { public static List readFasta(final InputStream inStream) throws IOException { final List seqs = new ArrayList(); - - final BufferedReader infasta = new BufferedReader( - new InputStreamReader(inStream, "UTF8"), 16000); - final Pattern pattern = Pattern.compile("//s+"); - - String line; - String sname = "", seqstr = null; - do { - line = infasta.readLine(); - if ((line == null) || line.startsWith(">")) { - if (seqstr != null) { - seqs.add(new FastaSequence(sname.substring(1), seqstr)); - } - sname = line; // remove > - seqstr = ""; - } else { - final String subseq = pattern.matcher(line).replaceAll(""); - seqstr += subseq; - } - } while (line != null); - - infasta.close(); + FastaReader reader = new FastaReader(inStream); + while (reader.hasNext()) { + seqs.add(reader.next()); + } + inStream.close(); return seqs; } @@ -315,14 +290,15 @@ public final class SequenceUtil { * Read IUPred output * * @param result - * @return + * @return Map key->sequence name, value->Score * @throws IOException * @throws UnknownFileFormatException */ - public static Map readIUPred(final File result, - IUPredResult type) throws IOException, UnknownFileFormatException { + public static Map readIUPred(final File result) + throws IOException, UnknownFileFormatException { InputStream input = new FileInputStream(result); - Map sequences = readIUPred(input, type); + Map sequences = readIUPred(input, + IUPredResult.getType(result)); input.close(); return sequences; } @@ -357,7 +333,7 @@ public final class SequenceUtil { while (scan.hasNext()) { String nextEntry = scan.next(); Scanner entry = new Scanner(nextEntry); - String name = entry.nextLine(); + String name = entry.nextLine().trim(); // inside entry: if (IUPredResult.Glob == type) { // parse domains @@ -451,7 +427,7 @@ public final class SequenceUtil { * * @param inStream * the InputStream connected to the JRonn output file - * @return List of {@link AnnotatedSequence} objects + * @return Map key=sequence name value=Score * @throws IOException * is thrown if the inStream has problems accessing the data * @throws UnknownFileFormatException @@ -557,7 +533,8 @@ public final class SequenceUtil { * * * @param input - * @return + * the InputStream + * @return Map key=sequence name, value=set of score * @throws IOException * @throws UnknownFileFormatException */ @@ -679,7 +656,7 @@ public final class SequenceUtil { * * * @param input - * @return + * @return Map key=sequence name, value=set of score * @throws IOException * @throws UnknownFileFormatException */ @@ -845,5 +822,23 @@ enum IUPredResult { /** * Globular domains */ - Glob + Glob; + + static IUPredResult getType(File file) { + assert file != null; + String name = file.getName(); + if (name.endsWith(Long.toString().toLowerCase())) { + return Long; + } + if (name.endsWith(Short.toString().toLowerCase())) { + return Short; + } + if (name.endsWith(Glob.toString().toLowerCase())) { + return Glob; + } + throw new AssertionError( + "IUPred result file type cannot be recognised! " + + "\nFile must ends with one of [glob, long or short]" + + "\n but given file name was: " + file.getName()); + } } \ No newline at end of file