X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=d8890afe8909831decb9e7f68bf011c2db726073;hb=b2f9a8d7bce642ff4011bc6d49e02bb0569fbb11;hp=757bb57dbaac990bcf6fb3b16d476e338d4941bf;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java index 757bb57..d8890af 100755 --- a/src/jalview/io/FastaFile.java +++ b/src/jalview/io/FastaFile.java @@ -1,289 +1,238 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -package jalview.io; - -import jalview.analysis.*; - -import jalview.datamodel.*; - -import java.io.*; - -import java.util.*; - - -/** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ - */ -public class FastaFile extends AlignFile -{ - /** - * Creates a new FastaFile object. - */ - public FastaFile() - { - } - - /** - * Creates a new FastaFile object. - * - * @param inStr DOCUMENT ME! - */ - public FastaFile(String inStr) - { - super(inStr); - } - - /** - * Creates a new FastaFile object. - * - * @param inFile DOCUMENT ME! - * @param type DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! - */ - public FastaFile(String inFile, String type) throws IOException - { - super(inFile, type); - } - - /** - * DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! - */ - public void parse() throws IOException - { - String id = ""; - StringBuffer seq = new StringBuffer(); - int count = 0; - - int sstart = 0; - int send = 0; - - String line; - - while ((line = nextLine()) != null) - { - if (line.length() > 0) - { - // Do we have an id line? - // JBPNote - this code needs to be standardised to EBI/whatever for the - // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist) - if (line.substring(0, 1).equals(">")) - { - if (count != 0) - { - if (sstart != 0) - { - seqs.addElement(new Sequence(id, seq.toString(), - sstart, send)); - } - else - { - seqs.addElement(new Sequence(id, seq.toString(), 1, - seq.length())); - } - } - - count++; - - StringTokenizer str = new StringTokenizer(line, " "); - - id = str.nextToken(); - id = id.substring(1); - - com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex( - "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)"); - - // JBPNote At the moment - we don't get rid of the friendly names but this - // behaviour is probably wrong in the long run. - if (dbId.search(id)) - { - String dbid = dbId.stringMatched(1); - String idname = dbId.stringMatched(2); - - if ((idname.length() > 0) && - (idname.indexOf("_") > -1)) - { - id = idname; // use the friendly name - apparently no dbid - } - else if (dbid.length() > 1) - { - id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise - } - } - - if (id.indexOf("/") > 0) - { - StringTokenizer st = new StringTokenizer(id, "/"); - - if (st.countTokens() == 2) - { - id = st.nextToken(); - - String tmp = st.nextToken(); - - st = new StringTokenizer(tmp, "-"); - - if (st.countTokens() == 2) - { - sstart = Integer.valueOf(st.nextToken()) - .intValue(); - send = Integer.valueOf(st.nextToken()).intValue(); - } - } - } - - seq = new StringBuffer(); - } - else - { - seq = seq.append(line); - } - } - } - - if (count > 0) - { - if (!isValidProteinSequence(seq.toString().toUpperCase())) - { - throw new IOException("Invalid protein sequence"); - } - - if (sstart != 0) - { - seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), - sstart, send)); - } - else - { - seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), - 1, seq.length())); - } - } - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s) - { - return print(s, 72); - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param len DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s, int len) - { - return print(s, len, true); - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param len DOCUMENT ME! - * @param gaps DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s, int len, boolean gaps) - { - return print(s, len, gaps, true); - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param len DOCUMENT ME! - * @param gaps DOCUMENT ME! - * @param displayId DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s, int len, boolean gaps, - boolean displayId) - { - StringBuffer out = new StringBuffer(); - int i = 0; - - while ((i < s.length) && (s[i] != null)) - { - String seq = ""; - - if (gaps) - { - seq = s[i].getSequence(); - } - else - { - seq = AlignSeq.extractGaps("-. ", s[i].getSequence()); - } - - // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() + - out.append(">" + - ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n"); - - int nochunks = (seq.length() / len) + 1; - - for (int j = 0; j < nochunks; j++) - { - int start = j * len; - int end = start + len; - - if (end < seq.length()) - { - out.append(seq.substring(start, end) + "\n"); - } - else if (start < seq.length()) - { - out.append(seq.substring(start) + "\n"); - } - } - - i++; - } - - return out.toString(); - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public String print() - { - return print(getSeqsAsArray()); - } -} +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.1) + * Copyright (C) 2014 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.io; + +import java.io.*; + +import jalview.datamodel.*; + +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class FastaFile extends AlignFile +{ + /** + * Length of a sequence line + */ + int len = 72; + + StringBuffer out; + + /** + * Creates a new FastaFile object. + */ + public FastaFile() + { + } + + /** + * Creates a new FastaFile object. + * + * @param inFile + * DOCUMENT ME! + * @param type + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! + */ + public FastaFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + public FastaFile(FileParse source) throws IOException + { + super(source); + } + + /** + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! + */ + public void parse() throws IOException + { + StringBuffer sb = new StringBuffer(); + boolean firstLine = true; + + String line,uline; + Sequence seq = null; + + boolean annotation = false; + + while ((uline = nextLine()) != null) + { + line = uline.trim(); + if (line.length() > 0) + { + if (line.charAt(0) == '>') + { + if (line.startsWith(">#_")) + { + if (annotation) + { + annotations.addElement(makeAnnotation(seq, sb)); + } + } + else + { + annotation = false; + } + + if (!firstLine) + { + seq.setSequence(sb.toString()); + + if (!annotation) + { + seqs.addElement(seq); + } + } + + seq = parseId(line.substring(1)); + firstLine = false; + + sb = new StringBuffer(); + + if (line.startsWith(">#_")) + { + annotation = true; + } + } + else + { + sb.append(annotation ? uline : line); + } + } + } + + if (annotation) + { + annotations.addElement(makeAnnotation(seq, sb)); + } + + else if (!firstLine) + { + seq.setSequence(sb.toString()); + seqs.addElement(seq); + } + } + private AlignmentAnnotation makeAnnotation(SequenceI seq, StringBuffer sb) + { + Annotation[] anots = new Annotation[sb.length()]; + char cb; + for (int i=0;i" + printId(s[i])); + if (s[i].getDescription() != null) + { + out.append(" " + s[i].getDescription()); + } + + out.append(newline); + + int nochunks = (s[i].getLength() / len) + 1; + + for (int j = 0; j < nochunks; j++) + { + int start = j * len; + int end = start + len; + + if (end < s[i].getLength()) + { + out.append(s[i].getSequenceAsString(start, end) + newline); + } + else if (start < s[i].getLength()) + { + out.append(s[i].getSequenceAsString(start, s[i].getLength()) + + newline); + } + } + + i++; + } + + return out.toString(); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print() + { + return print(getSeqsAsArray()); + } +}