/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.io; import jalview.analysis.*; import jalview.datamodel.*; import java.io.*; import java.util.*; public class FastaFile extends AlignFile { public FastaFile() { } public FastaFile(String inStr) { super(inStr); } public FastaFile(String inFile, String type) throws IOException { super(inFile, type); } public void parse() throws IOException { String id = ""; StringBuffer seq = new StringBuffer(); int count = 0; boolean flag = false; int sstart = 0; int send = 0; String line; while ((line = nextLine()) != null) { if (line.length() > 0) { // Do we have an id line? if (line.substring(0, 1).equals(">")) { if (count != 0) { if (sstart != 0) { seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), sstart, send)); } else { seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), 1, seq.length())); } } count++; StringTokenizer str = new StringTokenizer(line, " "); id = str.nextToken(); id = id.substring(1); com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex( "[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)"); if (dbId.search(id)) { String dbid = dbId.stringMatched(1); String idname = dbId.stringMatched(2); if ((idname.length() > 0) && (idname.indexOf("_") > -1)) { id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here. } else { id = dbid; // use dbid to ensure sensible queries } } if (id.indexOf("/") > 0) { StringTokenizer st = new StringTokenizer(id, "/"); if (st.countTokens() == 2) { id = st.nextToken(); String tmp = st.nextToken(); st = new StringTokenizer(tmp, "-"); if (st.countTokens() == 2) { sstart = Integer.valueOf(st.nextToken()) .intValue(); send = Integer.valueOf(st.nextToken()).intValue(); } } } seq = new StringBuffer(); } else { seq = seq.append(line); } } } if (count > 0) { if (!isValidProteinSequence(seq.toString().toUpperCase())) { throw new IOException("Invalid protein sequence"); } if (sstart != 0) { seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), sstart, send)); } else { seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), 1, seq.length())); } } } public static String print(SequenceI[] s) { return print(s, 72); } public static String print(SequenceI[] s, int len) { return print(s, len, true); } public static String print(SequenceI[] s, int len, boolean gaps) { return print(s, len, gaps, true); } public static String print(SequenceI[] s, int len, boolean gaps, boolean displayId) { StringBuffer out = new StringBuffer(); int i = 0; while ((i < s.length) && (s[i] != null)) { String seq = ""; if (gaps) { seq = s[i].getSequence(); } else { seq = AlignSeq.extractGaps("-. ", s[i].getSequence()); } // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() + out.append(">" + ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n"); int nochunks = (seq.length() / len) + 1; for (int j = 0; j < nochunks; j++) { int start = j * len; int end = start + len; if (end < seq.length()) { out.append(seq.substring(start, end) + "\n"); } else if (start < seq.length()) { out.append(seq.substring(start) + "\n"); } } i++; } return out.toString(); } public String print() { return print(getSeqsAsArray()); } }