/*
 * Jalview - A Sequence Alignment Editor and Viewer
 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */
package jalview.io;

import jalview.analysis.*;

import jalview.datamodel.*;

import java.io.*;

import java.util.*;


/**
 * Reads sequences from FASTA-style text (records introduced by a line
 * starting with <code>&gt;</code>) and writes sequences back out in the same
 * layout.
 *
 * @author $author$
 * @version $Revision$
 */
public class FastaFile extends AlignFile
{
    /**
     * Creates an empty FastaFile; nothing is read or parsed.
     */
    public FastaFile()
    {
    }

    /**
     * Creates a FastaFile by passing <code>inStr</code> straight to the
     * superclass constructor.
     *
     * @param inStr value handed to <code>AlignFile(String)</code>
     *        (presumably the FASTA text itself - confirm against AlignFile)
     */
    public FastaFile(String inStr)
    {
        super(inStr);
    }

    /**
     * Creates a FastaFile by passing both arguments straight to the
     * superclass constructor.
     *
     * @param inFile value handed to <code>AlignFile(String, String)</code>
     *        (presumably a file name, URL or pasted text - confirm)
     * @param type value handed to <code>AlignFile(String, String)</code>
     *        (presumably says how <code>inFile</code> is to be interpreted)
     *
     * @throws IOException if the superclass constructor fails
     */
    public FastaFile(String inFile, String type) throws IOException
    {
        super(inFile, type);
    }

    /**
     * Reads lines via <code>nextLine()</code> until it returns null and adds
     * one <code>Sequence</code> to <code>seqs</code> for every
     * <code>&gt;</code> header encountered.
     *
     * <p>For each header line the id is the first space-delimited token with
     * the leading <code>&gt;</code> removed.  If the id looks like
     * <code>db|accession|name</code> it is reduced to the name (when the name
     * contains an underscore) or otherwise to the accession (when longer than
     * one character).  If the resulting id has the form
     * <code>name/start-end</code> with numeric bounds, the bounds become the
     * sequence's start and end; otherwise the sequence is numbered from 1 to
     * its length.  Empty lines are skipped, and residue lines appearing before
     * the first header are discarded.</p>
     *
     * @throws IOException if <code>nextLine()</code> fails, or if the last
     *         record is rejected by <code>isValidProteinSequence</code>
     */
    public void parse() throws IOException
    {
        String id = "";
        StringBuffer seq = new StringBuffer();
        boolean headerSeen = false;
        int sstart = 0;
        int send = 0;

        // Built once per parse() call rather than once per header line.
        // JBPNote - this code needs to be standardised to EBI/whatever for the
        // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of
        // naming (should it really exist)
        com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
                "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");

        String line;

        while ((line = nextLine()) != null)
        {
            if (line.length() == 0)
            {
                continue;
            }

            // Anything that is not an id line is residue data for the
            // record currently being collected.
            if (line.charAt(0) != '>')
            {
                seq.append(line);

                continue;
            }

            // A new header closes the previous record, if there was one.
            // NOTE(review): records flushed here are neither upper-cased nor
            // validated, unlike the final record below.  That asymmetry is
            // inherited from the original code and kept as-is - confirm
            // which behaviour is actually intended.
            if (headerSeen)
            {
                addFastaSequence(id, seq.toString(), sstart, send);
            }

            headerSeen = true;
            seq = new StringBuffer();

            // Start every record with "no explicit range"; previously a range
            // parsed from one header leaked into all following records that
            // did not carry their own /start-end suffix.
            sstart = 0;
            send = 0;

            id = new StringTokenizer(line, " ").nextToken().substring(1);

            // JBPNote At the moment - we don't get rid of the friendly names
            // but this behaviour is probably wrong in the long run.
            if (dbId.search(id))
            {
                String dbid = dbId.stringMatched(1);
                String idname = dbId.stringMatched(2);

                if ((idname.length() > 0) && (idname.indexOf("_") > -1))
                {
                    id = idname; // use the friendly name - apparently no dbid
                }
                else if (dbid.length() > 1)
                {
                    id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise
                }
            }

            if (id.indexOf("/") > 0)
            {
                StringTokenizer st = new StringTokenizer(id, "/");

                if (st.countTokens() == 2)
                {
                    // The part after the slash is dropped from the id even
                    // when it turns out not to be a usable range.
                    id = st.nextToken();

                    int[] range = parseFastaRange(st.nextToken());

                    if (range != null)
                    {
                        sstart = range[0];
                        send = range[1];
                    }
                }
            }
        }

        if (headerSeen)
        {
            // Locale.ENGLISH keeps the case mapping independent of the
            // platform default (e.g. Turkish maps 'i' to a dotted capital I).
            String residues = seq.toString().toUpperCase(Locale.ENGLISH);

            if (!isValidProteinSequence(residues))
            {
                throw new IOException("Invalid protein sequence");
            }

            addFastaSequence(id, residues, sstart, send);
        }
    }

    /**
     * Parses the <code>start-end</code> suffix of a sequence id.
     *
     * @param text the text that followed the slash in the id
     *
     * @return a two element array <code>{start, end}</code>, or null when
     *         <code>text</code> is not exactly two '-' separated integers
     */
    private static int[] parseFastaRange(String text)
    {
        StringTokenizer st = new StringTokenizer(text, "-");

        if (st.countTokens() != 2)
        {
            return null;
        }

        try
        {
            // Both bounds are parsed before anything is returned so that a
            // bad second bound cannot leave a half-updated range behind.
            int start = Integer.parseInt(st.nextToken());
            int end = Integer.parseInt(st.nextToken());

            return new int[] { start, end };
        }
        catch (NumberFormatException ignored)
        {
            // Not numeric: treat it like any other non-range suffix instead
            // of aborting the whole parse with an unchecked exception.
            return null;
        }
    }

    /**
     * Appends one sequence to <code>seqs</code>.  A <code>start</code> of 0
     * means no range was given in the id, in which case the sequence is
     * numbered from 1 to its length.
     *
     * @param id sequence name
     * @param residues sequence characters
     * @param start first residue number taken from the id, or 0 for none
     * @param end last residue number taken from the id (ignored when
     *        <code>start</code> is 0)
     */
    private void addFastaSequence(String id, String residues, int start,
        int end)
    {
        if (start != 0)
        {
            seqs.addElement(new Sequence(id, residues, start, end));
        }
        else
        {
            seqs.addElement(new Sequence(id, residues, 1, residues.length()));
        }
    }

    /**
     * Formats sequences as FASTA text with 72 residues per line, gaps kept
     * and display ids used for the header lines.
     *
     * @param s sequences to write; output stops at the first null entry
     *
     * @return the FASTA text
     */
    public static String print(SequenceI[] s)
    {
        return print(s, 72);
    }

    /**
     * Formats sequences as FASTA text with gaps kept and display ids used for
     * the header lines.
     *
     * @param s sequences to write; output stops at the first null entry
     * @param len number of residues per output line
     *
     * @return the FASTA text
     */
    public static String print(SequenceI[] s, int len)
    {
        return print(s, len, true);
    }

    /**
     * Formats sequences as FASTA text using display ids for the header lines.
     *
     * @param s sequences to write; output stops at the first null entry
     * @param len number of residues per output line
     * @param gaps true to write each sequence as stored; false to pass it
     *        through <code>AlignSeq.extractGaps("-. ", ...)</code> first
     *
     * @return the FASTA text
     */
    public static String print(SequenceI[] s, int len, boolean gaps)
    {
        return print(s, len, gaps, true);
    }

    /**
     * Formats sequences as FASTA text: a <code>&gt;</code> header line per
     * sequence followed by its residues broken into lines of at most
     * <code>len</code> characters.  A sequence with no residues produces only
     * its header line.
     *
     * @param s sequences to write; output stops at the first null entry
     * @param len number of residues per output line; expected to be
     *        positive (a value of 0 makes the chunk-count division throw
     *        ArithmeticException)
     * @param gaps true to write each sequence as stored; false to pass it
     *        through <code>AlignSeq.extractGaps("-. ", ...)</code> first
     * @param displayId true to head each record with
     *        <code>getDisplayId()</code>, false to use <code>getName()</code>
     *
     * @return the FASTA text
     */
    public static String print(SequenceI[] s, int len, boolean gaps,
        boolean displayId)
    {
        StringBuffer out = new StringBuffer();

        for (int i = 0; (i < s.length) && (s[i] != null); i++)
        {
            String seq;

            if (gaps)
            {
                seq = s[i].getSequence();
            }
            else
            {
                seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
            }

            // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
            out.append(">");
            out.append(displayId ? s[i].getDisplayId() : s[i].getName());
            out.append("\n");

            int total = seq.length();
            int nochunks = (total / len) + 1;

            for (int j = 0; j < nochunks; j++)
            {
                int start = j * len;
                int end = start + len;

                if (end < total)
                {
                    out.append(seq.substring(start, end)).append("\n");
                }
                else if (start < total)
                {
                    // Final, possibly shorter, chunk.
                    out.append(seq.substring(start)).append("\n");
                }
            }
        }

        return out.toString();
    }

    /**
     * Formats the sequences returned by <code>getSeqsAsArray()</code> as
     * FASTA text using the default settings of
     * {@link #print(SequenceI[])}.
     *
     * @return the FASTA text
     */
    public String print()
    {
        return print(getSeqsAsArray());
    }
}