2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
21 import jalview.analysis.*;
\r
23 import jalview.datamodel.*;
\r
34 * @version $Revision$
\r
36 public class FastaFile extends AlignFile
\r
39 * Creates a new FastaFile object.
\r
46 * Creates a new FastaFile object.
\r
48 * @param inStr DOCUMENT ME!
\r
50 public FastaFile(String inStr)
\r
/**
 * Creates a new FastaFile object by handing both arguments straight to the
 * two-argument {@code AlignFile} constructor.
 *
 * @param inFile passed unchanged to the superclass constructor
 *               (presumably the location of the FASTA data - confirm
 *               against AlignFile)
 * @param type passed unchanged to the superclass constructor
 *             (presumably the kind of source being read - confirm
 *             against AlignFile)
 *
 * @throws IOException if the superclass constructor throws it
 */
public FastaFile(String inFile, String type) throws IOException
{
    super(inFile, type);
}
\r
71 * @throws IOException DOCUMENT ME!
\r
73 public void parse() throws IOException
\r
76 StringBuffer seq = new StringBuffer();
\r
84 while ((line = nextLine()) != null)
\r
86 if (line.length() > 0)
\r
88 // Do we have an id line?
\r
89 // JBPNote - this code needs to be standardised to EBI/whatever for the
\r
90 // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)
\r
91 if (line.substring(0, 1).equals(">"))
\r
97 seqs.addElement(new Sequence(id, seq.toString(),
\r
102 seqs.addElement(new Sequence(id, seq.toString(), 1,
\r
109 StringTokenizer str = new StringTokenizer(line, " ");
\r
111 id = str.nextToken();
\r
112 id = id.substring(1);
\r
114 com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
\r
115 "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");
\r
117 // JBPNote At the moment - we don't get rid of the friendly names but this
\r
118 // behaviour is probably wrong in the long run.
\r
119 if (dbId.search(id))
\r
121 String dbid = dbId.stringMatched(1);
\r
122 String idname = dbId.stringMatched(2);
\r
124 if ((idname.length() > 0) &&
\r
125 (idname.indexOf("_") > -1))
\r
127 id = idname; // use the friendly name - apparently no dbid
\r
129 else if (dbid.length() > 1)
\r
131 id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise
\r
135 if (id.indexOf("/") > 0)
\r
137 StringTokenizer st = new StringTokenizer(id, "/");
\r
139 if (st.countTokens() == 2)
\r
141 id = st.nextToken();
\r
143 String tmp = st.nextToken();
\r
145 st = new StringTokenizer(tmp, "-");
\r
147 if (st.countTokens() == 2)
\r
149 sstart = Integer.valueOf(st.nextToken())
\r
151 send = Integer.valueOf(st.nextToken()).intValue();
\r
156 seq = new StringBuffer();
\r
160 seq = seq.append(line);
\r
167 if (!isValidProteinSequence(seq.toString().toUpperCase()))
\r
169 throw new IOException("Invalid protein sequence");
\r
174 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
179 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
188 * @param s DOCUMENT ME!
\r
190 * @return DOCUMENT ME!
\r
192 public static String print(SequenceI[] s)
\r
194 return print(s, 72);
\r
200 * @param s DOCUMENT ME!
\r
201 * @param len DOCUMENT ME!
\r
203 * @return DOCUMENT ME!
\r
205 public static String print(SequenceI[] s, int len)
\r
207 return print(s, len, true);
\r
213 * @param s DOCUMENT ME!
\r
214 * @param len DOCUMENT ME!
\r
215 * @param gaps DOCUMENT ME!
\r
217 * @return DOCUMENT ME!
\r
219 public static String print(SequenceI[] s, int len, boolean gaps)
\r
221 return print(s, len, gaps, true);
\r
227 * @param s DOCUMENT ME!
\r
228 * @param len DOCUMENT ME!
\r
229 * @param gaps DOCUMENT ME!
\r
230 * @param displayId DOCUMENT ME!
\r
232 * @return DOCUMENT ME!
\r
234 public static String print(SequenceI[] s, int len, boolean gaps,
\r
237 StringBuffer out = new StringBuffer();
\r
240 while ((i < s.length) && (s[i] != null))
\r
246 seq = s[i].getSequence();
\r
250 seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
\r
253 // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
\r
255 ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
\r
257 int nochunks = (seq.length() / len) + 1;
\r
259 for (int j = 0; j < nochunks; j++)
\r
261 int start = j * len;
\r
262 int end = start + len;
\r
264 if (end < seq.length())
\r
266 out.append(seq.substring(start, end) + "\n");
\r
268 else if (start < seq.length())
\r
270 out.append(seq.substring(start) + "\n");
\r
277 return out.toString();
\r
283 * @return DOCUMENT ME!
\r
285 public String print()
\r
287 return print(getSeqsAsArray());
\r