2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
21 import jalview.analysis.*;
\r
23 import jalview.datamodel.*;
\r
/**
 * Alignment file handler for FASTA-style text: {@link #parse()} treats lines
 * beginning with ">" as sequence id lines, and the static print methods turn
 * an array of sequences back into text. Extends AlignFile.
 *
 * NOTE(review): this listing is a numbered extract in which some original
 * lines (closing braces, some statements) are not visible.
 */
31 public class FastaFile extends AlignFile {
\r
/** No-argument constructor; its body is not visible in this extract. */
31 public FastaFile() {
\r
/**
 * Constructor taking a single string.
 * NOTE(review): the body is not visible here; presumably inStr is forwarded
 * to the AlignFile superclass - confirm against the full file.
 */
34 public FastaFile(String inStr) {
\r
/**
 * Constructor that passes both arguments straight to the AlignFile
 * superclass constructor.
 *
 * @param inFile forwarded unchanged to the superclass
 * @param type forwarded unchanged to the superclass
 * @throws IOException declared by this constructor
 */
38 public FastaFile(String inFile, String type) throws IOException {
\r
39 super(inFile, type);
\r
/**
 * Reads the input line by line via nextLine() until it returns null and
 * adds a Sequence to seqs for each record found.
 *
 * A non-empty line whose first character is ">" is an id line; other
 * non-empty lines are appended to the current sequence buffer. Sequence
 * text is upper-cased before it is stored.
 *
 * NOTE(review): several original lines (declarations of line, id, sstart
 * and send, some if/else headers and closing braces) are not visible in
 * this extract, so the exact branch structure must be confirmed against
 * the full file.
 *
 * @throws IOException with message "Invalid protein sequence" when
 *         isValidProteinSequence rejects the upper-cased buffer
 */
42 public void parse() throws IOException {
\r
44 StringBuffer seq = new StringBuffer();
\r
46 boolean flag = false;
\r
53 while ((line = nextLine()) != null) {
\r
54 if (line.length() > 0) {
\r
55 // Do we have an id line?
\r
56 if (line.substring(0, 1).equals(">")) {
\r
// The buffered sequence is stored under the current id, either with the
// parsed sstart/send or with a start of 1.
// NOTE(review): the conditions choosing between these two calls are not
// visible here (presumably flag and whether a range was parsed).
59 seqs.addElement(new Sequence(id,
\r
60 seq.toString().toUpperCase(), sstart, send));
\r
62 seqs.addElement(new Sequence(id,
\r
63 seq.toString().toUpperCase(), 1,
\r
// The id is the first space-delimited token of the line, minus the
// leading ">" character.
70 StringTokenizer str = new StringTokenizer(line, " ");
\r
72 id = str.nextToken();
\r
73 id = id.substring(1);
\r
// Matches ids shaped like letters/letters|word|rest; group 1 is the
// word-character field between the pipes, group 2 everything after it.
75 com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
\r
76 "[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)");
\r
78 if (dbId.search(id)) {
\r
79 String dbid = dbId.stringMatched(1);
\r
80 String idname = dbId.stringMatched(2);
\r
// Group 2 is used as the id only when it is non-empty and contains "_".
// NOTE(review): the else line before "id = dbid" is not visible here.
82 if ((idname.length() > 0) &&
\r
83 (idname.indexOf("_") > -1)) {
\r
84 id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here.
\r
86 id = dbid; // use dbid to ensure sensible queries
\r
// An id of the form name/start-end is split: the part before "/" becomes
// the id and the two "-"-separated numbers become sstart and send.
90 if (id.indexOf("/") > 0) {
\r
91 StringTokenizer st = new StringTokenizer(id, "/");
\r
93 if (st.countTokens() == 2) {
\r
94 id = st.nextToken();
\r
96 String tmp = st.nextToken();
\r
98 st = new StringTokenizer(tmp, "-");
\r
100 if (st.countTokens() == 2) {
\r
101 sstart = Integer.valueOf(st.nextToken())
\r
103 send = Integer.valueOf(st.nextToken()).intValue();
\r
// Start a fresh buffer for the sequence that follows this id line.
108 seq = new StringBuffer();
\r
// Non-id line: accumulate sequence text.
110 seq = seq.append(line);
\r
// NOTE(review): from the original line numbers this check and the two
// addElement calls below appear to follow the read loop, storing the last
// buffered sequence; their enclosing conditions are not visible here.
116 if (!isValidProteinSequence(seq.toString().toUpperCase())) {
\r
117 throw new IOException("Invalid protein sequence");
\r
121 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
124 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
/**
 * Formats the sequences with a len of 72 by delegating to
 * print(SequenceI[], int).
 *
 * @param s sequences to format
 * @return the text produced by the delegate
 */
130 public static String print(SequenceI[] s) {
\r
131 return print(s, 72);
\r
/**
 * Formats the sequences with gaps set to true by delegating to
 * print(SequenceI[], int, boolean).
 *
 * @param s sequences to format
 * @param len maximum number of sequence characters per output line
 * @return the text produced by the delegate
 */
134 public static String print(SequenceI[] s, int len) {
\r
135 return print(s, len, true);
\r
/**
 * Formats the sequences with displayId set to true by delegating to
 * print(SequenceI[], int, boolean, boolean).
 *
 * @param s sequences to format
 * @param len maximum number of sequence characters per output line
 * @param gaps forwarded unchanged to the delegate
 * @return the text produced by the delegate
 */
138 public static String print(SequenceI[] s, int len, boolean gaps) {
\r
139 return print(s, len, gaps, true);
\r
/**
 * Builds the text for an array of sequences: for each entry a header line
 * followed by the sequence split into lines of at most len characters.
 * Processing stops at the end of the array or at the first null entry.
 *
 * NOTE(review): some original lines (the start of the header append, the
 * loop counter handling and the if/else around the two seq assignments)
 * are not visible in this extract.
 *
 * @param s sequences to format
 * @param len maximum number of sequence characters per output line; used
 *        as a divisor, so it must not be zero
 * @param gaps presumably selects between the stored sequence and the
 *        version with gap characters removed - confirm against full file
 * @param displayId when true the header uses getDisplayId(), otherwise
 *        getName()
 * @return the accumulated text
 */
142 public static String print(SequenceI[] s, int len, boolean gaps,
\r
143 boolean displayId) {
\r
144 StringBuffer out = new StringBuffer();
\r
147 while ((i < s.length) && (s[i] != null)) {
\r
// Two alternatives for seq: the sequence as stored, or the sequence after
// AlignSeq.extractGaps is applied with the characters "-", "." and " ".
151 seq = s[i].getSequence();
\r
153 seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
\r
156 // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
\r
158 ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
\r
// The +1 gives a trailing partial chunk its own iteration; the guards in
// the loop skip that iteration when nothing is left to write.
160 int nochunks = (seq.length() / len) + 1;
\r
162 for (int j = 0; j < nochunks; j++) {
\r
163 int start = j * len;
\r
164 int end = start + len;
\r
// Full chunk when more text follows it; otherwise the remaining tail, if any.
166 if (end < seq.length()) {
\r
167 out.append(seq.substring(start, end) + "\n");
\r
168 } else if (start < seq.length()) {
\r
169 out.append(seq.substring(start) + "\n");
\r
176 return out.toString();
\r
/**
 * Formats this file's own sequences: passes getSeqsAsArray() to the static
 * print(SequenceI[]) method.
 *
 * @return the text produced by the static print method
 */
179 public String print() {
\r
180 return print(getSeqsAsArray());
\r