2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
21 import jalview.analysis.*;
\r
23 import jalview.datamodel.*;
\r
30 public class FastaFile extends AlignFile {
\r
/**
 * No-argument constructor.
 * NOTE(review): the constructor body is not part of this view - confirm it
 * does nothing beyond the implicit AlignFile construction.
 */
31 public FastaFile() {
\r
/**
 * Constructs a FastaFile from a single String argument.
 * NOTE(review): the body is not part of this view; presumably inStr is
 * handed to the AlignFile superclass - confirm.
 *
 * @param inStr the string argument for this constructor
 */
34 public FastaFile(String inStr) {
\r
/**
 * Constructs a FastaFile by passing both arguments unchanged to the
 * AlignFile superclass constructor.
 *
 * @param inFile first argument forwarded to AlignFile
 * @param type second argument forwarded to AlignFile
 * @throws IOException declared by this constructor; presumably raised by
 *         the superclass constructor - confirm against AlignFile
 */
38 public FastaFile(String inFile, String type) throws IOException {
\r
39 super(inFile, type);
\r
/**
 * Reads the input line by line through nextLine() and adds Sequence objects,
 * built from an id and the upper-cased residue text collected for it, to seqs.
 *
 * Header lines start with '>'. The id is the first space-delimited token of
 * the header without the '>'; it may then be rewritten from a
 * database-style id, and an optional "/start-end" suffix is split off
 * into sstart and send.
 *
 * NOTE(review): several original lines (local declarations, some branch
 * conditions and closing braces) are absent from this view; the comments
 * below describe only the statements that are visible.
 *
 * @throws IOException with the message "Invalid protein sequence" when
 *         isValidProteinSequence rejects the collected residues
 */
42 public void parse() throws IOException {
\r
// Collects the residue text of the sequence currently being read.
44 StringBuffer seq = new StringBuffer();
\r
46 boolean flag = false;
\r
// Loop until nextLine() returns null.
53 while ((line = nextLine()) != null) {
\r
// Only lines with at least one character are examined here.
54 if (line.length() > 0) {
\r
55 // Do we have an id line?
\r
56 // JBPNote - this code needs to be standardised to EBI/whatever for the
\r
57 // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)
\r
59 if (line.substring(0, 1).equals(">")) {
\r
// Store what has been collected so far, upper-cased. Two forms appear:
// one using the parsed sstart/send, one whose start is 1.
// NOTE(review): the conditions choosing between them are not in view.
62 seqs.addElement(new Sequence(id,
\r
63 seq.toString().toUpperCase(), sstart, send));
\r
65 seqs.addElement(new Sequence(id,
\r
66 seq.toString().toUpperCase(), 1,
\r
// The id is the first space-delimited token of the header line ...
73 StringTokenizer str = new StringTokenizer(line, " ");
\r
75 id = str.nextToken();
\r
// ... with its first character (the '>') removed.
76 id = id.substring(1);
\r
// Pattern: letters/hyphens, an optional '/', letters/hyphens, then
// '|' + word characters (group 1) + '|' + the remainder (group 2).
78 com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
\r
79 "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");
\r
80 // JBPNote At the moment - we don't get rid of the friendly names but this
\r
81 // behaviour is probably wrong in the long run.
\r
82 if (dbId.search(id)) {
\r
83 String dbid = dbId.stringMatched(1);
\r
84 String idname = dbId.stringMatched(2);
\r
// A non-empty group 2 containing '_' replaces the id.
85 if ( (idname.length() > 0) &&
\r
86 (idname.indexOf("_") > -1)) {
\r
87 id = idname; // use the friendly name - apparently no dbid
\r
// Group 1 replaces the id when it is longer than one character.
// NOTE(review): how this branch relates to the one above is not in view.
89 if (dbid.length()>1) {
\r
90 id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise
\r
// A '/' after the first character introduces a range suffix.
94 if (id.indexOf("/") > 0) {
\r
95 StringTokenizer st = new StringTokenizer(id, "/");
\r
// Exactly two '/'-separated parts: the id proper and the range text.
97 if (st.countTokens() == 2) {
\r
98 id = st.nextToken();
\r
100 String tmp = st.nextToken();
\r
102 st = new StringTokenizer(tmp, "-");
\r
// Exactly two '-'-separated parts: start and end.
104 if (st.countTokens() == 2) {
// NOTE(review): Integer.valueOf throws NumberFormatException when a part
// is not numeric; no handling for that is visible here.
105 sstart = Integer.valueOf(st.nextToken())
\r
107 send = Integer.valueOf(st.nextToken()).intValue();
\r
// Begin a fresh buffer.
112 seq = new StringBuffer();
\r
// Append the current line to the residue buffer
// (presumably the non-header branch - the else line is not in view).
114 seq = seq.append(line);
\r
// Reject the input when the collected residues fail the validity check.
120 if (!isValidProteinSequence(seq.toString().toUpperCase())) {
\r
121 throw new IOException("Invalid protein sequence");
\r
// Add the collected sequence; two alternative forms, as in the header branch.
125 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
128 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
/**
 * Convenience overload: delegates to print(s, 72), i.e. uses 72 as the
 * len argument.
 *
 * @param s sequences to render
 * @return the text produced by print(s, 72)
 */
134 public static String print(SequenceI[] s) {
\r
135 return print(s, 72);
\r
/**
 * Convenience overload: delegates to print(s, len, true), i.e. passes
 * true for the gaps argument.
 *
 * @param s sequences to render
 * @param len chunk length forwarded unchanged
 * @return the text produced by print(s, len, true)
 */
138 public static String print(SequenceI[] s, int len) {
\r
139 return print(s, len, true);
\r
/**
 * Convenience overload: delegates to print(s, len, gaps, true), i.e.
 * passes true for the displayId argument.
 *
 * @param s sequences to render
 * @param len chunk length forwarded unchanged
 * @param gaps forwarded unchanged
 * @return the text produced by print(s, len, gaps, true)
 */
142 public static String print(SequenceI[] s, int len, boolean gaps) {
\r
143 return print(s, len, gaps, true);
\r
/**
 * Builds a text rendering of the given sequences in a StringBuffer: for
 * each sequence an id line ending in "\n", followed by the residue text
 * split into pieces of at most len characters, each ending in "\n".
 *
 * NOTE(review): some original lines (the declaration of i and seq, the
 * branch condition around the two seq assignments, the start of the id-line
 * statement, closing braces) are absent from this view.
 *
 * @param s sequences to render; processing stops at the end of the array or
 *        at the first null element
 * @param len maximum number of characters per residue line.
 *        NOTE(review): len == 0 makes the chunk-count division throw
 *        ArithmeticException
 * @param gaps presumably selects between the raw getSequence() text and the
 *        text with the characters "-. " removed through AlignSeq.extractGaps
 *        - the condition itself is not in view; confirm
 * @param displayId when true the id line uses getDisplayId(), otherwise
 *        getName()
 * @return the accumulated text
 */
146 public static String print(SequenceI[] s, int len, boolean gaps,
\r
147 boolean displayId) {
\r
148 StringBuffer out = new StringBuffer();
\r
// Walk the array until its end or the first null entry.
151 while ((i < s.length) && (s[i] != null)) {
\r
// Two alternative sources for the residue text: as stored, or passed
// through AlignSeq.extractGaps with the characters "-. ".
155 seq = s[i].getSequence();
\r
157 seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
\r
160 // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
\r
162 ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
\r
// One more than the number of full chunks, so a trailing partial chunk is
// visited; the bounds checks below emit nothing for an empty final chunk.
164 int nochunks = (seq.length() / len) + 1;
\r
166 for (int j = 0; j < nochunks; j++) {
\r
167 int start = j * len;
\r
168 int end = start + len;
\r
// Full chunk strictly inside the text.
170 if (end < seq.length()) {
\r
171 out.append(seq.substring(start, end) + "\n");
\r
// Last chunk: everything from start to the end of the text.
172 } else if (start < seq.length()) {
\r
173 out.append(seq.substring(start) + "\n");
\r
180 return out.toString();
\r
/**
 * Instance overload: renders this object's sequences by delegating to the
 * static print(SequenceI[]) with the result of getSeqsAsArray().
 *
 * @return the text produced by print(getSeqsAsArray())
 */
183 public String print() {
\r
184 return print(getSeqsAsArray());
\r