2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
21 import jalview.analysis.*;
\r
23 import jalview.datamodel.*;
\r
30 public class FastaFile extends AlignFile {
\r
/**
 * No-argument constructor.
 * NOTE(review): the constructor body is not visible in this extract; confirm
 * what, if anything, it initialises.
 */
31 public FastaFile() {
\r
/**
 * Creates a FastaFile from a single string argument.
 *
 * @param inStr NOTE(review): presumably the text to be parsed rather than a
 *              file name - the body is not visible in this extract, confirm
 *              against AlignFile
 */
34 public FastaFile(String inStr) {
\r
/**
 * Creates a FastaFile by handing both arguments, unchanged, to the AlignFile
 * constructor.
 *
 * @param inFile passed straight to super; presumably identifies the input
 *               source - TODO confirm against AlignFile
 * @param type   passed straight to super; presumably says how inFile is to be
 *               accessed - TODO confirm against AlignFile
 * @throws IOException declared here; presumably propagated from the
 *                     superclass constructor - TODO confirm
 */
38 public FastaFile(String inFile, String type) throws IOException {
\r
39 super(inFile, type);
\r
/**
 * Reads the input line by line via nextLine() and appends Sequence objects
 * to seqs. A non-empty line whose first character is ">" is handled as an
 * id line; other text is accumulated in a StringBuffer.
 *
 * NOTE(review): several statements of this method (local declarations,
 * closing braces and some branch conditions) are not visible in this
 * extract, so the comments below describe the visible lines only.
 *
 * @throws IOException with the message "Invalid protein sequence" when
 *                     isValidProteinSequence rejects the upper-cased
 *                     sequence text; nextLine() presumably may raise it as
 *                     well - TODO confirm
 */
42 public void parse() throws IOException {
\r
// Buffer collecting the sequence text of the entry currently being read.
44 StringBuffer seq = new StringBuffer();
\r
// A null from nextLine() ends the loop.
52 while ((line = nextLine()) != null) {
\r
// Empty lines are ignored.
53 if (line.length() > 0) {
\r
54 // Do we have an id line?
\r
55 // JBPNote - this code needs to be standardised to EBI/whatever for the
\r
56 // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)
\r
// substring(0, 1) cannot fail here because the line is known to be non-empty.
58 if (line.substring(0, 1).equals(">")) {
\r
// Store the text collected so far under the current id.
// NOTE(review): the conditions that choose between this call and the next
// one, and the arguments of the next one, are not visible in this extract.
61 seqs.addElement(new Sequence(id,
\r
62 seq.toString(), sstart, send));
\r
64 seqs.addElement(new Sequence(id,
\r
// The id is the first space-separated token of the line, without its
// leading ">".
72 StringTokenizer str = new StringTokenizer(line, " ");
\r
74 id = str.nextToken();
\r
75 id = id.substring(1);
\r
// Pattern for ids shaped like  db|accession|name : a prefix of letters and
// hyphens (optionally containing one "/"), then group 1 = word characters
// between the two "|" and group 2 = everything after the second "|".
77 com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
\r
78 "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");
\r
79 // JBPNote At the moment - we don't get rid of the friendly names but this
\r
80 // behaviour is probably wrong in the long run.
\r
81 if (dbId.search(id)) {
\r
82 String dbid = dbId.stringMatched(1);
\r
83 String idname = dbId.stringMatched(2);
\r
// A non-empty group-2 name containing "_" replaces the id.
84 if ( (idname.length() > 0) &&
\r
85 (idname.indexOf("_") > -1)) {
\r
86 id = idname; // use the friendly name - apparently no dbid
\r
// A group-1 value longer than one character replaces the id.
// NOTE(review): whether this test is an else-branch of the one above or
// always runs after it is not visible in this extract.
88 if (dbid.length()>1) {
\r
89 id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise
\r
// Ids of the form name/start-end: indexOf > 0 means the "/" must not be the
// first character of the id.
93 if (id.indexOf("/") > 0) {
\r
94 StringTokenizer st = new StringTokenizer(id, "/");
\r
// Only split when there are exactly two "/"-separated parts.
96 if (st.countTokens() == 2) {
\r
97 id = st.nextToken();
\r
99 String tmp = st.nextToken();
\r
101 st = new StringTokenizer(tmp, "-");
\r
// The second part is used as a range only when it has exactly two
// "-"-separated tokens; both are parsed as integers.
103 if (st.countTokens() == 2) {
\r
104 sstart = Integer.valueOf(st.nextToken())
\r
106 send = Integer.valueOf(st.nextToken()).intValue();
\r
// Start a fresh buffer for the text that follows this id line.
111 seq = new StringBuffer();
\r
// Accumulate sequence text. NOTE(review): presumably the branch taken for
// lines that are not id lines - the enclosing else is not visible here.
113 seq = seq.append(line);
\r
// The collected text is validated in upper case before it is stored.
119 if (!isValidProteinSequence(seq.toString().toUpperCase())) {
\r
120 throw new IOException("Invalid protein sequence");
\r
// The sequence text is stored upper-cased. NOTE(review): the remaining
// constructor arguments and the condition separating these two calls are
// not visible in this extract.
124 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
127 seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
\r
/**
 * Convenience overload: equivalent to print(s, 72), i.e. a line length of 72.
 *
 * @param s sequences to format
 * @return the result of print(s, 72)
 */
133 public static String print(SequenceI[] s) {
\r
134 return print(s, 72);
\r
/**
 * Convenience overload: equivalent to print(s, len, true).
 *
 * @param s   sequences to format
 * @param len line length handed on to the three-argument overload
 * @return the result of print(s, len, true)
 */
137 public static String print(SequenceI[] s, int len) {
\r
138 return print(s, len, true);
\r
/**
 * Convenience overload: equivalent to print(s, len, gaps, true), so the
 * display id is used for each header.
 *
 * @param s    sequences to format
 * @param len  line length handed on to the four-argument overload
 * @param gaps handed on unchanged to the four-argument overload
 * @return the result of print(s, len, gaps, true)
 */
141 public static String print(SequenceI[] s, int len, boolean gaps) {
\r
142 return print(s, len, gaps, true);
\r
/**
 * Formats sequences as text: for each sequence a header line holding its
 * display id or name, followed by the sequence characters broken into lines
 * of at most len characters, each ended with "\n". Processing stops at the
 * end of the array or at the first null element.
 *
 * NOTE(review): some statements of this method (the declaration and
 * increment of i, the branch on gaps, the start of the header append) are
 * not visible in this extract.
 *
 * @param s         sequences to format
 * @param len       maximum number of sequence characters per output line;
 *                  used as a divisor, so it must not be 0
 * @param gaps      NOTE(review): presumably selects between the raw
 *                  getSequence() text and the AlignSeq.extractGaps result -
 *                  the branch itself is not visible here, confirm
 * @param displayId when true the header uses getDisplayId(), otherwise
 *                  getName()
 * @return the accumulated text
 */
145 public static String print(SequenceI[] s, int len, boolean gaps,
\r
146 boolean displayId) {
\r
147 StringBuffer out = new StringBuffer();
\r
// Stop at the end of the array or at the first null entry.
150 while ((i < s.length) && (s[i] != null)) {
\r
154 seq = s[i].getSequence();
\r
// NOTE(review): "-. " is presumably the set of gap characters that
// extractGaps removes - confirm against AlignSeq.
156 seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
\r
159 // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
\r
161 ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
\r
// Integer division plus one can overshoot by one chunk; the
// start < seq.length() test below emits nothing for such a chunk.
163 int nochunks = (seq.length() / len) + 1;
\r
165 for (int j = 0; j < nochunks; j++) {
\r
166 int start = j * len;
\r
167 int end = start + len;
\r
// Full-width chunk that ends before the end of the sequence.
169 if (end < seq.length()) {
\r
170 out.append(seq.substring(start, end) + "\n");
\r
// Last chunk: whatever remains from start onwards.
171 } else if (start < seq.length()) {
\r
172 out.append(seq.substring(start) + "\n");
\r
179 return out.toString();
\r
/**
 * Formats this object's sequences by passing getSeqsAsArray() to the static
 * print(SequenceI[]) overload.
 *
 * @return the text produced by print(SequenceI[])
 */
182 public String print() {
\r
183 return print(getSeqsAsArray());
\r