2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
22 import jalview.datamodel.*;
\r
23 import jalview.analysis.*;
\r
28 public class FastaFile extends AlignFile {
\r
/**
 * Constructor taking a single String argument.
 *
 * NOTE(review): the constructor body is not visible in this listing, so what is
 * done with inStr (presumably handed to the AlignFile superclass) must be
 * confirmed against the full source.
 *
 * @param inStr String input for this FASTA file object
 */
33 public FastaFile(String inStr) {
\r
/**
 * Constructor taking two String arguments.
 *
 * NOTE(review): the constructor body is not visible in this listing; the meaning
 * of type (presumably the kind of source that inFile names) must be confirmed
 * against the full source.
 *
 * @param inFile first String argument
 * @param type second String argument
 * @throws IOException declared by the signature; the throwing code is not visible here
 */
37 public FastaFile(String inFile, String type) throws IOException {
\r
/**
 * Reads lines with nextLine() until it returns null and appends Sequence
 * objects to seqs. A non-empty line whose first character is ">" is handled as
 * an id (header) line; line text is appended to the buffer seq.
 *
 * NOTE(review): this listing omits a number of original lines, including the
 * declarations of line, id, sstart and send and the conditions that choose
 * between the paired seqs.addElement(...) calls. The comments below describe
 * only the statements that are visible and hedge everything else.
 *
 * @throws IOException with the message "Invalid protein sequence" when
 *         isValidProteinSequence(...) rejects the upper-cased buffer contents
 */
41 public void parse() throws IOException
\r
// Buffer that collects the residue text of the sequence currently being read.
45 StringBuffer seq = new StringBuffer();
\r
// NOTE(review): no use of flag is visible in this listing; check whether it is still needed.
47 boolean flag = false;
\r
54 while ((line = nextLine()) != null) {
\r
// Empty lines are skipped.
56 if (line.length() > 0) {
\r
58 // Do we have an id line?
\r
60 if (line.substring(0,1).equals(">")) {
\r
// Two ways of storing what has been collected in seq so far: with the parsed
// sstart/send values, or numbered from 1 to seq.length().
// NOTE(review): presumably alternatives of one if/else whose condition is on a
// line not shown in this listing - confirm.
64 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));
\r
66 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));
\r
// The id is the first space-delimited token of the header line with its first
// character dropped.
72 StringTokenizer str = new StringTokenizer(line," ");
\r
74 id = str.nextToken();
\r
75 id = id.substring(1);
\r
// Pattern: letters/hyphens, "/", letters/hyphens, "|", word characters (group 1),
// "|", remaining text (group 2).
// NOTE(review): a new Regex object is constructed for every id line; the pattern
// is constant and could be built once.
76 com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex("[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)");
\r
// NOTE(review): assumes search() reports a match found anywhere in id - confirm
// against the com.stevesoft.pat documentation.
77 if (dbId.search(id))
\r
79 String dbid = dbId.stringMatched(1);
\r
80 String idname = dbId.stringMatched(2);
\r
// Group 2 is used as the id when it is non-empty and contains an underscore;
// the assignment of dbid below is presumably the alternative branch (the else is
// not visible in this listing).
81 if (idname.length()>0 && idname.indexOf("_") > -1)
\r
83 id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here.
\r
87 id = dbid; // use dbid to ensure sensible queries
\r
// An id with a "/" after its first character may have the form name/start-end.
91 if (id.indexOf("/") > 0 ) {
\r
93 StringTokenizer st = new StringTokenizer(id,"/");
\r
// Only split when there are exactly two "/"-separated parts.
94 if (st.countTokens() == 2) {
\r
95 id = st.nextToken();
\r
96 String tmp = st.nextToken();
\r
// The second part is read as a range only when it has exactly two
// "-"-separated parts.
98 st = new StringTokenizer(tmp,"-");
\r
100 if (st.countTokens() == 2) {
\r
// Integer.valueOf throws NumberFormatException for non-numeric text.
// NOTE(review): no handling of that exception is visible in this listing.
101 sstart = Integer.valueOf(st.nextToken()).intValue();
\r
102 send = Integer.valueOf(st.nextToken()).intValue();
\r
// Replace the buffer with a fresh, empty one.
107 seq = new StringBuffer();
\r
// Append the line to the buffer.
// NOTE(review): presumably the branch for lines that are not id lines; the
// enclosing else is not visible in this listing.
110 seq = seq.append(line);
\r
// Validate the upper-cased buffer contents before storing them.
116 if(!isValidProteinSequence(seq.toString().toUpperCase()))
\r
117 throw new IOException("Invalid protein sequence");
\r
// Same two storage forms as for the header case above.
// NOTE(review): presumably this stores the last sequence once input is
// exhausted; the selecting condition is not visible in this listing.
120 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));
\r
122 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));
\r
/**
 * Convenience overload: delegates to print(s, 72).
 *
 * @param s sequences to format
 * @return the text produced by print(s, 72)
 */
128 public static String print(SequenceI[] s) {
\r
129 return print(s,72);
\r
/**
 * Convenience overload: delegates to print(s, len, true), i.e. with gaps set
 * to true.
 *
 * @param s sequences to format
 * @param len passed through unchanged as the len argument
 * @return the text produced by print(s, len, true)
 */
131 public static String print(SequenceI[] s, int len) {
\r
132 return print(s,len,true);
\r
/**
 * Convenience overload: delegates to print(s, len, gaps, true), i.e. with
 * displayId set to true.
 *
 * @param s sequences to format
 * @param len passed through unchanged as the len argument
 * @param gaps passed through unchanged as the gaps argument
 * @return the text produced by print(s, len, gaps, true)
 */
135 public static String print(SequenceI[] s, int len,boolean gaps) {
\r
136 return print(s,len,gaps,true);
\r
/**
 * Builds FASTA-style text for the given sequences: for each one a header line
 * beginning with ">" followed by the sequence text split into chunks, each
 * chunk terminated by "\n". Processing stops at the end of the array or at the
 * first null element.
 *
 * NOTE(review): this listing omits several original lines (the declarations of
 * i, seq and start, the test on gaps, and the loop increment); statements about
 * them below are hedged.
 *
 * @param s sequences to format
 * @param len chunk length used when splitting the sequence text; a value of 0
 *        makes the division seq.length() / len throw ArithmeticException
 * @param gaps NOTE(review): presumably selects between the raw getSequence()
 *        text and the result of AlignSeq.extractGaps("-. ", ...); the test is
 *        not visible in this listing
 * @param displayId when true the header uses getDisplayId(), otherwise getName()
 * @return the accumulated text
 */
139 public static String print(SequenceI[] s, int len,boolean gaps, boolean displayId) {
\r
140 StringBuffer out = new StringBuffer();
\r
// Stop at the end of the array or at the first null entry.
142 while (i < s.length && s[i] != null) {
\r
// Two candidate sources for the text to write: the sequence as stored...
145 seq = s[i].getSequence();
\r
// ...or the result of AlignSeq.extractGaps with the characters "-. ".
// NOTE(review): presumably the branch taken when gaps is false - confirm.
147 seq = AlignSeq.extractGaps("-. ",s[i].getSequence());
\r
149 // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
\r
// Header line: ">" followed by the display id or the plain name.
150 out.append(">" + ((displayId) ? s[i].getDisplayId() : s[i].getName())+"\n");
\r
// One more chunk than the integer quotient so a trailing partial chunk is
// covered; the tests below emit nothing when start is not below seq.length().
152 int nochunks = seq.length() / len + 1;
\r
154 for (int j = 0; j < nochunks; j++) {
\r
// NOTE(review): the assignment of start is not visible; presumably j * len.
156 int end = start + len;
\r
// A full chunk when it ends before the end of the text...
158 if (end < seq.length()) {
\r
159 out.append(seq.substring(start,end) + "\n");
\r
// ...otherwise whatever remains from start, if anything.
160 } else if (start < seq.length()) {
\r
161 out.append(seq.substring(start) + "\n");
\r
166 return out.toString();
\r
/**
 * Instance form: passes the array returned by getSeqsAsArray() to the static
 * print(SequenceI[]) overload.
 *
 * NOTE(review): getSeqsAsArray() is not defined in this listing; presumably it
 * returns the sequences held by this object - confirm.
 *
 * @return the text produced by print(getSeqsAsArray())
 */
169 public String print() {
\r
170 return print(getSeqsAsArray());
\r