3 import jalview.datamodel.*;
\r
4 import jalview.analysis.*;
\r
9 public class FastaFile extends AlignFile {
\r
// Creates a FastaFile from a single String argument.
// NOTE(review): the constructor body is not visible here; presumably it
// forwards inStr to the AlignFile superclass - confirm.
14 public FastaFile(String inStr) {
\r
// Creates a FastaFile from inFile and type; declared to throw IOException.
// NOTE(review): the constructor body is not visible here; presumably it
// forwards both arguments to the AlignFile superclass - confirm which
// values of type are accepted.
18 public FastaFile(String inFile, String type) throws IOException {
\r
/**
 * Parses FASTA-style text obtained line by line from nextLine() and adds
 * Sequence objects to seqs.
 *
 * A non-empty line whose first character is ">" is treated as an id line;
 * the id is its first space-delimited token with the ">" removed. Other
 * non-empty lines are (presumably in the else branch) appended to the
 * residue buffer. Residues are upper-cased when a Sequence is created.
 *
 * NOTE(review): the declarations of id, line, sstart and send, and the
 * conditions that choose between the paired addElement calls, are not
 * visible here; the comments below describe only the statements shown.
 *
 * @throws IOException with message "Invalid protein sequence" when
 *         isValidProteinSequence rejects the buffered residues;
 *         presumably also propagated from nextLine() - confirm
 */
22 public void parse() throws IOException
\r
26 StringBuffer seq = new StringBuffer();
\r
// NOTE(review): no use of flag is visible in this method - possibly dead;
// confirm before removing.
28 boolean flag = false;
\r
// Read until nextLine() reports end of input with null.
35 while ((line = nextLine()) != null) {
\r
// Empty lines are ignored.
37 if (line.length() > 0) {
\r
39 // Do we have an id line?
\r
41 if (line.substring(0,1).equals(">")) {
\r
// Store the residues buffered so far, using the start/end parsed from the id.
45 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));
\r
// Alternative form: number the residues 1..seq.length().
// NOTE(review): the condition selecting between these two calls is not
// shown - confirm.
47 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));
\r
// Anything after the first space on the id line is discarded.
53 StringTokenizer str = new StringTokenizer(line," ");
\r
55 id = str.nextToken();
\r
// Drop the leading ">".
56 id = id.substring(1);
\r
// Pattern for ids shaped like letters/letters|word|rest; group 1 is read
// as dbid and group 2 as idname below.
// NOTE(review): a new Regex is built for every id line.
57 com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex("[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)");
\r
58 if (dbId.search(id))
\r
60 String dbid = dbId.stringMatched(1);
\r
61 String idname = dbId.stringMatched(2);
\r
// Use idname only when it is non-empty and contains an underscore.
62 if (idname.length()>0 && idname.indexOf("_") > -1)
\r
64 id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here.
\r
68 id = dbid; // use dbid to ensure sensible queries
\r
// An id containing "/" after its first character may carry a range,
// i.e. name/start-end.
// NOTE(review): no reset of sstart/send is visible before this point; if
// there is none, an id without a range would reuse the values parsed for
// an earlier id - confirm.
72 if (id.indexOf("/") > 0 ) {
\r
74 StringTokenizer st = new StringTokenizer(id,"/");
\r
// Only ids with exactly one "/" separator are split.
75 if (st.countTokens() == 2) {
\r
76 id = st.nextToken();
\r
77 String tmp = st.nextToken();
\r
79 st = new StringTokenizer(tmp,"-");
\r
81 if (st.countTokens() == 2) {
\r
// Integer.valueOf throws NumberFormatException for non-numeric bounds.
// NOTE(review): no handler for it is visible here.
82 sstart = Integer.valueOf(st.nextToken()).intValue();
\r
83 send = Integer.valueOf(st.nextToken()).intValue();
\r
// Start an empty buffer for the residues that follow this id line.
88 seq = new StringBuffer();
\r
// Line without a leading ">": append it to the residue buffer.
91 seq = seq.append(line);
\r
// NOTE(review): this is the only validation call visible; the addElement
// calls on the id-line path above show no such check - confirm whether
// earlier records are validated.
97 if(!isValidProteinSequence(seq.toString().toUpperCase()))
\r
98 throw new IOException("Invalid protein sequence");
\r
// Store the last buffered record, same two forms as on the id-line path.
101 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));
\r
103 seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));
\r
109 public static String print(SequenceI[] s) {
\r
110 return print(s,72);
\r
112 public static String print(SequenceI[] s, int len) {
\r
113 return print(s,len,true);
\r
116 public static String print(SequenceI[] s, int len,boolean gaps) {
\r
117 return print(s,len,gaps,true);
\r
/**
 * Builds FASTA-style text for the sequences in s: one ">" header line per
 * sequence followed by its residues split into chunks, each on its own line.
 *
 * Output stops at the end of the array or at the first null element.
 *
 * NOTE(review): the declarations of i, seq and start, the branch on gaps
 * and the loop increment are not visible here; the comments below describe
 * only the statements shown.
 *
 * @param s sequences to write
 * @param len chunk length; each chunk ends at start + len.
 *        NOTE(review): len == 0 would make the nochunks division throw
 *        ArithmeticException - confirm callers never pass it
 * @param gaps NOTE(review): presumably selects between the raw
 *        getSequence() text and the AlignSeq.extractGaps("-. ", ...)
 *        form - confirm against the branch that is not shown
 * @param displayId when true the header uses getDisplayId(), otherwise
 *        getName()
 * @return the accumulated text
 */
120 public static String print(SequenceI[] s, int len,boolean gaps, boolean displayId) {
\r
121 StringBuffer out = new StringBuffer();
\r
// A null entry ends the output early.
123 while (i < s.length && s[i] != null) {
\r
126 seq = s[i].getSequence();
\r
128 seq = AlignSeq.extractGaps("-. ",s[i].getSequence());
\r
130 // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
\r
131 out.append(">" + ((displayId) ? s[i].getDisplayId() : s[i].getName())+"\n");
\r
// Integer division plus one. NOTE(review): when the length is an exact
// multiple of len this presumably yields one extra iteration that the
// bounds checks below turn into a no-op - confirm against how start is set.
133 int nochunks = seq.length() / len + 1;
\r
135 for (int j = 0; j < nochunks; j++) {
\r
137 int end = start + len;
\r
// Chunk ends before the end of the sequence: emit start..end.
139 if (end < seq.length()) {
\r
140 out.append(seq.substring(start,end) + "\n");
// Last chunk: emit whatever remains from start; nothing when start is
// already at or past the end.
141 } else if (start < seq.length()) {
\r
142 out.append(seq.substring(start) + "\n");
\r
147 return out.toString();
\r
150 public String print() {
\r
151 return print(getSeqsAsArray());
\r