* along with this program; if not, write to the Free Software\r
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
*/\r
-\r
package jalview.io;\r
\r
-import jalview.datamodel.*;\r
import jalview.analysis.*;\r
\r
+import jalview.datamodel.*;\r
+\r
import java.io.*;\r
+\r
import java.util.*;\r
\r
+\r
public class FastaFile extends AlignFile {\r
+    /**
+     * Creates a FastaFile without supplying any input; only the superclass
+     * no-argument constructor runs.
+     */
+    public FastaFile() {
+        super();
+    }
\r
- public FastaFile()\r
- {}\r
-\r
- public FastaFile(String inStr) {\r
- super(inStr);\r
- }\r
-\r
- public FastaFile(String inFile, String type) throws IOException {\r
- super(inFile,type);\r
- }\r
-\r
- public void parse() throws IOException\r
- {\r
-\r
- String id = "";\r
- StringBuffer seq = new StringBuffer();\r
- int count = 0;\r
- boolean flag = false;\r
-\r
- int sstart = 0;\r
- int send = 0;\r
-\r
- String line;\r
-\r
- while ((line = nextLine()) != null) {\r
-\r
- if (line.length() > 0) {\r
-\r
- // Do we have an id line?\r
-\r
- if (line.substring(0,1).equals(">")) {\r
-\r
- if (count != 0) {\r
- if (sstart != 0) {\r
- seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));\r
- } else {\r
- seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));\r
- }\r
- }\r
-\r
- count++;\r
-\r
- StringTokenizer str = new StringTokenizer(line," ");\r
-\r
- id = str.nextToken();\r
- id = id.substring(1);\r
- com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex("[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)");\r
- if (dbId.search(id))\r
- {\r
- String dbid = dbId.stringMatched(1);\r
- String idname = dbId.stringMatched(2);\r
- if (idname.length()>0 && idname.indexOf("_") > -1)\r
- {\r
- id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here.\r
- }\r
- else\r
- {\r
- id = dbid; // use dbid to ensure sensible queries\r
- }\r
-\r
- }\r
- if (id.indexOf("/") > 0 ) {\r
-\r
- StringTokenizer st = new StringTokenizer(id,"/");\r
- if (st.countTokens() == 2) {\r
- id = st.nextToken();\r
- String tmp = st.nextToken();\r
-\r
- st = new StringTokenizer(tmp,"-");\r
-\r
- if (st.countTokens() == 2) {\r
- sstart = Integer.valueOf(st.nextToken()).intValue();\r
- send = Integer.valueOf(st.nextToken()).intValue();\r
- }\r
- }\r
- }\r
-\r
- seq = new StringBuffer();\r
-\r
- } else {\r
- seq = seq.append(line);\r
- }\r
- }\r
- }\r
- if (count > 0) {\r
-\r
- if(!isValidProteinSequence(seq.toString().toUpperCase()))\r
- throw new IOException("Invalid protein sequence");\r
-\r
- if (sstart != 0) {\r
- seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));\r
- } else {\r
- seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));\r
- }\r
- }\r
-\r
- }\r
-\r
- public static String print(SequenceI[] s) {\r
- return print(s,72);\r
- }\r
- public static String print(SequenceI[] s, int len) {\r
- return print(s,len,true);\r
- }\r
-\r
- public static String print(SequenceI[] s, int len,boolean gaps) {\r
- return print(s,len,gaps,true);\r
- }\r
-\r
- public static String print(SequenceI[] s, int len,boolean gaps, boolean displayId) {\r
- StringBuffer out = new StringBuffer();\r
- int i = 0;\r
- while (i < s.length && s[i] != null) {\r
- String seq = "";\r
- if (gaps) {\r
- seq = s[i].getSequence();\r
- } else {\r
- seq = AlignSeq.extractGaps("-. ",s[i].getSequence());\r
- }\r
- // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +\r
- out.append(">" + ((displayId) ? s[i].getDisplayId() : s[i].getName())+"\n");\r
-\r
- int nochunks = seq.length() / len + 1;\r
-\r
- for (int j = 0; j < nochunks; j++) {\r
- int start = j*len;\r
- int end = start + len;\r
-\r
- if (end < seq.length()) {\r
- out.append(seq.substring(start,end) + "\n");\r
- } else if (start < seq.length()) {\r
- out.append(seq.substring(start) + "\n");\r
+ /**
+ * Creates a FastaFile from a single string, which is handed unchanged to
+ * the AlignFile constructor.
+ *
+ * @param inStr value forwarded to the superclass (presumably the FASTA
+ * text itself - confirm against AlignFile)
+ */
+ public FastaFile(String inStr) {
+ super(inStr);
+ }
+\r
+ /**
+ * Creates a FastaFile from a source and a source type; both arguments are
+ * handed unchanged to the AlignFile constructor.
+ *
+ * @param inFile value forwarded to the superclass (presumably a file name
+ * or URL - confirm against AlignFile)
+ * @param type value forwarded to the superclass (presumably selects how
+ * inFile is interpreted - confirm against AlignFile)
+ * @throws IOException declared for the superclass constructor call; nothing
+ * in this body throws it directly
+ */
+ public FastaFile(String inFile, String type) throws IOException {
+ super(inFile, type);
+ }
+\r
+    /**
+     * Reads FASTA records line by line through nextLine() and appends one
+     * Sequence to seqs per record.
+     *
+     * A line starting with '>' opens a new record. Its id is the first
+     * space-delimited token without the '>'. If that token has the form
+     * "xx/yy|accession|name", the name is used when it contains an
+     * underscore, otherwise the accession. A trailing "/start-end" suffix is
+     * removed from the id and, when both parts are integers, used as the
+     * sequence range. All other non-empty lines are appended to the residues
+     * of the current record. Residues are upper-cased before being stored.
+     *
+     * @throws IOException if the last record fails isValidProteinSequence
+     */
+    public void parse() throws IOException {
+        String id = "";
+        StringBuffer seq = new StringBuffer();
+        int count = 0;
+        int sstart = 0;
+        int send = 0;
+        String line;
+
+        while ((line = nextLine()) != null) {
+            if (line.length() == 0) {
+                continue;
+            }
+
+            if (line.charAt(0) != '>') {
+                // Residue line: extend the record currently being read.
+                seq.append(line);
+
+                continue;
+            }
+
+            // Header line: the previous record (if any) is now complete.
+            if (count != 0) {
+                storeParsedSequence(id, seq, sstart, send);
+            }
+
+            count++;
+
+            // Start the new record from a clean state. Without resetting the
+            // range, a header with no "/start-end" suffix would inherit the
+            // range of an earlier record.
+            seq = new StringBuffer();
+            sstart = 0;
+            send = 0;
+
+            StringTokenizer str = new StringTokenizer(line, " ");
+
+            // The first token always begins with the '>' tested above.
+            id = str.nextToken().substring(1);
+
+            com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
+                    "[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)");
+
+            if (dbId.search(id)) {
+                String dbid = dbId.stringMatched(1);
+                String idname = dbId.stringMatched(2);
+
+                if ((idname.length() > 0) && (idname.indexOf("_") > -1)) {
+                    // just use friendly name
+                    // JBPNote: we may lose uniprot standardised ID here.
+                    id = idname;
+                } else {
+                    id = dbid; // use dbid to ensure sensible queries
+                }
+            }
+
+            if (id.indexOf("/") > 0) {
+                StringTokenizer st = new StringTokenizer(id, "/");
+
+                if (st.countTokens() == 2) {
+                    id = st.nextToken();
+
+                    StringTokenizer range = new StringTokenizer(st.nextToken(),
+                            "-");
+
+                    if (range.countTokens() == 2) {
+                        try {
+                            // Parse both bounds before assigning either, so a
+                            // bad end value cannot leave a half-set range.
+                            int first = Integer.parseInt(range.nextToken());
+                            int last = Integer.parseInt(range.nextToken());
+                            sstart = first;
+                            send = last;
+                        } catch (NumberFormatException ignored) {
+                            // The suffix is not a numeric range: keep the
+                            // shortened id and fall back to 1..length.
+                        }
+                    }
+                }
+            }
+        }
+
+        if (count > 0) {
+            // NOTE(review): only the final record is validated; earlier
+            // records were stored unchecked - confirm this is intended.
+            if (!isValidProteinSequence(
+                        seq.toString().toUpperCase(Locale.ENGLISH))) {
+                throw new IOException("Invalid protein sequence");
+            }
+
+            storeParsedSequence(id, seq, sstart, send);
         }
- }
- i++;
     }
+
+    /**
+     * Appends one finished record to seqs. The explicit range is used when
+     * sstart is non-zero; otherwise the range is 1..(number of residue
+     * characters read).
+     */
+    private void storeParsedSequence(String id, StringBuffer seq, int sstart,
+        int send) {
+        // Fixed locale: under the default locale a Turkish setting would
+        // upper-case 'i' to a dotted capital I.
+        String residues = seq.toString().toUpperCase(Locale.ENGLISH);
+
+        if (sstart != 0) {
+            seqs.addElement(new Sequence(id, residues, sstart, send));
+        } else {
+            seqs.addElement(new Sequence(id, residues, 1, seq.length()));
+        }
+    }
- return out.toString();\r
- }\r
\r
- public String print() {\r
- return print(getSeqsAsArray());\r
- }\r
-}\r
+    /**
+     * Formats the sequences as FASTA text wrapped at 72 characters per line,
+     * with gap characters kept and display ids used in the header lines.
+     *
+     * @param s sequences to write; output stops at the first null entry
+     * @return the FASTA text
+     */
+    public static String print(SequenceI[] s) {
+        final int defaultLineLength = 72;
+
+        return print(s, defaultLineLength);
+    }
\r
+    /**
+     * Formats the sequences as FASTA text with the given line length, with
+     * gap characters kept and display ids used in the header lines.
+     *
+     * @param s sequences to write; output stops at the first null entry
+     * @param len number of sequence characters per output line
+     * @return the FASTA text
+     */
+    public static String print(SequenceI[] s, int len) {
+        final boolean keepGaps = true;
+
+        return print(s, len, keepGaps);
+    }
+\r
+    /**
+     * Formats the sequences as FASTA text with the given line length and gap
+     * handling, using display ids in the header lines.
+     *
+     * @param s sequences to write; output stops at the first null entry
+     * @param len number of sequence characters per output line
+     * @param gaps true to write sequences unchanged, false to strip the
+     *        characters '-', '.' and ' ' first
+     * @return the FASTA text
+     */
+    public static String print(SequenceI[] s, int len, boolean gaps) {
+        final boolean useDisplayId = true;
+
+        return print(s, len, gaps, useDisplayId);
+    }
+\r
+    /**
+     * Formats sequences as FASTA text: a "&gt;" header line per sequence
+     * followed by the sequence split into lines of at most len characters.
+     * Every line ends with "\n". A sequence of length zero produces only its
+     * header line.
+     *
+     * @param s sequences to write; output stops at the first null entry
+     * @param len maximum number of sequence characters per line; a value of
+     *        zero or less writes each sequence on a single line
+     * @param gaps true to write sequences unchanged, false to pass them
+     *        through AlignSeq.extractGaps with "-. " first
+     * @param displayId true to label with getDisplayId(), false to label
+     *        with getName()
+     * @return the FASTA text
+     */
+    public static String print(SequenceI[] s, int len, boolean gaps,
+        boolean displayId) {
+        StringBuffer out = new StringBuffer();
+
+        for (int i = 0; (i < s.length) && (s[i] != null); i++) {
+            String seq = gaps ? s[i].getSequence()
+                              : AlignSeq.extractGaps("-. ", s[i].getSequence());

+            // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
+            out.append(">")
+               .append(displayId ? s[i].getDisplayId() : s[i].getName())
+               .append("\n");

+            int total = seq.length();
+
+            // A non-positive width cannot be used for wrapping (it used to
+            // cause a division by zero or a bad substring range), so fall
+            // back to one line per sequence.
+            int width = (len > 0) ? len : total;
+            int start = 0;

+            while (start < total) {
+                // Bounded by the remaining length, so start never overflows.
+                int end = start + Math.min(width, total - start);
+                out.append(seq.substring(start, end)).append("\n");
+                start = end;
+            }
+        }
+
+        return out.toString();
+    }
+\r
+    /**
+     * Formats the sequences returned by getSeqsAsArray() as FASTA text using
+     * the defaults of print(SequenceI[]).
+     *
+     * @return the FASTA text
+     */
+    public String print() {
+        SequenceI[] all = getSeqsAsArray();
+
+        return print(all);
+    }
+}\r