+++ /dev/null
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-
-#include "getquery.h"
-#include "gjutil.h"
-
-/* get the next sequence entry from a fasta format file and
- return a pointer to a structure containing this information.
-
- Format expected of the input file is:
-
- >IDENT Title here
- ONE LETTER CODE SEQUENCE ON SEVERAL LINES
- LIKE THIS
- >NEXTIDENT Next title etc.
-
- The routine reads lines from the file until it finds one that starts
- in '>'. It then reads this line as an ID, title line before reading
- all alphabetic characters that follow as the amino acid sequence.
- The sequence is terminated by the next '>' or End of File.
-
- This means that the must ONLY contain sequences. PIR format permits
- more flexibility in the input file.
-
- NOTE: This routine assumes that no line will be longer than the
- p->MAX_BUFFER_LEN. Any program that calls this routine should
- have p->MAX_BUFFER_LEN set suitably big. This can be done by
- pre-checking the database file for line lengths.
-
- Author: G. J. Barton (October 1995)
-
-*/
-
-SEQS *gseq_fasta(FILE *seqfile)
-{
-
- char *buff,*tbuff;
- SEQS *ret_val;
- int MAX_BUFFER_LEN = 10000;
- int MAX_SEQ_LEN = 10000;
- char *ident = NULL;
- char *title = NULL;
-
- GJ_S_COUNT j;
- int c;
- STD_FILES;
-
- buff = (char *) GJmalloc(sizeof(char) * MAX_BUFFER_LEN);
- tbuff = buff;
-
- while((buff = fgets(buff,MAX_BUFFER_LEN,seqfile)) != NULL) {
- if(buff[0] == '>') {
- ret_val = (SEQS *) GJmalloc(sizeof(SEQS));
- ident = strtok(&buff[1]," ");
- if(ident != NULL) {
- ident=GJremovechar2(ident,'\n');
- ret_val->id = GJstrdup(ident);
- ret_val->ilen = strlen(ident);
- }else {
- GJerror("Something strange with sequence identifier in fasta file");
- fprintf(std_err,"Line:%s\n",buff);
- return NULL;
- }
- title = strtok(NULL,"\n");
- if(title != NULL) {
- ret_val->title = GJstrdup(title);
- ret_val->tlen = strlen(title);
- }else {
- /* GJerror("Something strange with sequence title in fasta file");*/
- /* fprintf(std_err,"Line:%s\n",buff);*/
- /*if(p->VERBOSE > 10)fprintf(std_err,"Title missing in FASTA file - Inserting dummy: %s\n",buff);*/
- ret_val->title = GJstrdup("-");
- ret_val->tlen = strlen(ret_val->title);
- /* return NULL;*/
- }
- ret_val->seq = (char *) GJmalloc(sizeof(char) * MAX_SEQ_LEN);
- ret_val->seq[0] = ' ';
- j = 0;
- for(;;) {
- c = fgetc(seqfile);
- if(c == EOF || c == '>') {
- ungetc(c,seqfile);
- ret_val->seq[j] = '\0';
- ret_val->slen = j;
- ret_val->seq = (char *) GJrealloc(ret_val->seq,sizeof(char) * ret_val->slen);
- GJfree(buff);
- return ret_val;
- }else if(isalpha(c)) {
- if(j == (MAX_SEQ_LEN - 3)){
- fprintf(std_err,"Sequence too long: %s (> %d residues): Increase MAX_SEQ_LEN\n",
- ret_val->id,j);
- exit(-1);
- }
- ret_val->seq[j++] = toupper(c);
- }
- }
- }
- }
- GJfree(tbuff);
- return(NULL);
-}
-
-/*
-int main(int argc, char **argv)
-{
- FILE *fasta;
- SEQS *seq;
- fasta = fopen("/homes/pvtroshin/large.fasta", "r");
-
- do{
- seq = gseq_fasta(fasta);
- if(seq!=NULL) printf("Seq: %s\n",seq->id );
- } while(seq != NULL);
-
- fclose(fasta);
- return 0;
-}
-*/