Next version of JABA
[jabaws.git] / binaries / src / muscle / fasta2.cpp
1 #include "muscle.h"\r
2 #include <stdio.h>\r
3 #include <errno.h>\r
4 \r
// Minimum number of bytes by which ADD() enlarges its buffer each time the
// buffer is full. (A commented-out earlier definition used 16*1024.)
static const int BUFFER_BYTES = 128;

// Line-terminator characters: either one ends a FASTA label line.
static const int CR = '\r';
static const int NL = '\n';
9 \r
// ADD(c): append one character to a growable char array.
//
// Operates on three names that must be in scope at the point of use:
//   char *Buffer           - array allocated with new[] (0 while still empty)
//   unsigned BufferLength  - number of bytes currently allocated for Buffer
//   unsigned Pos           - index at which the next character is stored
//
// When Buffer is full it is replaced by a larger new[] array; the old contents
// are copied across and the old array is released with delete[]. The capacity
// grows by max(BufferLength, BUFFER_BYTES), i.e. it at least doubles, so that
// appending n characters costs O(n) copying in total. If doubling would wrap
// the unsigned capacity, growth falls back to BUFFER_BYTES.
//
// memcpy is skipped while Buffer is still 0: passing a null pointer to memcpy
// is undefined behaviour even when the byte count is zero.
//
// The expansion is deliberately a bare brace block rather than
// do { ... } while (0), because existing call sites write ADD(c) without a
// trailing semicolon.
#define ADD(c) \
	{ \
	if (Pos >= BufferLength) \
		{ \
		const unsigned MinGrowth_ = (unsigned) BUFFER_BYTES; \
		unsigned Growth_ = (BufferLength > MinGrowth_) ? BufferLength : MinGrowth_; \
		if (BufferLength + Growth_ < BufferLength) \
			Growth_ = MinGrowth_; \
		const unsigned NewBufferLength = BufferLength + Growth_; \
		char *NewBuffer = new char[NewBufferLength]; \
		if (0 != Buffer) \
			memcpy(NewBuffer, Buffer, BufferLength); \
		delete[] Buffer; \
		Buffer = NewBuffer; \
		BufferLength = NewBufferLength; \
		} \
	Buffer[Pos++] = (char) (c); \
	}
23 \r
24 // Get next sequence from file.\r
25 char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)\r
26         {\r
27         unsigned BufferLength = 0;\r
28         unsigned Pos = 0;\r
29         char *Buffer = 0;\r
30 \r
31         int c = fgetc(f);\r
32         if (EOF == c)\r
33                 return 0;\r
34         if ('>' != c)\r
35                 Quit("Invalid file format, expected '>' to start FASTA label");\r
36 \r
37         for (;;)\r
38                 {\r
39                 int c = fgetc(f);\r
40                 if (EOF == c)\r
41                         Quit("End-of-file or input error in FASTA label");\r
42 \r
43         // NL or CR terminates label\r
44                 if (NL == c || CR == c)\r
45                         break;\r
46 \r
47         // All other characters added to label\r
48                 ADD(c)\r
49                 }\r
50 \r
51 // Nul-terminate label\r
52         ADD(0)\r
53         *ptrLabel = Buffer;\r
54 \r
55         BufferLength = 0;\r
56         Pos = 0;\r
57         Buffer = 0;\r
58         int PreviousChar = NL;\r
59         for (;;)\r
60                 {\r
61                 int c = fgetc(f);\r
62                 if (EOF == c)\r
63                         {\r
64                         if (feof(f))\r
65                                 break;\r
66                         else if (ferror(f))\r
67                                 Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",\r
68                                   errno, strerror(errno));\r
69                         else\r
70                                 Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",\r
71                                   errno, strerror(errno));\r
72                         }\r
73 \r
74                 if ('>' == c)\r
75                         {\r
76                         if (NL == PreviousChar || CR == PreviousChar)\r
77                                 {\r
78                                 ungetc(c, f);\r
79                                 break;\r
80                                 }\r
81                         else\r
82                                 Quit("Unexpected '>' in FASTA sequence data");\r
83                         }\r
84                 else if (isspace(c))\r
85                         ;\r
86                 else if (IsGapChar(c))\r
87                         {\r
88                         if (!DeleteGaps)\r
89                                 ADD(c)\r
90                         }\r
91                 else if (isalpha(c))\r
92                         {\r
93                         c = toupper(c);\r
94                         ADD(c)\r
95                         }\r
96                 else if (isprint(c))\r
97                         {\r
98                         Warning("Invalid character '%c' in FASTA sequence data, ignored", c);\r
99                         continue;\r
100                         }\r
101                 else\r
102                         {\r
103                         Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);\r
104                         continue;\r
105                         }\r
106                 PreviousChar = c;\r
107                 }\r
108 \r
109         if (0 == Pos)\r
110                 return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps);\r
111 \r
112         *ptrSeqLength = Pos;\r
113         return Buffer;\r
114         }\r