6 Extended DNA / RNA alphabet
7 ------------------------------------------
8 Symbol Meaning Nucleic Acid
9 ------------------------------------------
28 IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
29 Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
// Character -> letter lookup tables, indexed by character code.
// g_CharToLetter is filled only for residues (see Res); g_CharToLetterEx
// additionally receives wildcard letters (see Wild) and AX_GAP (see SetGapChar).
32 unsigned g_CharToLetter[MAX_CHAR];
33 unsigned g_CharToLetterEx[MAX_CHAR];
// Letter -> character reverse tables; Res and Wild store the upper-case form.
35 char g_LetterToChar[MAX_ALPHA];
36 char g_LetterExToChar[MAX_ALPHA_EX];
// Per-character case mapping: Res and Wild map a character to its lower-case
// form in g_UnalignChar and to its upper-case form in g_AlignChar.
38 char g_UnalignChar[MAX_CHAR];
39 char g_AlignChar[MAX_CHAR];
// Per-character classification flags set by Res (residue) and Wild (residue + wildcard).
41 bool g_IsWildcardChar[MAX_CHAR];
42 bool g_IsResidueChar[MAX_CHAR];
// Alphabet selection state; g_AlphaSize is assigned from GetAlphaSize() in SetAlpha.
44 ALPHA g_Alpha = ALPHA_Undefined;
45 unsigned g_AlphaSize = 0;
// Res(c, Letter): register character c as a residue with letter code Letter.
// Both the upper- and lower-case forms of c are mapped to Letter in
// g_CharToLetter and g_CharToLetterEx; the reverse tables store the upper-case
// form; both cases are flagged in g_IsResidueChar; g_AlignChar maps either
// case to upper case and g_UnalignChar maps either case to lower case.
// The expansion declares locals named Upper and Lower.
47 #define Res(c, Letter) \
49 const unsigned char Upper = (unsigned char) toupper(c); \
50 const unsigned char Lower = (unsigned char) tolower(c); \
51 g_CharToLetter[Upper] = Letter; \
52 g_CharToLetter[Lower] = Letter; \
53 g_CharToLetterEx[Upper] = Letter; \
54 g_CharToLetterEx[Lower] = Letter; \
55 g_LetterToChar[Letter] = Upper; \
56 g_LetterExToChar[Letter] = Upper; \
57 g_IsResidueChar[Upper] = true; \
58 g_IsResidueChar[Lower] = true; \
59 g_AlignChar[Upper] = Upper; \
60 g_AlignChar[Lower] = Upper; \
61 g_UnalignChar[Upper] = Lower; \
62 g_UnalignChar[Lower] = Lower; \
// Wild(c, Letter): register character c as a wildcard with letter code Letter.
// Only the extended tables (g_CharToLetterEx, g_LetterExToChar) are written;
// g_CharToLetter and g_LetterToChar are left untouched. Both cases of c are
// flagged in g_IsResidueChar and g_IsWildcardChar, and the align/unalign
// tables map either case to upper/lower case respectively.
// The expansion declares locals named Upper and Lower.
65 #define Wild(c, Letter) \
67 const unsigned char Upper = (unsigned char) toupper(c); \
68 const unsigned char Lower = (unsigned char) tolower(c); \
69 g_CharToLetterEx[Upper] = Letter; \
70 g_CharToLetterEx[Lower] = Letter; \
71 g_LetterExToChar[Letter] = Upper; \
72 g_IsResidueChar[Upper] = true; \
73 g_IsResidueChar[Lower] = true; \
74 g_AlignChar[Upper] = Upper; \
75 g_AlignChar[Lower] = Upper; \
76 g_UnalignChar[Upper] = Lower; \
77 g_UnalignChar[Lower] = Lower; \
78 g_IsWildcardChar[Lower] = true; \
79 g_IsWildcardChar[Upper] = true; \
// Return the alphabet size for Alpha (the value SetAlpha stores in g_AlphaSize).
// An unhandled Alpha is reported through Quit with its numeric value.
// NOTE(review): the per-alphabet cases are not shown in this excerpt -- confirm
// the sizes against the ALPHA definitions.
82 static unsigned GetAlphaSize(ALPHA Alpha)
93 Quit("Invalid Alpha=%d", Alpha);
97 static void InitArrays()
99 memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
100 memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
102 memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
103 memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
105 memset(g_AlignChar, '?', sizeof(g_UnalignChar));
106 memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
108 memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
// Register c as a gap character: its character code maps to the AX_GAP letter
// in the extended table, AX_GAP maps back to c, and the unalign mapping leaves
// c unchanged.
111 static void SetGapChar(char c)
// Index through unsigned char so a negative char value cannot become a negative subscript.
113 unsigned char u = (unsigned char) c;
115 g_CharToLetterEx[u] = AX_GAP;
116 g_LetterExToChar[AX_GAP] = u;
118 g_UnalignChar[u] = u;
121 static void SetAlphaDNA()
141 static void SetAlphaRNA()
162 static void SetAlphaAmino()
// Configure the alphabet tables for Alpha.
// The statements shown reject an unhandled Alpha through Quit, store the size
// returned by GetAlphaSize(Alpha) in g_AlphaSize, and log the alphabet name.
// NOTE(review): the per-alphabet setup and the assignment of g_Alpha are
// presumably in lines not shown here -- confirm.
190 void SetAlpha(ALPHA Alpha)
211 Quit("Invalid Alpha=%d", Alpha);
214 g_AlphaSize = GetAlphaSize(Alpha);
218 Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
// Return a wildcard character; the choice presumably depends on the current
// alphabet g_Alpha (TODO confirm -- the selection logic is not shown here).
// An unhandled g_Alpha is reported through Quit with its numeric value.
221 char GetWildcardChar()
233 Quit("Invalid Alpha=%d", g_Alpha);
// Report whether c is one of the nucleotide letters A, C, G, T, U, R, Y or N,
// in either case.
// The explicit NUL guard is required: strchr() treats the terminating null
// character as part of the searched string, so strchr(set, '\0') returns a
// non-null pointer and an unguarded test would classify '\0' as a nucleotide.
bool IsNucleo(char c)
{
    return c != '\0' && strchr("ACGTURYNacgturyn", c) != 0;
}
245 return strchr("AGCTNagctn", c) != 0;
250 return strchr("AGCUNagcun", c) != 0;
// Bookkeeping for invalid input characters:
// InvalidLetters[code] is non-zero once character code `code` has been
// recorded by InvalidLetterWarning; InvalidLetterCount counts the recordings.
static char InvalidLetters[256];
static int InvalidLetterCount = 0;

// Forget every previously recorded invalid letter.
// The count is reset together with the flags: ReportInvalidLetters tests
// InvalidLetterCount against zero, so a stale count would make it behave as
// if letters were still pending after they had been cleared.
void ClearInvalidLetterWarning()
{
    memset(InvalidLetters, 0, sizeof(InvalidLetters));
    InvalidLetterCount = 0;
}
261 void InvalidLetterWarning(char c, char w)
263 InvalidLetters[(unsigned char) c] = 1;
264 ++InvalidLetterCount;
// Emit a single Warning that names the assumed alphabet (ALPHAToStr(g_Alpha))
// and the invalid letters passed in Str.
// NOTE(review): the declaration and filling of Str are not shown in this
// excerpt -- confirm its buffer size covers all 256 possible codes plus a
// terminator.
267 void ReportInvalidLetters()
// Nothing recorded; the guarded statement is presumably an early return -- TODO confirm.
269 if (0 == InvalidLetterCount)
// Visit every possible character code and test its recorded flag.
276 for (int i = 0; i < 256; ++i)
278 if (InvalidLetters[i])
281 Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
282 ALPHAToStr(g_Alpha), Str);