11 static const int NBASE = 4;// A, C, G, U
12 static const int NBASEG = (NBASE + 1); // A, C, G, U, Gap
13 static const int NBASENG = (NBASE + 2); // A, C, G, U, N, Gap
14 static const int NPBASE = (NBASE * NBASE);
15 static const int NPBASEG = (NBASEG * NBASEG);
16 static const int NPBASENG = (NBASENG * NBASENG);
17 static const int NCODE = 7;
18 static const int BASEBIT = 2;
19 static const int BASEMSK = ~(~0 << BASEBIT);
30 AA_CODE = 0, AC_CODE = 1, AG_CODE = 2, AU_CODE = 3,
31 CA_CODE = 4, CC_CODE = 5, CG_CODE = 6, CU_CODE = 7,
32 GA_CODE = 8, GC_CODE = 9, GG_CODE = 10, GU_CODE = 11,
33 UA_CODE = 12, UC_CODE = 13, UG_CODE = 14, UU_CODE = 15,
34 INVALID_PAIR_CODE = 16
36 enum ReducedPairCode {
45 REDUCED_INVALID_PAIR_CODE = 16
48 static const int N_CANONICAL = 6;
49 static const int canonicalPairs[N_CANONICAL];
50 static const int N_NON_CANONICAL = 10;
51 static const int nonCanonicalPairs[N_NON_CANONICAL];
52 static const int i2nt[NCODE];
53 static const int nt2i[256];
54 static const bool isCanonicalBasePair[NPBASE];
56 static bool IsValidCode(const int& c) {return A_CODE <= c && c <= GAP_CODE;}
57 static bool IsBaseCode(const int& c) {return (A_CODE <= c && c <= U_CODE);}
58 static bool IsBasePairCode(const int& c){return (AA_CODE <= c && c <= UU_CODE);}
59 static bool IsReducedBasePairCode(const int& c) {
60 return (REDUCED_AU_CODE <= c && c <= REDUCED_MM_CODE);
62 static bool IsAmbiguousCode(const int& c) {return c == N_CODE;}
63 static bool IsGapCode(const int& c) {return c == GAP_CODE;}
64 static bool IsValidChar(const unsigned char& c) {
65 return IsValidCode(nt2i[c]);
67 static bool IsBaseChar(const int& c) {return IsBaseCode(nt2i[c]);}
68 static bool IsAmbiguousChar(const int& c) {return IsAmbiguousCode(nt2i[c]);}
69 static bool IsGapChar(const int& c) {return IsGapCode(nt2i[c]);}
70 static int nt2code(const int& nt) {
71 if (0 <= nt && nt < 256) {
77 static int getPairCode(const int& c, const int& c1) {
78 return (IsBaseCode(c) && IsBaseCode(c1)
79 ? ((c << BASEBIT) | c1)
82 static bool isValidPairCode(const int& pairCode) {
83 return (0 <= pairCode && pairCode < NPBASE);
85 static void pair2Bases(const int& pairCode, int& c, int& c1) {
86 //Assert(IsBasePairCode(pairCode));
87 c1 = pairCode & BASEMSK;
88 c = (pairCode >> BASEBIT) & BASEMSK;
90 static int getReducedPairCode(const int& c, const int& c1) {
91 return reducePairCode(getPairCode(c, c1));
93 static int reducePairCode(const int& pairCode) {
94 static const int table[NPBASE] = {
95 REDUCED_MM_CODE, REDUCED_MM_CODE, REDUCED_MM_CODE, REDUCED_AU_CODE,
96 REDUCED_MM_CODE, REDUCED_MM_CODE, REDUCED_CG_CODE, REDUCED_MM_CODE,
97 REDUCED_MM_CODE, REDUCED_GC_CODE, REDUCED_MM_CODE, REDUCED_GU_CODE,
98 REDUCED_UA_CODE, REDUCED_MM_CODE, REDUCED_UG_CODE, REDUCED_MM_CODE,
100 return (IsBasePairCode(pairCode)
102 : REDUCED_INVALID_PAIR_CODE);
104 static bool isValidReducedPairCode(const int& pairCode) {
105 return (0 <= pairCode && pairCode < REDUCED_NPBASE);
107 static bool isCanonicalReducedPairCode(const int& pairCode) {
108 return (REDUCED_AU_CODE <= pairCode
109 && pairCode <= REDUCED_UG_CODE);
111 static int flipReducedPairCode(const int& reducedPairCode) {
112 static const int table[REDUCED_NPBASE + 1] = {
120 REDUCED_INVALID_PAIR_CODE
122 return (IsReducedBasePairCode(reducedPairCode)
123 ? table[reducedPairCode] : table[REDUCED_NPBASE]);
125 static void seq2i(char* s, const char* t, int len) {
126 const char* const s_end = s + len;
127 while (s < s_end) *s++ = nt2i[(unsigned) (*t++)];
129 static void i2seq(char* s, const char* t, int len) {
130 const char* const s_end = s + len;
131 while (s < s_end) *s++ = i2nt[(unsigned) (*t++)];
133 static void i2seq(ostream& fo, const char* t, int len) {
134 const char* const t_end = t + len;
135 while (t < t_end) fo << (char) i2nt[(unsigned) (*t++)];
137 static char* wd2str(unsigned int wdSize, unsigned int wd) {
138 const unsigned int MAX_WD_SIZE = (sizeof(unsigned int) * 8 / BASEBIT);
139 static char buf[MAX_WD_SIZE + 1] = {};
140 //Assert(wdSize <= MAX_WD_SIZE);
142 char* s = buf + wdSize;
145 *(--s) = Beta::i2nt[wd & BASEMSK];
150 static void printWd(ostream& fo, unsigned int wdSize, unsigned int wd) {
151 fo << wd2str(wdSize, wd);
153 static const char* code2str(const int& code) {
154 static const char table[NBASENG+1][2] = {
155 "A", "C", "G", "U", "N", ".", "?"
157 return ((A_CODE <= code && code <= GAP_CODE)
158 ? table[code] : table[NBASENG]);
160 static const char* pairCode2str(const int& pairCode) {
161 static const char table[NPBASE+1][3] = {
162 "AA", "AC", "AG", "AU",
163 "CA", "CC", "CG", "CU",
164 "GA", "GC", "GG", "GU",
165 "UA", "UC", "UG", "UU",
168 return (IsBasePairCode(pairCode) ? table[pairCode] : table[NPBASE]);
170 static const char* reducedPairCode2str(const int& reducedPairCode) {
171 static const char table[REDUCED_NPBASE+1][3] = {
172 "AU", "CG", "GC", "GU", "UA", "UG", "MM", "??"
174 return (IsReducedBasePairCode(reducedPairCode)
175 ? table[reducedPairCode] : table[REDUCED_NPBASE]);
177 static char nt2Code(const char& c){
178 if (!IsValidChar(c)) { cerr << "character " << c << " is not valid"; }
180 return (char) nt2i[(int) c];
182 static char code2char(const int& c) {
183 static const char table[NBASENG+1] = {
184 'A', 'C', 'G', 'U', 'N', '.', '?'
186 return ((A_CODE <= c && c <= GAP_CODE)
187 ? table[(int) c] : table[NBASENG]);
190 static string generateRandomRNA(const int& len) {
191 static const char nt[5] = "ACGU";
192 string rna(len, '\0');
193 for (int i = 0; i < len; i++) {
194 rna[i] = nt[Rand(4)];