/*****************************************************************
 * SQUID - a library of functions for biological sequence analysis
 * Copyright (C) 1992-2002 Washington University School of Medicine
 *
 *     This source code is freely distributed under the terms of the
 *     GNU General Public License. See the files COPYRIGHT and LICENSE
 *     for details.
 *****************************************************************/

/* iupac.c
 *
 * Globally defines the IUPAC symbols for nucleic acid sequence.
 * Slowly evolving into a repository of globals. Tue Apr 20 1993
 *
 * RCS $Id: iupac.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: iupac.c,v 1.3 2001/02/21 21:09:10 eddy Exp)
 */

#include "squid.h"

/* Default expected nucleotide occurrence frequencies, A/C/G/T.
 * Used (for instance) as the default distribution for
 * i.i.d. random nucleotide sequences.
 */
float dnafq[4] = { 0.25, 0.25, 0.25, 0.25 };

/* Dayhoff f(i) amino acid occurrence frequencies.
 * From SwissProt 34: 21,210,388 residues
 * In alphabetic order by single-letter code.
 * Used (for instance) as the default distribution for
 * i.i.d. random protein sequences.
 */
float aafq[20] = {
  0.075520,                     /* A */
  0.016973,                     /* C */
  0.053029,                     /* D */
  0.063204,                     /* E */
  0.040762,                     /* F */
  0.068448,                     /* G */
  0.022406,                     /* H */
  0.057284,                     /* I */
  0.059398,                     /* K */
  0.093399,                     /* L */
  0.023569,                     /* M */
  0.045293,                     /* N */
  0.049262,                     /* P */
  0.040231,                     /* Q */
  0.051573,                     /* R */
  0.072214,                     /* S */
  0.057454,                     /* T */
  0.065252,                     /* V */
  0.012513,                     /* W */
  0.031985                      /* Y */
};

/* Amino acid alphabet string; contents come from AMINO_ALPHABET,
 * which is defined outside this file.
 */
char aa_alphabet[] = AMINO_ALPHABET;

/* aa_index converts to pam's 27x27 scheme.
 * NOTE(review): the values match (letter - 'A') for the 20 residues
 * taken in the same alphabetic order used by aafq[] above -- confirm
 * that callers index this array in that order.
 */
int aa_index[20] = {
   0,  2,  3,  4,  5,           /* A C D E F */
   6,  7,  8, 10, 11,           /* G H I K L */
  12, 13, 15, 16, 17,           /* M N P Q R */
  18, 19, 21, 22, 24            /* S T V W Y */
};

/* IUPAC code translations */
/* note: sequence chars are UPPER CASE */
/* Each row holds a symbol, a second symbol, and the NT* codes for the
 * two symbols in the same order. The second symbol is the Watson-Crick
 * complement of the first in every row (e.g. R <-> Y, H <-> D).
 * NOTE(review): field names live in struct iupactype (squid.h), which
 * is not visible here -- confirm their exact meaning there.
 */
struct iupactype iupac[] = {
  { 'A', 'T', NTA, NTT, },
  { 'C', 'G', NTC, NTG, },
  { 'G', 'C', NTG, NTC, },
  { 'T', 'A', NTT, NTA, },
  { 'U', 'A', NTU, NTA, },
  { 'N', 'N', NTN, NTN, },
  { ' ', ' ', NTGAP, NTGAP, },
  { 'R', 'Y', NTR, NTY, },
  { 'Y', 'R', NTY, NTR, },
  { 'M', 'K', NTM, NTK, },
  { 'K', 'M', NTK, NTM, },
  { 'S', 'S', NTS, NTS, },
  { 'W', 'W', NTW, NTW, },
  { 'H', 'D', NTH, NTD, },
  { 'B', 'V', NTB, NTV, },
  { 'V', 'B', NTV, NTB, },
  { 'D', 'H', NTD, NTH, },
};

/* Standard genetic code, one-letter amino acid symbols.
 * Entries 0..63 are listed codon by codon from AAA to UUU, with the
 * bases ordered A, C, G, U at each position (first base varies
 * slowest). "*" marks a stop codon. Entry 64 is the placeholder for
 * an unknown codon.
 */
char *stdcode1[65] = {
  "K",                          /* AAA */
  "N",                          /* AAC */
  "K",                          /* AAG */
  "N",                          /* AAU */
  "T",                          /* ACA */
  "T",                          /* ACC */
  "T",                          /* ACG */
  "T",                          /* ACU */
  "R",                          /* AGA */
  "S",                          /* AGC */
  "R",                          /* AGG */
  "S",                          /* AGU */
  "I",                          /* AUA */
  "I",                          /* AUC */
  "M",                          /* AUG */
  "I",                          /* AUU */
  "Q",                          /* CAA */
  "H",                          /* CAC */
  "Q",                          /* CAG */
  "H",                          /* CAU */
  "P",                          /* CCA */
  "P",                          /* CCC */
  "P",                          /* CCG */
  "P",                          /* CCU */
  "R",                          /* CGA */
  "R",                          /* CGC */
  "R",                          /* CGG */
  "R",                          /* CGU */
  "L",                          /* CUA */
  "L",                          /* CUC */
  "L",                          /* CUG */
  "L",                          /* CUU */
  "E",                          /* GAA */
  "D",                          /* GAC */
  "E",                          /* GAG */
  "D",                          /* GAU */
  "A",                          /* GCA */
  "A",                          /* GCC */
  "A",                          /* GCG */
  "A",                          /* GCU */
  "G",                          /* GGA */
  "G",                          /* GGC */
  "G",                          /* GGG */
  "G",                          /* GGU */
  "V",                          /* GUA */
  "V",                          /* GUC */
  "V",                          /* GUG */
  "V",                          /* GUU */
  "*",                          /* UAA */
  "Y",                          /* UAC */
  "*",                          /* UAG */
  "Y",                          /* UAU */
  "S",                          /* UCA */
  "S",                          /* UCC */
  "S",                          /* UCG */
  "S",                          /* UCU */
  "*",                          /* UGA */
  "C",                          /* UGC */
  "W",                          /* UGG */
  "C",                          /* UGU */
  "L",                          /* UUA */
  "F",                          /* UUC */
  "L",                          /* UUG */
  "F",                          /* UUU */
  "X",                          /* unknown */
};

/* Standard genetic code, three-letter amino acid symbols.
 * Same layout as stdcode1[]: entries 0..63 run from AAA to UUU with
 * bases ordered A, C, G, U; "***" marks a stop codon; entry 64 is the
 * placeholder for an unknown codon. Every entry must name the same
 * residue as the corresponding stdcode1[] entry.
 */
char *stdcode3[65] = {
  "Lys",                        /* AAA */
  "Asn",                        /* AAC */
  "Lys",                        /* AAG */
  "Asn",                        /* AAU */
  "Thr",                        /* ACA */
  "Thr",                        /* ACC */
  "Thr",                        /* ACG */
  "Thr",                        /* ACU */
  "Arg",                        /* AGA */
  "Ser",                        /* AGC */
  "Arg",                        /* AGG */
  "Ser",                        /* AGU */
  "Ile",                        /* AUA */
  "Ile",                        /* AUC */
  "Met",                        /* AUG */
  "Ile",                        /* AUU */
  "Gln",                        /* CAA */
  "His",                        /* CAC */
  "Gln",                        /* CAG */
  "His",                        /* CAU */
  "Pro",                        /* CCA */
  "Pro",                        /* CCC */
  "Pro",                        /* CCG */
  "Pro",                        /* CCU */
  "Arg",                        /* CGA */
  "Arg",                        /* CGC */
  "Arg",                        /* CGG */
  "Arg",                        /* CGU */
  "Leu",                        /* CUA */
  "Leu",                        /* CUC */
  "Leu",                        /* CUG */
  "Leu",                        /* CUU */
  "Glu",                        /* GAA */
  "Asp",                        /* GAC */
  "Glu",                        /* GAG */
  "Asp",                        /* GAU */
  "Ala",                        /* GCA */
  "Ala",                        /* GCC */
  "Ala",                        /* GCG */
  "Ala",                        /* GCU */
  "Gly",                        /* GGA */
  "Gly",                        /* GGC */
  "Gly",                        /* GGG */
  "Gly",                        /* GGU */
  "Val",                        /* GUA */
  "Val",                        /* GUC */
  "Val",                        /* GUG */
  "Val",                        /* GUU */
  "***",                        /* UAA */
  "Tyr",                        /* UAC */
  "***",                        /* UAG */
  "Tyr",                        /* UAU */
  "Ser",                        /* UCA */
  "Ser",                        /* UCC */
  "Ser",                        /* UCG */
  "Ser",                        /* UCU */
  "***",                        /* UGA */
  "Cys",                        /* UGC */
  "Trp",                        /* UGG */
  "Cys",                        /* UGU */
  "Leu",                        /* UUA */
  "Phe",                        /* UUC */
  "Leu",                        /* UUG */
  "Phe",                        /* UUU -- was "Trp"; UUU encodes Phe, matching stdcode1 "F" */
  "XXX",                        /* unknown */
};