Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / globplot / biopython-1.50 / Bio / SeqUtils / MeltingTemp.py
1 # Copyright 2004-2008 by Sebastian Bassi.
2 # All rights reserved.
3 # This code is part of the Biopython distribution and governed by its
4 # license.  Please see the LICENSE file that should have been included
5 # as part of this package.
6
7 """Calculate the thermodynamic melting temperatures of nucleotide sequences."""
8
9 import math
10 def Tm_staluc(s,dnac=50,saltc=50,rna=0):
11     """Returns DNA/DNA tm using nearest neighbor thermodynamics.
12
13     dnac is DNA concentration [nM]
14     saltc is salt concentration [mM].
15     rna=0 is for DNA/DNA (default), for RNA, rna should be 1.
16     
17     Sebastian Bassi <sbassi@genesdigitales.com>"""
18     
19     #Credits: 
20     #Main author: Sebastian Bassi <sbassi@genesdigitales.com>
21     #Overcount function: Greg Singer <singerg@tcd.ie>
22     #Based on the work of Nicolas Le Novere <lenov@ebi.ac.uk> Bioinformatics.
23     #17:1226-1227(2001)
24
25     #This function returns better results than EMBOSS DAN because it uses
26     #updated thermodynamics values and takes into account inicialization
27     #parameters from the work of SantaLucia (1998).
28     
29     #Things to do:
30     #+Detect complementary sequences. Change K according to result.
31     #+Add support for heteroduplex (see Sugimoto et al. 1995).
32     #+Correction for Mg2+. Now supports only monovalent ions.
33     #+Put thermodinamics table in a external file for users to change at will
34     #+Add support for danglings ends (see Le Novele. 2001) and mismatches.
35     
36     dh = 0 #DeltaH. Enthalpy
37     ds = 0 #deltaS Entropy
38
39     def tercorr(stri):
40         deltah = 0
41         deltas = 0
42         if rna==0:
43             #DNA/DNA
44             #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
45             if stri.startswith('G') or stri.startswith('C'):
46                 deltah -= 0.1
47                 deltas += 2.8
48             elif stri.startswith('A') or stri.startswith('T'):
49                 deltah -= 2.3
50                 deltas -= 4.1
51             if stri.endswith('G') or stri.endswith('C'):
52                 deltah -= 0.1
53                 deltas += 2.8
54             elif stri.endswith('A') or stri.endswith('T'):
55                 deltah -= 2.3
56                 deltas -= 4.1
57             dhL = dh + deltah
58             dsL = ds + deltas
59             return dsL,dhL
60         elif rna==1:
61             #RNA
62             if stri.startswith('G') or stri.startswith('C'):
63                 deltah -= 3.61
64                 deltas -= 1.5
65             elif stri.startswith('A') or stri.startswith('T') or \
66                  stri.startswith('U'):
67                 deltah -= 3.72
68                 deltas += 10.5
69             if stri.endswith('G') or stri.endswith('C'):
70                 deltah -= 3.61
71                 deltas -= 1.5
72             elif stri.endswith('A') or stri.endswith('T') or \
73                  stri.endswith('U'):
74                 deltah -= 3.72
75                 deltas += 10.5
76             dhL = dh + deltah
77             dsL = ds + deltas
78             # print "delta h=",dhL
79             return dsL,dhL
80
81     def overcount(st,p):
82         """Returns how many p are on st, works even for overlapping"""
83         ocu = 0
84         x = 0
85         while 1:
86             try:
87                 i = st.index(p,x)
88             except ValueError:
89                 break
90             ocu += 1
91             x = i + 1
92         return ocu
93
94     R = 1.987 # universal gas constant in Cal/degrees C*Mol
95     sup = s.upper()
96     vsTC,vh = tercorr(sup)
97     vs = vsTC
98     
99     k = (dnac/4.0)*1e-9
100     #With complementary check on, the 4.0 should be changed to a variable.
101     
102     if rna==0:
103         #DNA/DNA
104         #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
105         vh = vh + (overcount(sup,"AA"))*7.9 + (overcount(sup,"TT"))*\
106         7.9 + (overcount(sup,"AT"))*7.2 + (overcount(sup,"TA"))*7.2 \
107         + (overcount(sup,"CA"))*8.5 + (overcount(sup,"TG"))*8.5 + \
108         (overcount(sup,"GT"))*8.4 + (overcount(sup,"AC"))*8.4
109         vh = vh + (overcount(sup,"CT"))*7.8+(overcount(sup,"AG"))*\
110         7.8 + (overcount(sup,"GA"))*8.2 + (overcount(sup,"TC"))*8.2
111         vh = vh + (overcount(sup,"CG"))*10.6+(overcount(sup,"GC"))*\
112         9.8 + (overcount(sup,"GG"))*8 + (overcount(sup,"CC"))*8
113         vs = vs + (overcount(sup,"AA"))*22.2+(overcount(sup,"TT"))*\
114         22.2 + (overcount(sup,"AT"))*20.4 + (overcount(sup,"TA"))*21.3
115         vs = vs + (overcount(sup,"CA"))*22.7+(overcount(sup,"TG"))*\
116         22.7 + (overcount(sup,"GT"))*22.4 + (overcount(sup,"AC"))*22.4
117         vs = vs + (overcount(sup,"CT"))*21.0+(overcount(sup,"AG"))*\
118         21.0 + (overcount(sup,"GA"))*22.2 + (overcount(sup,"TC"))*22.2
119         vs = vs + (overcount(sup,"CG"))*27.2+(overcount(sup,"GC"))*\
120         24.4 + (overcount(sup,"GG"))*19.9 + (overcount(sup,"CC"))*19.9
121         ds = vs
122         dh = vh
123         
124     else:
125         #RNA/RNA hybridisation of Xia et al (1998)
126         #Biochemistry 37: 14719-14735         
127         vh = vh+(overcount(sup,"AA"))*6.82+(overcount(sup,"TT"))*6.6+\
128         (overcount(sup,"AT"))*9.38 + (overcount(sup,"TA"))*7.69+\
129         (overcount(sup,"CA"))*10.44 + (overcount(sup,"TG"))*10.5+\
130         (overcount(sup,"GT"))*11.4 + (overcount(sup,"AC"))*10.2
131         vh = vh + (overcount(sup,"CT"))*10.48 + (overcount(sup,"AG"))\
132         *7.6+(overcount(sup,"GA"))*12.44+(overcount(sup,"TC"))*13.3
133         vh = vh + (overcount(sup,"CG"))*10.64 + (overcount(sup,"GC"))\
134         *14.88+(overcount(sup,"GG"))*13.39+(overcount(sup,"CC"))*12.2
135         vs = vs + (overcount(sup,"AA"))*19.0 + (overcount(sup,"TT"))*\
136         18.4+(overcount(sup,"AT"))*26.7+(overcount(sup,"TA"))*20.5
137         vs = vs + (overcount(sup,"CA"))*26.9 + (overcount(sup,"TG"))*\
138         27.8 + (overcount(sup,"GT"))*29.5 + (overcount(sup,"AC"))*26.2
139         vs = vs + (overcount(sup,"CT"))*27.1 + (overcount(sup,"AG"))*\
140         19.2 + (overcount(sup,"GA"))*32.5 + (overcount(sup,"TC"))*35.5
141         vs = vs + (overcount(sup,"CG"))*26.7 + (overcount(sup,"GC"))\
142         *36.9 + (overcount(sup,"GG"))*32.7 + (overcount(sup,"CC"))*29.7
143         ds = vs
144         dh = vh
145
146     ds = ds-0.368*(len(s)-1)*math.log(saltc/1e3)
147     tm = ((1000* (-dh))/(-ds+(R * (math.log(k)))))-273.15
148     # print "ds="+str(ds)
149     # print "dh="+str(dh)
150     return tm
151
152 if __name__ == "__main__" :
153     print "Quick self test"
154     assert Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA') == 59.865612727457972
155     assert Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA',rna=1) == 68.141611264576682
156     print "Done"