Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / disembl / biopython-1.50 / Bio / SeqUtils / MeltingTemp.py
diff --git a/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/SeqUtils/MeltingTemp.py b/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/SeqUtils/MeltingTemp.py
new file mode 100644 (file)
index 0000000..d734ac2
--- /dev/null
@@ -0,0 +1,156 @@
+# Copyright 2004-2008 by Sebastian Bassi.
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+
+"""Calculate the thermodynamic melting temperatures of nucleotide sequences."""
+
+import math
+def Tm_staluc(s,dnac=50,saltc=50,rna=0):
+    """Return the melting temperature (degrees C) of a nucleotide sequence.
+
+    The value is computed from nearest neighbor thermodynamics with a
+    salt correction applied to the entropy term.
+
+    Arguments:
+     - s     - nucleotide sequence (string, upper or lower case). In RNA
+               mode 'U' and 'T' are treated as the same base.
+     - dnac  - DNA concentration [nM].
+     - saltc - salt concentration [mM].
+     - rna   - 0 (default) selects the DNA/DNA parameters; any other
+               value selects the RNA parameters.
+
+    Returns the melting temperature as a float.
+
+    Letters that do not appear in the parameter tables add nothing to the
+    sums. math.log raises ValueError if dnac or saltc is not positive.
+
+    Sebastian Bassi <sbassi@genesdigitales.com>"""
+
+    #Credits:
+    #Main author: Sebastian Bassi <sbassi@genesdigitales.com>
+    #Overcount function: Greg Singer <singerg@tcd.ie>
+    #Based on the work of Nicolas Le Novere <lenov@ebi.ac.uk> Bioinformatics.
+    #17:1226-1227(2001)
+
+    #This function returns better results than EMBOSS DAN because it uses
+    #updated thermodynamics values and takes into account initialization
+    #parameters from the work of SantaLucia (1998).
+
+    #Things to do:
+    #+Detect complementary sequences. Change K according to result.
+    #+Add support for heteroduplex (see Sugimoto et al. 1995).
+    #+Correction for Mg2+. Now supports only monovalent ions.
+    #+Put thermodynamics table in an external file for users to change at will
+    #+Add support for dangling ends (see Le Novere. 2001) and mismatches.
+
+    def overcount(st,p):
+        """Return how many times p occurs in st, counting overlaps."""
+        ocu = 0
+        i = st.find(p)
+        while i != -1:
+            ocu += 1
+            #Resume one character later so overlapping hits are counted.
+            i = st.find(p,i+1)
+        return ocu
+
+    #Any value other than 0 selects RNA, so the terminal corrections and
+    #the nearest neighbor table always agree on the mode (rna=2 or
+    #rna=True behave like rna=1).
+    is_rna = (rna != 0)
+    sup = s.upper()
+
+    #Each table row is (dinucleotide, enthalpy, entropy). Values are kept
+    #as positive magnitudes; both sums are negated in the final formula.
+    #Rows are added in the listed order: reordering them can change the
+    #last digits of the floating point result.
+    if is_rna:
+        #The RNA table is keyed on 'T', so map 'U' onto it first;
+        #otherwise no pair containing 'U' would ever be counted.
+        sup = sup.replace("U","T")
+        #Terminal corrections (enthalpy, entropy) for a G/C end and for
+        #an A/T(U) end.
+        gc_end = (-3.61, -1.5)
+        at_end = (-3.72, 10.5)
+        #RNA/RNA hybridisation of Xia et al (1998)
+        #Biochemistry 37: 14719-14735
+        nn_table = (
+            ("AA", 6.82, 19.0),
+            ("TT", 6.6, 18.4),
+            ("AT", 9.38, 26.7),
+            ("TA", 7.69, 20.5),
+            ("CA", 10.44, 26.9),
+            ("TG", 10.5, 27.8),
+            ("GT", 11.4, 29.5),
+            ("AC", 10.2, 26.2),
+            ("CT", 10.48, 27.1),
+            ("AG", 7.6, 19.2),
+            ("GA", 12.44, 32.5),
+            ("TC", 13.3, 35.5),
+            ("CG", 10.64, 26.7),
+            ("GC", 14.88, 36.9),
+            ("GG", 13.39, 32.7),
+            ("CC", 12.2, 29.7),
+        )
+    else:
+        #Terminal corrections (enthalpy, entropy) for a G/C end and for
+        #an A/T end.
+        gc_end = (-0.1, 2.8)
+        at_end = (-2.3, -4.1)
+        #DNA/DNA
+        #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
+        nn_table = (
+            ("AA", 7.9, 22.2),
+            ("TT", 7.9, 22.2),
+            ("AT", 7.2, 20.4),
+            ("TA", 7.2, 21.3),
+            ("CA", 8.5, 22.7),
+            ("TG", 8.5, 22.7),
+            ("GT", 8.4, 22.4),
+            ("AC", 8.4, 22.4),
+            ("CT", 7.8, 21.0),
+            ("AG", 7.8, 21.0),
+            ("GA", 8.2, 22.2),
+            ("TC", 8.2, 22.2),
+            ("CG", 10.6, 27.2),
+            ("GC", 9.8, 24.4),
+            ("GG", 8.0, 19.9),
+            ("CC", 8.0, 19.9),
+        )
+
+    #Start from the terminal corrections of the first and the last base.
+    dh = 0 #DeltaH. Enthalpy
+    ds = 0 #deltaS Entropy
+    for base in (sup[:1], sup[-1:]):
+        if base in ("G","C"):
+            dh += gc_end[0]
+            ds += gc_end[1]
+        elif base in ("A","T"):
+            dh += at_end[0]
+            ds += at_end[1]
+
+    #Add the contribution of every dinucleotide, overlaps included
+    #(e.g. "AAA" holds two "AA" steps).
+    for pair,pair_dh,pair_ds in nn_table:
+        n = overcount(sup,pair)
+        dh += n*pair_dh
+        ds += n*pair_ds
+
+    R = 1.987 # universal gas constant in cal/(K*mol)
+    #dnac is converted from nM to M here.
+    k = (dnac/4.0)*1e-9
+    #With complementary check on, the 4.0 should be changed to a variable.
+
+    #Salt correction of the entropy; saltc is converted from mM to M.
+    ds = ds-0.368*(len(s)-1)*math.log(saltc/1e3)
+    #The factor 1000 brings dh to the scale of ds and R (presumably
+    #kcal -> cal); subtracting 273.15 converts Kelvin to degrees C.
+    tm = ((1000* (-dh))/(-ds+(R * (math.log(k)))))-273.15
+    return tm
+
+if __name__ == "__main__" : #self test; floats are compared with a tolerance, not ==
+    print("Quick self test") #print(...) with a single string runs on Python 2 and 3
+    assert abs(Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA')-59.865612727457972) < 1e-9
+    assert abs(Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA',rna=1)-68.141611264576682) < 1e-9
+    print("Done")