Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / disembl / biopython-1.50 / Bio / SeqUtils / lcc.py
diff --git a/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/SeqUtils/lcc.py b/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/SeqUtils/lcc.py
deleted file mode 100644 (file)
index 1fb71a2..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-# Copyright 2003, 2007 by Sebastian Bassi. sbassi@genesdigitales.com
-# All rights reserved.  This code is part of the Biopython 
-# distribution and governed by its license.
-# Please see the LICENSE file that should have been included as part
-# of this package.
-
-import math
-
-def lcc_mult(seq,wsize):
-    """Local Composition Complexity (LCC) values over sliding window.
-
-    Returns a list of floats, the LCC values for a sliding window over
-    the sequence.
-
-    seq - an unambiguous DNA sequence (a string or Seq object)
-    wsize - window size, integer
-
-    The result is the same as applying lcc_simp multiple times, but this
-    version is optimized for speed. The optimization works by using the
-    value of previous window as a base to compute the next one."""
-    l2=math.log(2)
-    tamseq=len(seq)
-    try :
-        #Assume its a string
-        upper = seq.upper()
-    except AttributeError :
-        #Should be a Seq object then
-        upper = seq.tostring().upper()
-    compone=[0]
-    lccsal=[0]
-    for i in range(wsize):
-        compone.append(((i+1)/float(wsize))*
-                       ((math.log((i+1)/float(wsize)))/l2))
-    window=seq[0:wsize]
-    cant_a=window.count('A')
-    cant_c=window.count('C')
-    cant_t=window.count('T')
-    cant_g=window.count('G')
-    term_a=compone[cant_a]
-    term_c=compone[cant_c]
-    term_t=compone[cant_t]
-    term_g=compone[cant_g]
-    lccsal.append(-(term_a+term_c+term_t+term_g))
-    tail=seq[0]
-    for x in range (tamseq-wsize):
-        window=upper[x+1:wsize+x+1]
-        if tail==window[-1]:
-            lccsal.append(lccsal[-1])
-        elif tail=='A':
-            cant_a=cant_a-1
-            if window.endswith('C'):
-                cant_c=cant_c+1
-                term_a=compone[cant_a]
-                term_c=compone[cant_c]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('T'):
-                cant_t=cant_t+1
-                term_a=compone[cant_a]
-                term_t=compone[cant_t]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('G'):
-                cant_g=cant_g+1
-                term_a=compone[cant_a]
-                term_g=compone[cant_g]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-        elif tail=='C':
-            cant_c=cant_c-1
-            if window.endswith('A'):
-                cant_a=cant_a+1
-                term_a=compone[cant_a]
-                term_c=compone[cant_c]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('T'):
-                cant_t=cant_t+1
-                term_c=compone[cant_c]
-                term_t=compone[cant_t]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('G'):
-                cant_g=cant_g+1
-                term_c=compone[cant_c]
-                term_g=compone[cant_g]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-        elif tail=='T':
-            cant_t=cant_t-1
-            if window.endswith('A'):
-                cant_a=cant_a+1
-                term_a=compone[cant_a]
-                term_t=compone[cant_t]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('C'):
-                cant_c=cant_c+1
-                term_c=compone[cant_c]
-                term_t=compone[cant_t]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('G'):
-                cant_g=cant_g+1
-                term_t=compone[cant_t]
-                term_g=compone[cant_g]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-        elif tail=='G':
-            cant_g=cant_g-1
-            if window.endswith('A'):
-                cant_a=cant_a+1
-                term_a=compone[cant_a]
-                term_g=compone[cant_g]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('C'):
-                cant_c=cant_c+1
-                term_c=compone[cant_c]
-                term_g=compone[cant_g]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-            elif window.endswith('T'):
-                cant_t=cant_t+1
-                term_t=compone[cant_t]
-                term_g=compone[cant_g]
-                lccsal.append(-(term_a+term_c+term_t+term_g))
-        tail=window[0]
-    return lccsal
-
-def lcc_simp(seq):
-    """Local Composition Complexity (LCC) for a sequence.
-
-    seq - an unambiguous DNA sequence (a string or Seq object)
-    
-    Returns the Local Composition Complexity (LCC) value for the entire
-    sequence (as a float).
-
-    Reference:
-    Andrzej K Konopka (2005) Sequence Complexity and Composition
-    DOI: 10.1038/npg.els.0005260
-    """
-    wsize=len(seq)
-    try :
-        #Assume its a string
-        upper = seq.upper()
-    except AttributeError :
-        #Should be a Seq object then
-        upper = seq.tostring().upper()
-    l2=math.log(2)
-    if 'A' not in seq:
-        term_a=0
-       # Check to avoid calculating the log of 0.
-    else:
-        term_a=((upper.count('A'))/float(wsize))*((math.log((upper.count('A'))
-                                                          /float(wsize)))/l2)
-    if 'C' not in seq:
-        term_c=0
-    else:
-        term_c=((upper.count('C'))/float(wsize))*((math.log((upper.count('C'))
-                                                          /float(wsize)))/l2)
-    if 'T' not in seq:
-        term_t=0
-    else:
-        term_t=((upper.count('T'))/float(wsize))*((math.log((upper.count('T'))
-                                                          /float(wsize)))/l2)
-    if 'G' not in seq:
-        term_g=0
-    else:
-        term_g=((upper.count('G'))/float(wsize))*((math.log((upper.count('G'))
-                                                          /float(wsize)))/l2)
-    lccsal=-(term_a+term_c+term_t+term_g)
-    return lccsal