1 # Copyright 2003, 2007 by Sebastian Bassi. sbassi@genesdigitales.com
2 # All rights reserved. This code is part of the Biopython
3 # distribution and governed by its license.
4 # Please see the LICENSE file that should have been included as part
9 def lcc_mult(seq,wsize):
10 """Local Composition Complexity (LCC) values over sliding window.
12 Returns a list of floats, the LCC values for a sliding window over
15 seq - an unambiguous DNA sequence (a string or Seq object)
16 wsize - window size, integer
18 The result is the same as applying lcc_simp multiple times, but this
19 version is optimized for speed. The optimization works by using the
20 value of previous window as a base to compute the next one."""
26 except AttributeError :
27 #Should be a Seq object then
28 upper = seq.tostring().upper()
31 for i in range(wsize):
32 compone.append(((i+1)/float(wsize))*
33 ((math.log((i+1)/float(wsize)))/l2))
35 cant_a=window.count('A')
36 cant_c=window.count('C')
37 cant_t=window.count('T')
38 cant_g=window.count('G')
39 term_a=compone[cant_a]
40 term_c=compone[cant_c]
41 term_t=compone[cant_t]
42 term_g=compone[cant_g]
43 lccsal.append(-(term_a+term_c+term_t+term_g))
45 for x in range (tamseq-wsize):
46 window=upper[x+1:wsize+x+1]
48 lccsal.append(lccsal[-1])
51 if window.endswith('C'):
53 term_a=compone[cant_a]
54 term_c=compone[cant_c]
55 lccsal.append(-(term_a+term_c+term_t+term_g))
56 elif window.endswith('T'):
58 term_a=compone[cant_a]
59 term_t=compone[cant_t]
60 lccsal.append(-(term_a+term_c+term_t+term_g))
61 elif window.endswith('G'):
63 term_a=compone[cant_a]
64 term_g=compone[cant_g]
65 lccsal.append(-(term_a+term_c+term_t+term_g))
68 if window.endswith('A'):
70 term_a=compone[cant_a]
71 term_c=compone[cant_c]
72 lccsal.append(-(term_a+term_c+term_t+term_g))
73 elif window.endswith('T'):
75 term_c=compone[cant_c]
76 term_t=compone[cant_t]
77 lccsal.append(-(term_a+term_c+term_t+term_g))
78 elif window.endswith('G'):
80 term_c=compone[cant_c]
81 term_g=compone[cant_g]
82 lccsal.append(-(term_a+term_c+term_t+term_g))
85 if window.endswith('A'):
87 term_a=compone[cant_a]
88 term_t=compone[cant_t]
89 lccsal.append(-(term_a+term_c+term_t+term_g))
90 elif window.endswith('C'):
92 term_c=compone[cant_c]
93 term_t=compone[cant_t]
94 lccsal.append(-(term_a+term_c+term_t+term_g))
95 elif window.endswith('G'):
97 term_t=compone[cant_t]
98 term_g=compone[cant_g]
99 lccsal.append(-(term_a+term_c+term_t+term_g))
102 if window.endswith('A'):
104 term_a=compone[cant_a]
105 term_g=compone[cant_g]
106 lccsal.append(-(term_a+term_c+term_t+term_g))
107 elif window.endswith('C'):
109 term_c=compone[cant_c]
110 term_g=compone[cant_g]
111 lccsal.append(-(term_a+term_c+term_t+term_g))
112 elif window.endswith('T'):
114 term_t=compone[cant_t]
115 term_g=compone[cant_g]
116 lccsal.append(-(term_a+term_c+term_t+term_g))
121 """Local Composition Complexity (LCC) for a sequence.
123 seq - an unambiguous DNA sequence (a string or Seq object)
125 Returns the Local Composition Complexity (LCC) value for the entire
126 sequence (as a float).
129 Andrzej K Konopka (2005) Sequence Complexity and Composition
130 DOI: 10.1038/npg.els.0005260
136 except AttributeError :
137 #Should be a Seq object then
138 upper = seq.tostring().upper()
142 # Check to avoid calculating the log of 0.
144 term_a=((upper.count('A'))/float(wsize))*((math.log((upper.count('A'))
149 term_c=((upper.count('C'))/float(wsize))*((math.log((upper.count('C'))
154 term_t=((upper.count('T'))/float(wsize))*((math.log((upper.count('T'))
159 term_g=((upper.count('G'))/float(wsize))*((math.log((upper.count('G'))
161 lccsal=-(term_a+term_c+term_t+term_g)