2 # Copyright (C) 2003 Rune Linding - EMBL
4 # GlobPlot is licensed under the Academic Free license
10 from os import system,popen3
12 relpath = re.sub("/GlobPlot.py$","",argv[0])
13 # cwd = re.sub("/$","", os.getcwd())
14 newpath = relpath + "/biopython-1.50"
15 sys.path.append(newpath)
25 RL = {'N':0.229885057471264,'P':0.552316012226663,'Q':-0.187676577424997,'A':-0.261538461538462,'R':-0.176592654077609, \
26 'S':0.142883029808825,'C':-0.0151515151515152,'T':0.00887797506611258,'D':0.227629796839729,'E':-0.204684629516228, \
27 'V':-0.386174834235195,'F':-0.225572305974316,'W':-0.243375458622095,'G':0.433225711769886,'H':-0.00121743364986608, \
28 'Y':-0.20750516775322,'I':-0.422234699606962,'K':-0.100092289621613,'L':-0.337933495925287,'M':-0.225903614457831}
30 def Sum(seq,par_dict):
38 parameter = par_dict[residue]
44 sum = sum + parameter#*math.log10(p)
45 ssum = float(fpformat.fix(sum,10))
50 def getSlices(dydx_data, DOM_join_frame, DOM_peak_frame, DIS_join_frame, DIS_peak_frame):
59 for i in range( len(dydx_data) ):
61 if in_DOMslice and dydx_data[i] > 0:
62 DOMslices.append([beginDOMslice, endDOMslice])
65 elif in_DISslice and dydx_data[i] < 0:
66 DISslices.append([beginDISslice, endDISslice])
68 # elseif inSlice expandslice
73 # if not in slice and dydx !== 0 start slice
74 if dydx_data[i] > 0 and not in_DISslice:
78 elif dydx_data[i] < 0 and not in_DOMslice:
84 DOMslices.append([beginDOMslice, endDOMslice])
86 DISslices.append([beginDISslice,endDISslice])
89 while k < len(DOMslices):
90 if k+1 < len(DOMslices) and DOMslices[k+1][0]-DOMslices[k][1] < DOM_join_frame:
91 DOMslices[k] = [ DOMslices[k][0], DOMslices[k+1][1] ]
93 elif DOMslices[k][1]-DOMslices[k][0]+1 < DOM_peak_frame:
97 while l < len(DISslices):
98 if l+1 < len(DISslices) and DISslices[l+1][0]-DISslices[l][1] < DIS_join_frame:
99 DISslices[l] = [ DISslices[l][0], DISslices[l+1][1] ]
101 elif DISslices[l][1]-DISslices[l][0]+1 < DIS_peak_frame:
105 return DOMslices, DISslices
108 def SavitzkyGolay(window,derivative,datalist):
109 SG_bin = relpath +'/'+ 'sav_gol -V0 '
110 stdin, stdout, stderr = popen3(SG_bin + '-D' + str(derivative) + ' -n' + str(window)+','+str(window))
111 for data in datalist:
112 stdin.write(`data`+'\n')
116 print stderr.readlines()
117 results = stdout.readlines()
120 for result in results:
121 SG_results.append(float(fpformat.fix(result,6)))
125 def reportSlicesTXT(slices, sequence, maskFlag):
126 if maskFlag == 'DOM':
127 coordstr = 'GlobDoms '
128 elif maskFlag == 'DIS':
129 coordstr = 'Disorder '
133 #by default the sequence is in uppercase which is our search space
136 # insert seq before first slide
138 s = sequence[0:slices[0][0]]
141 for i in range(len(slices)):
144 coordstr = coordstr + ', '
145 coordstr = coordstr + str(slices[i][0]+1) + '-' + str(slices[i][1]+1)
146 #insert the actual slice
147 if maskFlag == 'DOM':
148 s = s + lower(sequence[slices[i][0]:(slices[i][1]+1)])
149 if i < len(slices)-1:
150 s = s + upper(sequence[(slices[i][1]+1):(slices[i+1][0])])
152 elif slices[i][1] < len(sequence)-1:
153 s = s + lower(sequence[(slices[i][1]+1):(len(sequence))])
154 elif maskFlag == 'DIS':
155 s = s + upper(sequence[slices[i][0]:(slices[i][1]+1)])
156 #insert untouched seq between disorder segments, 2-run labelling
157 if i < len(slices)-1:
158 s = s + sequence[(slices[i][1]+1):(slices[i+1][0])]
160 elif slices[i][1] < len(sequence)-1:
161 s = s + sequence[(slices[i][1]+1):(len(sequence))]
172 file = str(sys.argv[1])
176 print ' ./GlobPipe.py FASTAfile'
178 parser = Fasta.RecordParser()
179 iterator = Fasta.Iterator(db,parser)
182 cur_record = iterator.next()
183 #uppercase is searchspace
184 seq = upper(cur_record.sequence)
186 sum_vector = Sum(seq,RL)
188 smooth = SavitzkyGolay(`smoothFrame`,0, sum_vector)
189 dydx_vector = SavitzkyGolay(`smoothFrame`,1, sum_vector)
191 sumHEAD = sum_vector[:smoothFrame]
192 sumTAIL = sum_vector[len(sum_vector)-smoothFrame:]
195 for i in range(len(sumHEAD)):
197 dHEAD = (sumHEAD[i+1]-sumHEAD[i])/2
199 dHEAD = (sumHEAD[i]-sumHEAD[i-1])/2
201 dTAIL = (sumTAIL[i+1]-sumTAIL[i])/2
203 dTAIL = (sumTAIL[i]-sumTAIL[i-1])/2
204 newHEAD.append(dHEAD)
205 newTAIL.append(dTAIL)
206 dydx_vector[:smoothFrame] = newHEAD
207 dydx_vector[len(dydx_vector)-smoothFrame:] = newTAIL
208 globdoms, globdis = getSlices(dydx_vector, DOM_joinFrame, DOM_peakFrame, DIS_joinFrame, DIS_peakFrame)
209 s_domMask, coordstrDOM = reportSlicesTXT(globdoms, seq, 'DOM')
210 s_final, coordstrDIS = reportSlicesTXT(globdis, s_domMask, 'DIS')
211 sys.stdout.write('>'+cur_record.title+'\n')
212 sys.stdout.write('# '+coordstrDOM+'\n')
213 sys.stdout.write('# '+coordstrDIS+'\n')
215 # UNCOMMENT THIS IF NEED TO PRODUCE PER RESEDUE VALUES
216 sys.stdout.write('# RESIDUE' + '\t' + 'DYDX' + '\t' + 'RAW' + '\t' +'SMOOTHED\n')
217 for i in range(len(dydx_vector)):
218 # dydx (positive values seems to indicate disorder in rows more than ~6 chars) raw smoothed
219 sys.stdout.write(seq[i]+'\t'+fpformat.fix(dydx_vector[i],4)+ '\t'+fpformat.fix(smooth[i],4)+'\t'+fpformat.fix(sum_vector[i],4)+ '\n')
223 except AttributeError: