1 #!/usr/local/bin/python
2 # Copyright (C) 2004 Rune Linding & Lars Juhl Jensen - EMBL
3 # The DisEMBL is licensed under the GPL license
4 # (http://www.opensource.org/licenses/gpl-license.php)
15 from os import system,popen3
17 # change these to the correct paths
18 NN_bin = '/PATH/DisEMBL-1.4/disembl'
19 SG_bin = '/PATH/DisEMBL-1.4/sav_gol'
21 def JensenNet(sequence):
22 outFile = tempfile.mktemp()
23 inFile= tempfile.mktemp()
24 open(inFile,'w').write(sequence+'\n')
25 system(NN_bin + '< ' + inFile +' > ' + outFile)
29 resultsFile = open(outFile,'r')
30 results = resultsFile.readlines()
32 for result in results:
33 coil = float(fpformat.fix(split(result)[0],6))
35 hotloop = float(fpformat.fix(split(result)[1],6))
36 HOTLOOPS.append(hotloop)
37 rem465 = float(fpformat.fix(split(result)[2],6))
41 return COILS, HOTLOOPS, REM465
44 def SavitzkyGolay(window,derivative,datalist):
45 if len(datalist) < 2*window:
46 window = len(datalist)/2
49 stdin, stdout, stderr = popen3(SG_bin + ' -V0 -D' + str(derivative) + ' -n' + str(window)+','+str(window))
51 stdin.write(`data`+'\n')
55 print stderr.readlines()
56 results = stdout.readlines()
59 for result in results:
60 f = float(fpformat.fix(result,6))
67 def getSlices(NNdata, fold, join_frame, peak_frame, expect_val):
70 for i in range(len(NNdata)):
72 if NNdata[i] < expect_val:
73 if maxSlice >= fold*expect_val:
74 slices.append([beginSlice, endSlice])
78 if NNdata[i] > maxSlice:
80 elif NNdata[i] >= expect_val:
85 if inSlice and maxSlice >= fold*expect_val:
86 slices.append([beginSlice, endSlice])
89 while i < len(slices):
90 if i+1 < len(slices) and slices[i+1][0]-slices[i][1] <= join_frame:
91 slices[i] = [ slices[i][0], slices[i+1][1] ]
93 elif slices[i][1]-slices[i][0]+1 < peak_frame:
100 def reportSlicesTXT(slices, sequence):
105 s = lower(sequence[0:slices[0][0]])
108 for i in range(len(slices)):
110 sys.stdout.write(', ')
111 sys.stdout.write( str(slices[i][0]+1) + '-' + str(slices[i][1]+1) )
112 s = s + upper(sequence[slices[i][0]:(slices[i][1]+1)])
113 if i < len(slices)-1:
114 s = s + lower(sequence[(slices[i][1]+1):(slices[i+1][0])])
115 elif slices[i][1] < len(sequence)-1:
116 s = s + lower(sequence[(slices[i][1]+1):(len(sequence))])
122 def runDisEMBLpipeline():
124 smooth_frame = int(sys.argv[1])
125 peak_frame = int(sys.argv[2])
126 join_frame = int(sys.argv[3])
127 fold_coils = float(sys.argv[4])
128 fold_hotloops = float(sys.argv[5])
129 fold_rem465 = float(sys.argv[6])
130 file = str(sys.argv[7])
136 print '\nDisEMBL.py smooth_frame peak_frame join_frame fold_coils fold_hotloops fold_rem465 sequence_file [mode]\n'
137 print 'A default run would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 fasta_file'
138 print 'Mode: "default"(nothing) or "scores" which will give scores per residue in TAB seperated format'
141 parser = Fasta.RecordParser()
142 iterator = Fasta.Iterator(db,parser)
143 print ' ____ _ _____ __ __ ____ _ _ _ _'
144 print '| _ \(_)___| ____| \/ | __ )| | / || || |'
145 print '| | | | / __| _| | |\/| | _ \| | | || || |_'
146 print '| |_| | \__ \ |___| | | | |_) | |___ | ||__ _|'
147 print '|____/|_|___/_____|_| |_|____/|_____| |_(_) |_|'
148 print '# Copyright (C) 2004 - Rune Linding & Lars Juhl Jensen '
149 print '# EMBL Biocomputing Unit - Heidelberg - Germany '
153 cur_record = iterator.next()
154 sequence = upper(cur_record.sequence)
156 COILS_raw, HOTLOOPS_raw, REM465_raw = JensenNet(sequence)
158 REM465_smooth = SavitzkyGolay(smooth_frame,0,REM465_raw)
159 COILS_smooth = SavitzkyGolay(smooth_frame,0,COILS_raw)
160 HOTLOOPS_smooth = SavitzkyGolay(smooth_frame,0,HOTLOOPS_raw)
161 if mode == 'default':
162 sys.stdout.write('> '+cur_record.title+'_COILS ')
163 reportSlicesTXT( getSlices(COILS_smooth, fold_coils, join_frame, peak_frame, 0.43), sequence )
164 sys.stdout.write('> '+cur_record.title+'_REM465 ')
165 reportSlicesTXT( getSlices(REM465_smooth, fold_rem465, join_frame, peak_frame, 0.50), sequence )
166 sys.stdout.write('> '+cur_record.title+'_HOTLOOPS ')
167 reportSlicesTXT( getSlices(HOTLOOPS_smooth, fold_hotloops, join_frame, peak_frame, 0.086), sequence )
168 sys.stdout.write('\n')
169 elif mode == 'scores':
170 sys.stdout.write('# RESIDUE COILS REM465 HOTLOOPS\n')
171 for i in range(len(REM465_smooth)):
172 sys.stdout.write(sequence[i]+'\t'+fpformat.fix(COILS_smooth[i],5)+'\t'+fpformat.fix(REM465_smooth[i],5)+'\t'+fpformat.fix(HOTLOOPS_smooth[i],5)+'\n')
174 sys.stderr.write('Wrong mode given: '+mode+'\n')
176 except AttributeError: