--- /dev/null
+#!/usr/bin/env python
+# Copyright (C) 2003 Rune Linding - EMBL
+# GlobPlot TM
+# GlobPlot is licensed under the Academic Free license
+
+from string import *
+from sys import argv
+import sys
+import os
+from os import system,popen3
+
+relpath = re.sub("/GlobPipe.py$","",argv[0])
+cwd = re.sub("/$","", os.getcwd())
+print '!' +os.getcwd()
+print '!!' +cwd
+newpath =cwd+"/"+relpath+"/biopython-1.50"
+sys.path.append(newpath)
+
+import Bio
+from Bio import File
+from Bio import Fasta
+import fpformat
+import tempfile
+import math
+
+# Russell/Linding
+RL = {'N':0.229885057471264,'P':0.552316012226663,'Q':-0.187676577424997,'A':-0.261538461538462,'R':-0.176592654077609, \
+ 'S':0.142883029808825,'C':-0.0151515151515152,'T':0.00887797506611258,'D':0.227629796839729,'E':-0.204684629516228, \
+ 'V':-0.386174834235195,'F':-0.225572305974316,'W':-0.243375458622095,'G':0.433225711769886,'H':-0.00121743364986608, \
+ 'Y':-0.20750516775322,'I':-0.422234699606962,'K':-0.100092289621613,'L':-0.337933495925287,'M':-0.225903614457831}
+
+def Sum(seq,par_dict):
+ sum = 0
+ results = []
+ raws = []
+ sums = []
+ p = 1
+ for residue in seq:
+ try:
+ parameter = par_dict[residue]
+ except:
+ parameter = 0
+ if p == 1:
+ sum = parameter
+ else:
+ sum = sum + parameter#*math.log10(p)
+ ssum = float(fpformat.fix(sum,10))
+ sums.append(ssum)
+ p +=1
+ return sums
+
+def getSlices(dydx_data, DOM_join_frame, DOM_peak_frame, DIS_join_frame, DIS_peak_frame):
+ DOMslices = []
+ DISslices = []
+ in_DOMslice = 0
+ in_DISslice = 0
+ beginDOMslice = 0
+ endDOMslice = 0
+ beginDISslice = 0
+ endDISslice = 0
+ for i in range( len(dydx_data) ):
+ #close dom slice
+ if in_DOMslice and dydx_data[i] > 0:
+ DOMslices.append([beginDOMslice, endDOMslice])
+ in_DOMslice = 0
+ #close dis slice
+ elif in_DISslice and dydx_data[i] < 0:
+ DISslices.append([beginDISslice, endDISslice])
+ in_DISslice = 0
+ # elseif inSlice expandslice
+ elif in_DOMslice:
+ endDOMslice += 1
+ elif in_DISslice:
+ endDISslice += 1
+ # if not in slice and dydx !== 0 start slice
+ if dydx_data[i] > 0 and not in_DISslice:
+ beginDISslice = i
+ endDISslice = i
+ in_DISslice = 1
+ elif dydx_data[i] < 0 and not in_DOMslice:
+ beginDOMslice = i
+ endDOMslice = i
+ in_DOMslice = 1
+ #last slice
+ if in_DOMslice:
+ DOMslices.append([beginDOMslice, endDOMslice])
+ if in_DISslice:
+ DISslices.append([beginDISslice,endDISslice])
+ k = 0
+ l = 0
+ while k < len(DOMslices):
+ if k+1 < len(DOMslices) and DOMslices[k+1][0]-DOMslices[k][1] < DOM_join_frame:
+ DOMslices[k] = [ DOMslices[k][0], DOMslices[k+1][1] ]
+ del DOMslices[k+1]
+ elif DOMslices[k][1]-DOMslices[k][0]+1 < DOM_peak_frame:
+ del DOMslices[k]
+ else:
+ k += 1
+ while l < len(DISslices):
+ if l+1 < len(DISslices) and DISslices[l+1][0]-DISslices[l][1] < DIS_join_frame:
+ DISslices[l] = [ DISslices[l][0], DISslices[l+1][1] ]
+ del DISslices[l+1]
+ elif DISslices[l][1]-DISslices[l][0]+1 < DIS_peak_frame:
+ del DISslices[l]
+ else:
+ l += 1
+ return DOMslices, DISslices
+
+
+def SavitzkyGolay(window,derivative,datalist):
+ SG_bin = '/homes/pvtroshin/soft/GlobPipe-2.3/sav_gol -V0 '
+ stdin, stdout, stderr = popen3(SG_bin + '-D' + str(derivative) + ' -n' + str(window)+','+str(window))
+ for data in datalist:
+ stdin.write(`data`+'\n')
+ try:
+ stdin.close()
+ except:
+ print stderr.readlines()
+ results = stdout.readlines()
+ stdout.close()
+ SG_results = []
+ for result in results:
+ SG_results.append(float(fpformat.fix(result,6)))
+ return SG_results
+
+
+def reportSlicesTXT(slices, sequence, maskFlag):
+ if maskFlag == 'DOM':
+ coordstr = '|GlobDoms:'
+ elif maskFlag == 'DIS':
+ coordstr = '|Disorder:'
+ else:
+ raise SystemExit
+ if slices == []:
+ #by default the sequence is in uppercase which is our search space
+ s = sequence
+ else:
+ # insert seq before first slide
+ if slices[0][0] > 0:
+ s = sequence[0:slices[0][0]]
+ else:
+ s = ''
+ for i in range(len(slices)):
+ #skip first slice
+ if i > 0:
+ coordstr = coordstr + ', '
+ coordstr = coordstr + str(slices[i][0]+1) + '-' + str(slices[i][1]+1)
+ #insert the actual slice
+ if maskFlag == 'DOM':
+ s = s + lower(sequence[slices[i][0]:(slices[i][1]+1)])
+ if i < len(slices)-1:
+ s = s + upper(sequence[(slices[i][1]+1):(slices[i+1][0])])
+ #last slice
+ elif slices[i][1] < len(sequence)-1:
+ s = s + lower(sequence[(slices[i][1]+1):(len(sequence))])
+ elif maskFlag == 'DIS':
+ s = s + upper(sequence[slices[i][0]:(slices[i][1]+1)])
+ #insert untouched seq between disorder segments, 2-run labelling
+ if i < len(slices)-1:
+ s = s + sequence[(slices[i][1]+1):(slices[i+1][0])]
+ #last slice
+ elif slices[i][1] < len(sequence)-1:
+ s = s + sequence[(slices[i][1]+1):(len(sequence))]
+ return s,coordstr
+
+
+def runGlobPlot():
+ try:
+ smoothFrame = int(sys.argv[1])
+ DOM_joinFrame = int(sys.argv[2])
+ DOM_peakFrame = int(sys.argv[3])
+ DIS_joinFrame = int(sys.argv[4])
+ DIS_peakFrame = int(sys.argv[5])
+ file = str(sys.argv[6])
+ db = open(file,'r')
+ except:
+ print 'Usage:'
+ print ' ./GlobPipe.py SmoothFrame DOMjoinFrame DOMpeakFrame DISjoinFrame DISpeakFrame FASTAfile'
+ print ' Optimised for ELM: ./GlobPlot.py 10 8 75 8 8 sequence_file'
+ print ' Webserver settings: ./GlobPlot.py 10 15 74 4 5 sequence_file'
+ raise SystemExit
+ parser = Fasta.RecordParser()
+ iterator = Fasta.Iterator(db,parser)
+ while 1:
+ try:
+ cur_record = iterator.next()
+ #uppercase is searchspace
+ seq = upper(cur_record.sequence)
+ # sum function
+ sum_vector = Sum(seq,RL)
+ # Run Savitzky-Golay
+ smooth = SavitzkyGolay(`smoothFrame`,0, sum_vector)
+ dydx_vector = SavitzkyGolay(`smoothFrame`,1, sum_vector)
+ #test
+ sumHEAD = sum_vector[:smoothFrame]
+ sumTAIL = sum_vector[len(sum_vector)-smoothFrame:]
+ newHEAD = []
+ newTAIL = []
+ for i in range(len(sumHEAD)):
+ try:
+ dHEAD = (sumHEAD[i+1]-sumHEAD[i])/2
+ except:
+ dHEAD = (sumHEAD[i]-sumHEAD[i-1])/2
+ try:
+ dTAIL = (sumTAIL[i+1]-sumTAIL[i])/2
+ except:
+ dTAIL = (sumTAIL[i]-sumTAIL[i-1])/2
+ newHEAD.append(dHEAD)
+ newTAIL.append(dTAIL)
+ dydx_vector[:smoothFrame] = newHEAD
+ dydx_vector[len(dydx_vector)-smoothFrame:] = newTAIL
+ globdoms, globdis = getSlices(dydx_vector, DOM_joinFrame, DOM_peakFrame, DIS_joinFrame, DIS_peakFrame)
+ s_domMask, coordstrDOM = reportSlicesTXT(globdoms, seq, 'DOM')
+ s_final, coordstrDIS = reportSlicesTXT(globdis, s_domMask, 'DIS')
+ sys.stdout.write('>'+cur_record.title+coordstrDOM+coordstrDIS+'\n')
+ print s_final
+ print '\n'
+ except AttributeError:
+ break
+ return
+
+runGlobPlot()