3 #####################################################################
4 # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
7 #####################################################################
8 # 1.0 07.26.13 Initial release
9 # 2.0 09.03.13 Added extensive warnings and error messages
10 # 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs
11 # 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output
12 # 3.2 12.08.14 Removed 5-char restriction for own structure files
14 #####################################################################
18 use File::Path qw(make_path remove_tree);
22 # to prevent error 'Header line too long (limit is 8192)' [v3.1]
23 use LWP::Protocol::http;
24 push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
# REST endpoint of the premafft service; the submit (POST), poll (GET) and
# download URLs further down are all built from this base.
28 my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";
# Run settings. How they get populated is not visible in this excerpt
# (presumably command-line option parsing for -p, -o, -d and the two
# output file names -- confirm).
30 my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );
41 print STDERR "[MAFFTash-premafft]\n";
# Scratch directory named after the current process id.
# NOTE(review): a predictable name under /tmp can be pre-created or
# symlinked by another local user; File::Temp::tempdir would avoid that.
44 my $TMP = "/tmp/mapremafft$$";
45 make_path($TMP) unless -d $TMP;
# At least one of the two ID lists must be given, and an own-structure list
# additionally needs the directory that holds the structure files.
51 &help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST);
52 &help("Required parameter : '-d'") if defined $OWNLIST && ! defined $WORKDIR;
# Default output names when none were supplied.
54 $HAT3FILE = "hat3" unless defined $HAT3FILE;
55 $INSTRFILE = "instr" unless defined $INSTRFILE;
# Drop one trailing slash so paths built as WORKDIR/ID.pdb do not contain a
# doubled separator.
# NOTE(review): the /g flag serves no purpose on this boolean match.
56 chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\/$/g;
61 print STDERR "Preparing inputs for service request...\n";
# Fixed form fields sent with every request; @files is later handed to the
# POST call as the form content.
64 push(@files, "strweight" => "0.5");
65 push(@files, "premafft" => "1");
# PDB ID list: the user file is rewritten into a temporary list file that is
# then attached to the request.
69 if ( defined $PDBLIST )
71 print STDERR "PDB List defined!\n";
72 &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;
73 my $listfile = "$TMP/pdblist.inp";
# NOTE(review): two-argument open with bareword handles; a user-supplied
# name that starts with a mode character or ends in a pipe is interpreted
# by open. The three-argument form with lexical handles avoids that.
76 open(INPF,"<$PDBLIST") or &bail("Error: Cannot open file $PDBLIST for reading!");
77 open(OUTF,">$listfile") or &bail("Error: Cannot open temporary file $listfile for writing!");
# Each accepted entry is written as a >PDBID header line followed by the
# captured ID. The match that sets $1 is not visible in this excerpt.
84 print OUTF ">PDBID\n$1\n";
# An array-ref value marks this form field as a file upload.
91 push(@files, "inputfile" => ["$listfile"]);
96 # upload own structures: every ID in the list maps to WORKDIR/ID.pdb
99 if ( defined $OWNLIST )
101 print STDERR "OWN List defined!\n";
102 &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;
# NOTE(review): two-argument open with a bareword handle on a user-supplied
# path; prefer the three-argument form with a lexical handle.
105 open(OWNINPF,"<$OWNLIST") or &bail("Error: Cannot open file $OWNLIST for reading!");
# $1 is the ID captured from the current list line (the match itself is not
# visible in this excerpt).
113 my $fileref = "$WORKDIR/$1.pdb";
# Presumably guarded by an existence test on $fileref that is not visible
# here -- confirm.
118 &bail("Error: File $fileref does not exists!");
# One upload field per structure file; the array-ref value marks it as a
# file upload.
121 push(@files, "inputownfile[]" => ["$fileref"]);
# Submit the job: POST all collected fields and files as multipart form data.
133 print STDERR "Sending service request...\n";
135 my $browser = LWP::UserAgent->new;
# NOTE(review): a zero timeout is unusual; confirm whether the installed LWP
# treats it as no limit or as an immediate timeout.
136 $browser->timeout(0);
139 # post: running a mafftash job
140 my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
# On an HTTP failure, abort with the status code and the error text
# extracted from the response body.
141 &bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);
144 # get response from post request: job status and the id used for polling
145 my ($status, $mafftashid) = &parseResponse($postResponse->content);
153 print STDERR "Request sent! Waiting for response...[$mafftashid]\n";
156 # poll until the results become available
# The interval grows by $STIMER on each pass and drops back to $STIMER once
# it has gone past three times $STIMER. Where the value is consumed
# (presumably a sleep) is not visible in this excerpt.
159 $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
163 # get: get results for mafftash job
164 my $getResponse = $browser->get("$BASEURL/$mafftashid");
166 if ( $getResponse->is_success )
169 # parse the job status out of the poll response
170 ($status, $mafftashid) = &parseResponse($getResponse->content);
# Keep polling until the server reports the job as done.
171 next unless ( $status eq "done" );
174 # if job is finished and ready
175 print STDERR "Results found!\n";
176 my $csfile = "$TMP/checksum.tar.gz";
# $try1 counts attempts at fetching the checksum list; the retry loop
# header is not visible in this excerpt.
182 print STDERR "Fetching Results... [Trial $try1]\n";
# A download only counts when the HTTP status is a success and the stored
# file exists and is non-empty.
184 if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
186 # parse the downloaded list into id => checksum pairs
187 my $checklist = &extractchecksum($csfile);
# NOTE(review): before Perl 5.26 a hash in scalar context yields a
# used/allocated bucket string rather than a key count; comparing
# keys %$checklist against zero states the intent directly.
188 &bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
# Fetch every listed file and verify it against its expected checksum.
191 foreach my $id ( keys %$checklist )
# NOTE(review): $id comes from the server-supplied list and is used
# unchecked as a local file name under the scratch directory.
193 my $checkfile = "$TMP/$id";
194 my $checkid = $checklist->{$id};
# Remove any leftover from an earlier attempt before downloading again.
199 unlink $checkfile if -e $checkfile;
201 if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
203 my $hashid = &getchecksum($checkfile);
204 #print STDERR "[hashid]$hashid [checkid]$checkid\n";
# An empty local checksum skips verification instead of failing it; a
# mismatch discards the file and aborts once the retry limit is reached.
206 if ($hashid ne "" && $hashid ne $checkid )
208 unlink $checkfile if -e $checkfile;
209 &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
# Download of a single file failed: give up after $MAXTRIES attempts.
220 &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
# Download of the checksum list failed: give up after $MAXTRIES attempts.
231 &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
# The poll request itself failed: abort with the status code and the error
# text extracted from the response body.
242 &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content)));
248 # make sure outputs were generated
250 print STDERR "Assembling final results...\n";
# Concatenate the downloaded archive parts, unpack them into the scratch
# directory, then move the outputs to their requested locations.
# NOTE(review): $INSTRFILE and $HAT3FILE are interpolated unquoted into a
# shell command; a path with spaces or shell metacharacters breaks or alters
# the command. File::Copy::move would avoid the shell entirely.
252 &backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");
253 &backticks("mv -f $TMP/instr $INSTRFILE") if -e "$TMP/instr";
254 &backticks("mv -f $TMP/hat3 $HAT3FILE") if -e "$TMP/hat3";
256 # sometimes no hat3 file is generated [v3.1], so only instr is mandatory
257 #&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
258 &bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;
261 # warn if some own-structure ids were omitted from the output
262 if ( scalar keys(%ownids) > 0 )
266 open(INSTRF,"<$INSTRFILE") or &bail("Error: Cannot open file $INSTRFILE for reading!");
# Header lines have the form >NUMBER_ID; the captured ID is presumably
# recorded in %instrids (the assignment is not visible in this excerpt).
272 if ( /^>\d+_(\S+)$/ )
# Report every uploaded own-structure ID that did not make it into instr.
280 foreach my $id ( keys %ownids )
282 warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
# Fragment of the response parser (sub header not visible; the name
# parseResponse is inferred from the call sites). Takes the raw response
# body and returns the pair (status, job id).
300 my $response = shift;
302 #"status":"wait","mafftashid":"Ma8211432R"
# Matches a single key:value pair without whitespace or further colons.
# The sample above would not match as a whole, so the body is presumably
# split and unquoted in lines that are not visible here -- confirm.
307 if ( $response =~ /^([^\s:]+):([^\s:]+)$/ )
313 return ($status, $mafftashid);
# Fragment of the checksum-list reader (sub header not visible; the name
# extractchecksum is inferred from the call site). Streams the members of
# the archive to a pipe and returns a hash reference; if the pipe cannot be
# opened the hash is returned as it stands at that point.
# NOTE(review): $infile is interpolated into a shell command; the list form
# of a piped open would bypass the shell.
323 open CSUM, "tar -zxf $infile -O|" or return \%dataset;
# Only lines made of exactly two whitespace-separated fields are used.
# Which field is the file id and which the checksum is not visible here.
328 if ( /^(\S+)\s+(\S+)$/ )
# Fragment of the error extractor (sub header not visible; the name
# parseError is inferred from the call sites). Pulls the text of the error
# field out of a response body.
344 my $response = shift;
346 #"error":"Invalid number of inputs found."
# Empty string when the body carries no error field.
347 my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? $1 : "";
# Fragment of the local checksum helper (sub header not visible; the name
# getchecksum is inferred from the call site). Picks whichever MD5 tool is
# installed and runs it on $infile.
359 if ( -x "/usr/bin/md5sum" )
361 $MD5BIN = "/usr/bin/md5sum";
363 elsif ( -x "/sbin/md5" )
365 $MD5BIN = "/sbin/md5 -q";
# No MD5 tool available: an empty result makes the caller skip verification.
368 return "" if $MD5BIN eq "";
# NOTE(review): $infile is interpolated into a shell command; the list form
# of a piped open would bypass the shell.
372 open MD5EXE, "$MD5BIN $infile|" or return "";
# Two-field output, checksum then file name.
# NOTE(review): the quiet mode selected for /sbin/md5 prints the checksum
# alone, which this pattern would not match; presumably another branch that
# is not visible here handles it -- confirm.
376 if (/^(\S+)\s+(\S+)$/)
# Result of the shell-command helper (sub header not visible; the name
# backticks is inferred from the call sites): 0 only when the command could
# not be executed at all, 1 otherwise.
# NOTE(review): a command that runs but exits with a non-zero status is
# still reported as success.
400 return ($? == -1) ? 0 : 1;
# Prints the message, when one was passed, to standard error. The enclosing
# sub is not visible in this excerpt (presumably one of the exit helpers
# bail or help -- confirm).
407 print STDERR "$str\n" if defined $str;
# Fragment of the scratch-directory cleanup (sub header not visible).
# Nothing to do when the scratch path is empty or is not a directory.
416 return if ($TMP eq "" || !-d $TMP);
# NOTE(review): the result of opendir is not checked, and no closedir is
# visible in this excerpt.
418 opendir(MAINDIR, $TMP);
419 my @files = readdir(MAINDIR);
# Delete every existing entry directly under the scratch directory.
422 foreach my $file (@files)
424 unlink "$TMP/$file" if -e "$TMP/$file";
439 ./mafftash_premafft.pl -p [FILE]
440 ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
441 ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
446 FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
448 -o [FILE] -d [DIRECTORY]
449 FILE contains a list of IDs from your own structure/pdb files (one entry per line)
450 for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
453 save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
456 save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory