3 # hack to pull the alignments and profiles from psiblast
5 # -m6 switch is needed to get the alignments
6 # in the BLAST report, also generates the
7 # psiblast frequency profiles, as they
8 # are no longer present in the output
13 if ($opt_f eq"req"){$freq=1}
14 elsif ($opt_g eq "ap"){$gap=1}
15 elsif ($opt_u eq "ngap"){$ungap=1}
16 elsif ($opt_d eq "egap"){$degap=1}
17 elsif ($opt_h eq "elp"){
21 This little script allows parsing of psiblast report files
22 to extract the embedded sequence alignment, and write out
25 parse_psi -help : help
26 parse_psi -degap : remove all gaps in alignment and print as
28 parse_psi -ungap : remove gaps in query sequence, and those
29 below the gap in the query sequence, print
31 parse_psi -gap : print alignment with gaps in fasta format
32 parse_psi -freq : print frequency profile needed for Jnet
36 parse_psi -freq file.blast > file.freq
40 else {print "NO options given, try -help\n";exit}
42 print ("No such file as $ARGV[0] exists in this path...\n");
47 print "No filename given... \n";
54 if (/^Sequences producing significant alignments/){
58 print STDERR "$num alignments were found in PSIBlast file $ARGV[0]\n";
64 if (/^Sequences producing significant alignments/){
65 if ($thisone == $num){
66 while (($ln2 = <IN>)){
67 if ($ln2 =~ /^QUERY/){
70 while (($align = <IN>) !~ /^ Database/){
84 # get number of blocks and number of sequences in a block
91 $store[$blocks][$z]=$_;
95 $npb=($z/($blocks+1));
97 for ($i=0; $i < ($blocks+1); $i++){
98 for ($j=0; $j < $npb ; $j++){
99 # get the alignment itself
100 #print "$store[$i][$q]";
101 (@tokens)=split(" ",$store[$i][$q]);
102 #print "$tokens[0] $tokens[1] $tokens[2] $tokens[3]\n";
105 $interest=$tokens[2];
109 $interest=$tokens[1];
113 $interest=$tokens[1];
114 if ($interest =~ /\d/g){
115 # get the next token, number
116 # must be at this end
117 $interest=$tokens[2];
120 # remove any digits that may creep in with the 3 token,
121 # this would mean the coord is on the end not in the
123 $interest =~ s/\d//g;
127 $seqkeep[$j]=$seqkeep[$j].$interest;
133 if ($gap==1 || $degap==1){
138 $seqkeep[$c] =~ s/-//g;
140 print ">$namekeep[$c]\n$seqkeep[$c]\n";
145 if ($ungap==1 || $freq == 1){
148 @sequence=split("",$seqkeep[0]);
151 if($sequence[$count] eq "\-"){
154 else{$printer[$count]=0;}
159 foreach $erm (@seqkeep){
161 @sequence=split("",$erm);
164 if ($printer[$count]==0){
165 $ung[$d]=$ung[$d].$sequence[$count];
174 print ">$namekeep[$c]\n$re\n";
179 # do the counting....
180 $seqlen=length($ung[0]);
183 for ($i = 0; $i < $#ung; $i++){
185 @sequence=split("",$ung[$i]);
187 # convert sequence to an integer value
188 @ints = seq2int(@sequence);
191 foreach (@sequence) {
192 $ar[$i][$j]=$ints[$j];
198 for ($r=0; $r <20; $r++){
202 for ($i = 0; $i < $seqlen; $i++) {
203 # skip the query sequence
204 for ($j = 1; $j < $#ung ; $j++){
206 $countup[$ar[$j][$i]]++;
209 # get total counts for this column
211 for ($r = 0; $r < 20; $r++){
212 $tot += $countup[$r];
214 for ($r=0; $r <20; $r++){
215 # get percent and round up to 10
216 # this seems to be slightly different to how
217 # psiblast works god knowns how....
219 $outnum= ($countup[$r] / $tot ) * 10;
220 printf ("%2.0f ",$outnum);
231 for ($g=0; $g <= $#s; $g++){
232 if ($s[$g] eq 'A'){ $intseq[$g] = "0"};
233 if ($s[$g] eq 'R'){ $intseq[$g] = "1"};
234 if ($s[$g] eq 'N'){ $intseq[$g] = "2"};
235 if ($s[$g] eq 'D'){ $intseq[$g] = "3"};
236 if ($s[$g] eq 'C'){ $intseq[$g] = "4"};
237 if ($s[$g] eq 'Q'){ $intseq[$g] = "5"};
238 if ($s[$g] eq 'E'){ $intseq[$g] = "6"};
239 if ($s[$g] eq 'G'){ $intseq[$g] = "7"};
240 if ($s[$g] eq 'H'){ $intseq[$g] = "8"};
241 if ($s[$g] eq 'I'){ $intseq[$g] = "9"};
242 if ($s[$g] eq 'L'){ $intseq[$g] = "10"};
243 if ($s[$g] eq 'K'){ $intseq[$g] = "11"};
244 if ($s[$g] eq 'M'){ $intseq[$g] = "12"};
245 if ($s[$g] eq 'F'){ $intseq[$g] = "13"};
246 if ($s[$g] eq 'P'){ $intseq[$g] = "14"};
247 if ($s[$g] eq 'S'){ $intseq[$g] = "15"};
248 if ($s[$g] eq 'T'){ $intseq[$g] = "16"};
249 if ($s[$g] eq 'W'){ $intseq[$g] = "17"};
250 if ($s[$g] eq 'Y'){ $intseq[$g] = "18"};
251 if ($s[$g] eq 'V'){ $intseq[$g] = "19"};
252 if ($s[$g] eq '-'){ $intseq[$g] = "25"};