8 use base qw(Root Read);
12 PSIBLAST - Yet Another PSIBLAST Parser
19 $psi->read_file("psiblast.output");
24 A module to parse PSIBLAST output. It only grabs the last itteration, and there are simple methods to retrieve the ids of the sequences, the aligments and the start positions of the alignments.
26 Now gets the scores for the last iteration as well.
30 =head2 $psi->read(\*FH)
32 Reads a PSIBLAST file from a filehandle.
39 croak "No filehandle passed to method read" unless ref $fh eq 'GLOB';
41 # Make sure that $/ is a defined value
44 $self->_blastpgp_parse($fh);
54 #die "Wrong version of BLASTP" if $version !~ /BLASTP 2.2.6 \[Apr-09-2003\]/;
56 # Find the position of the last itteration
58 <$fh>, $pos = tell $fh if /^Sequences used in model and found again:/
61 # and make sure we found it...
62 unless ($pos) { $self->{psi} = undef, return }
65 # Now read in the last itteration
66 # @alignments holds the lines containing the aligment
67 # @order is a list of the IDs
68 # $flag indicates that the ID's have been collected
70 # All of this has to be list based as there may be multiple local
71 # alignments against the same sequence
72 my (@alignments, @order, $flag);
75 # Have we reached the end of the alignment
76 last if /^\s*Database: /;
79 next if /^Sequences not found previously or not previously below threshold:/;
83 # Capture E scores and check to see if we've reached the start of the alignment
84 unless (/^QUERY\s*/ or @alignments) {
86 my ($id, $bit, $e) = unpack "A70A3A*", $_;
87 ($id) = (split /\s+/, $id)[0];
88 s/\s*//g for $bit, $e;
91 $self->{$id}{bit} = $bit;
92 $self->{$id}{evalue} = $e;
96 # Have we reached the end of the first block of the
98 $flag = 1, next if /^$/;
100 # Add IDs if we're still in the first block
101 push @order, (split)[0] unless $flag;
102 # Add alignment lines
103 push @alignments, $_;
106 $self->size(scalar @order);
108 # Add sequences and start positions to self
109 for (0..$#alignments) {
111 my @fields = split ' ', $alignments[$_], 4;
113 # blastpgp output allways has a ID at the begining of the line, but can
114 # have a variable number of fields after that. e.g.:
115 # $seqid $start $seq $end
116 # O24727 ------------------------------------------------------------
117 # O24727 0 ------------------------------------------------------------
118 # O24727 26 -Y--A---A-----L-----R----S-------L---------G-------P----V--- 35
120 if (@fields == 4 or @fields == 3) {
121 $self->{psi}{$i}{seq} .= $fields[2];
122 $self->{psi}{$i}{start} = $fields[1]
123 unless defined $self->{psi}{$i}{start};
125 elsif (@fields == 2) { $self->{psi}{$i}{seq} .= $fields[1] }
127 croak "Fatal error! Wrong number of fields in BLAST alignment:\n$alignments[$_]\n"
133 $self->{psi}{$_}{id} = $order[$_];
139 =head2 @ids = $psi->get_ids
141 The IDs that were in the last iteration in the order they appeared.
147 #return map { $self->{psi}{$_}{id} } sort { $a <=> $b } keys %{$self->{psi}}
148 return map { $self->{psi}{$_}{id} } 0..$self->size - 1;
151 =head2 @alignments = $psi->get_alignments
153 The alignment from the last iteration in the same order as they appeared.
159 #return map { $self->{psi}{$_}{seq} } sort { $a <=> $b } keys %{$self->{psi}};
160 return map { $self->{psi}{$_}{seq} } 0..$self->size - 1;
163 =head2 @starts = $psi->get_starts
165 The start position of the first residue from the alignments, returned in the same order as they appeared.
171 return map { $self->{psi}{$_}{start} } sort { $a <=> $b } keys %{$self->{psi}};
174 =head2 @all = $psi->get_all
176 Returns an array of arrays containing the ID, start position and alignment from the PSIBLAST output in the order they appeared.
183 my @ids = $self->get_ids;
184 my @starts = $self->get_starts;
185 my @alignments = $self->get_alignments;
187 return map { [ $ids[$_], $starts[$_], $alignments[$_] ] } 0..$#ids
190 =head2 $psi->evalue($label)
195 my ($self, $label) = @_;
196 defined $label or confess "No label passed\n";
197 exists $self->{evalue}{$label} or warn "No such label '$label'\n" and return undef;
198 return $self->{evalue}{$label};
201 =head2 $psi->bit($label)
206 my ($self, $label) = @_;
207 defined $label or confess "No label passed\n";
208 exists $self->{bit}{$label} or warn "No such label '$label'\n" and return undef;
209 return $self->{bit}{$label};
214 Returns the number of sequences in the alignments.
219 my ($self, $size) = @_;
221 unless (defined $size) {
222 exists $self->{size} and return $self->{size};
225 $self->{size} = $size;
231 Doesn't aquire any other information.
235 Jonathan Barber <jon@compbio.dundee.ac.uk>