8 use base qw(Root Read);
12 PSIBLAST - Yet Another PSIBLAST Parser
19 $psi->read_file("psiblast.output");
24 A module to parse PSIBLAST output. It only grabs the last itteration, and there are simple methods to retrieve the ids of the sequences, the aligments and the start positions of the alignments.
26 Now gets the scores for the last iteration as well.
30 =head2 $psi->read(\*FH)
32 Reads a PSIBLAST file from a filehandle.
37 my ( $self, $fh ) = @_;
39 croak "No filehandle passed to method read" unless ref $fh eq 'GLOB';
41 # Make sure that $/ is a defined value
44 $self->_blastpgp_parse($fh);
48 my ( $self, $fh ) = @_;
54 # Find the position of the last itteration
56 <$fh>, $pos = tell $fh if /^Sequences used in model and found again:/;
59 # and make sure we found it...
60 unless ($pos) { $self->{psi} = undef, return }
63 # Now read in the last itteration
64 # @alignments holds the lines containing the aligment
65 # @order is a list of the IDs
66 # $flag indicates that the ID's have been collected
68 # All of this has to be list based as there may be multiple local
69 # alignments against the same sequence
70 my ( @alignments, @order, $flag );
74 # Have we reached the end of the alignment
75 last if /^\s*Database: /;
78 next if /^Sequences not found previously or not previously below threshold:/;
82 # Capture E scores and check to see if we've reached the start of the alignment
83 unless ( /^QUERY\s*/ or @alignments ) {
85 my ( $id, $bit, $e ) = unpack "A70A3A*", $_;
86 ($id) = ( split /\s+/, $id )[0];
87 s/\s*//g for $bit, $e;
90 $self->{$id}{bit} = $bit;
91 $self->{$id}{evalue} = $e;
95 # Have we reached the end of the first block of the alignment
96 $flag = 1, next if /^$/;
98 # Add IDs if we're still in the first block
99 push @order, (split)[0] unless $flag;
101 # Add alignment lines
102 push @alignments, $_;
105 $self->size( scalar @order );
107 # Add sequences and start positions to self
108 for ( 0 .. $#alignments ) {
110 my @fields = split ' ', $alignments[$_], 4;
112 # blastpgp output allways has a ID at the begining of the line, but can
113 # have a variable number of fields after that. e.g.:
114 # $seqid $start $seq $end
115 # O24727 ------------------------------------------------------------
116 # O24727 0 ------------------------------------------------------------
117 # O24727 26 -Y--A---A-----L-----R----S-------L---------G-------P----V--- 35
119 if ( @fields == 4 or @fields == 3 ) {
120 $self->{psi}{$i}{seq} .= $fields[2];
121 $self->{psi}{$i}{start} = $fields[1]
122 unless defined $self->{psi}{$i}{start};
123 } elsif ( @fields == 2 ) {
124 $self->{psi}{$i}{seq} .= $fields[1];
126 croak "Fatal error! Wrong number of fields in BLAST alignment:\n$alignments[$_]\n";
131 for ( 0 .. $#order ) {
132 $self->{psi}{$_}{id} = $order[$_];
138 =head2 @ids = $psi->get_ids
140 The IDs that were in the last iteration in the order they appeared.
147 #return map { $self->{psi}{$_}{id} } sort { $a <=> $b } keys %{$self->{psi}}
148 return map { $self->{psi}{$_}{id} } 0 .. $self->size - 1;
151 =head2 @alignments = $psi->get_alignments
153 The alignment from the last iteration in the same order as they appeared.
160 #return map { $self->{psi}{$_}{seq} } sort { $a <=> $b } keys %{$self->{psi}};
161 return map { $self->{psi}{$_}{seq} } 0 .. $self->size - 1;
164 =head2 @starts = $psi->get_starts
166 The start position of the first residue from the alignments, returned in the same order as they appeared.
172 return map { $self->{psi}{$_}{start} } sort { $a <=> $b } keys %{ $self->{psi} };
175 =head2 @all = $psi->get_all
177 Returns an array of arrays containing the ID, start position and alignment from the PSIBLAST output in the order they appeared.
184 my @ids = $self->get_ids;
185 my @starts = $self->get_starts;
186 my @alignments = $self->get_alignments;
188 return map { [ $ids[$_], $starts[$_], $alignments[$_] ] } 0 .. $#ids;
191 =head2 $psi->evalue($label)
196 my ( $self, $label ) = @_;
197 defined $label or confess "No label passed\n";
198 exists $self->{evalue}{$label} or warn "No such label '$label'\n" and return undef;
199 return $self->{evalue}{$label};
202 =head2 $psi->bit($label)
207 my ( $self, $label ) = @_;
208 defined $label or confess "No label passed\n";
209 exists $self->{bit}{$label} or warn "No such label '$label'\n" and return undef;
210 return $self->{bit}{$label};
215 Returns the number of sequences in the alignments.
220 my ( $self, $size ) = @_;
222 unless ( defined $size ) {
223 exists $self->{size} and return $self->{size};
225 $self->{size} = $size;
231 Doesn't aquire any other information.
235 Jonathan Barber <jon@compbio.dundee.ac.uk>