package BLC; use strict; use warnings; use Carp; use constant { ID => 0, TITLE => 1, SEQ => 2 }; =head2 my $blc = new BLC; =cut sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = { _itteration => 0, _max_itteration => 0, }; return bless $self, $class; } =head2 my $blc->read_file($file) Read in a BLC file. =cut sub read_file { my @seqs; my $self = shift; my ($fn) = @_; open BLC, $fn or croak "$fn: $!\n"; while () { chomp; if (s/^>//) { s/\s*$//g; my ( $id, $title ) = split /\s+/, $_, 2; $title = '' unless $title; push @seqs, [ $id, $title ]; } # This regex copes with odd variations in the start of # the iterations line that marks the start of sequences if (/^\s*\*\s*[Ii]teration:?\s*(\d+)\s*$/) { # The start position of the sequences is the same # as the offset of the asterix from the start of # the line my $start = index $_, '*', 0; my $iter = $1; croak "Iteration not greater than 0 in BLC file $fn" unless $iter > 0; while () { chomp; last if /^\s{$start}\*\s*$/; my $line = substr $_, $start, @seqs; foreach ( 0 .. $#seqs ) { # Not just $iter, as we need to # leave room for the title # and header $seqs[$_]->[ SEQ + $iter - 1 ] .= substr( $line, $_, 1 ); } } } } close BLC; croak "No sequences found in BLC file $fn" unless @seqs; foreach (@seqs) { my ( $id, $title, @seqs ) = @{$_}; $title = '' unless defined $title; $id = '' unless defined $id; $self->set_sequence( $id, $title, @seqs ); } } =head2 $blc->set_sequence($id, $title, @seq) Add a BLC file to the object of $id, $title and @sequence, one sequence per iteration. =cut sub set_sequence { my $self = shift; my ( $id, $title, @data ) = @_; push @{ $self->{_seqs} }, [ $id, $title, @data ]; } =head2 $blc->get_sequence($number) Returns a list of the ($id, $title, @sequences) where each member of @sequences is from the itterations, from first to final. Defaults to the first sequences. =cut sub get_sequence { my ( $self, $number ) = @_; if ( defined $number and $number > $self->get_num_seqs ) { croak "You're trying to retrive a sequence past than the end of the BLC file in get_sequence($number)"; } elsif ( defined $number and $self->get_num_seqs + $number < 0 ) { croak "You're trying to retrive a sequence before the begining of the BLC file in get_sequence($number)"; } $number = 0 unless defined $number; return @{ ${ $self->{_seqs} }[$number] }; } =head2 $blc->get_num_seqs() Returns the number of sequences in the BLC file. =cut sub get_num_seqs { my ($self) = @_; return $#{ $self->{_seqs} }; } =head2 $blc->get_sequences($iteration) Returns all of the sequences in a block file for a particular itteration, in the same order as they occured in the block file. If left undefined, it will return the sequences from the first itteration. =cut sub get_sequences { my ( $self, $iteration ) = @_; $iteration = 0 unless $iteration and $iteration > 0; return map { $_->[ SEQ + $iteration ] } @{ $self->{_seqs} }; } =head2 $blc->get_seq_ids($number) Returns the ID for the $number sequence to occur in the file, if left undefined it'll return all of the IDs. =cut sub get_seq_ids { my ( $self, $number ) = @_; if ( defined $number ) { return ${ $self->{_seqs}[$number] }[ID]; } else { return map { $_->[ID] } @{ $self->{_seqs} }; } } =head2 $blc->get_seq_titles($number) Returns the titles for the number sequence to occur in the file, if left undefined it'll return all of the titles. =cut sub get_seq_titles { my ( $self, $number ) = @_; if ( defined $number ) { return ${ $self->{_seqs}[$number] }[TITLE]; } else { return map { $_->[TITLE] } @{ $self->{_seqs} }; } } =head2 $blc->print_blc($fh); This will print the BLC file object to the filehandle if given, otherwise to STDOUT. =cut sub print_blc { my ( $self, $fh ) = @_; if ($fh) { *OUT = $fh } else { *OUT = *STDOUT } # Print the IDs print OUT ">$_\n" foreach $self->get_seq_ids; # Get the sequences #my @sequences = $self->get_sequences my $i = 0; while (1) { my @sequences = $self->get_sequences($i); last unless defined $sequences[0]; print OUT "* iteration " . ( $i + 1 ) . "\n"; foreach my $j ( 0 .. length( $sequences[0] ) - 1 ) { foreach (@sequences) { print OUT substr $_, $j, 1; } print OUT "\n"; } print OUT "*\n"; $i++; } } =head2 $blc->print_fasta($fh) Prints the BLC file out in FASTA format, each sequence is 72 characters wide. =cut sub print_fasta { my ( $self, $fh ) = @_; if ($fh) { *OUT = $fh } else { *OUT = *STDOUT } my @ids = $self->get_seq_ids; my @sequences = $self->get_sequences; croak "Different number of sequences and IDs\n" unless @ids == @sequences; foreach ( 0 .. $#ids ) { print OUT ">$ids[$_]\n"; $sequences[$_] =~ s/(.{72})/$1\n/g; print OUT "$sequences[$_]\n"; } } =head2 $blc->next_itteration; =cut sub next_itteration { my ($self) = @_; $self->itteration( $self->itteration + 1 ); } =head2 $blc->next_itteration; =cut sub max_itteration { my ($self) = @_; return $self->{_max_itteration}; } =head2 $blc->itteration($number) If $number is defined, sets the itteration to $number, otherwise returns the number of itterations. =cut sub itteration { my ( $self, $itteration ) = @_; if ( defined $itteration ) { if ( $itteration < 1 ) { return undef; } } else { return $self->{_itteration}; } } 1;