package OC; use strict; use warnings; use Carp; use IPC::Open3; use base qw(Root Read Common); use Run qw(check); =head1 NAME OC - Read OC output =head1 SYNOPSIS $oc = OC->new; $oc->read_file($path); $oc->get_group(1); # Returns information about cluster 1 $oc->get_groups; # Returns all the groups =head1 DESCRIPTION This is a module to read OC output. =head1 $oc->read($fh); Reads an OC file from a filehandle. =cut sub read { my ( $self, $fh ) = @_; local $/ = "\n"; while (<$fh>) { chomp; next if /^\s*$/; if (s/^##\s*//) { my ( $num, $score, $size ) = split; my $nl = <$fh>; $nl =~ s/^\s+//; my (@labels) = map { chomp; $_ } split / /, $nl; # Add to self $self->add_group( $num, $score, $size, @labels ); } elsif (/^UNCLUSTERED ENTITIES$/) { my (@labels) = map { chomp; $_ } <$fh>; $self->add_unclustered(@labels); } } } =head $oc->add_group($number, $score, $size, @labels) Add information about a group. =cut sub add_group { my ( $self, $num, $score, $size, @labels ) = @_; for (qw(num score size)) { croak "No value for $_ passed to add_group" unless eval "defined \$$_"; } croak "No labels passed to add_group" unless @labels; $self->{ __PACKAGE__ . "score" }{$num} = $score; $self->{ __PACKAGE__ . "size" }{$num} = $size; $self->{ __PACKAGE__ . "labels" }{$num} = [@labels]; } =head2 $oc->add_unclustered(@labels) Adds those entities that are unclustered. =cut sub add_unclustered { my ( $self, @labels ) = @_; $self->{ __PACKAGE__ . "unclust" } = \@labels; } =head2 $oc->get_unclustered Returns those unclustered entities as a list. =cut sub get_unclustered { my ($self) = @_; return @{ $self->{ __PACKAGE__ . "unclust" } } if defined $self->{ __PACKAGE__ . "unclust" }; } =head2 ($score, $size, @labels) = $oc->get_group($n); Returns information about a particular group. If the group doesn't exist you get a warning message and undef. =cut sub get_group { my ( $self, $num ) = @_; if ( defined $num ) { if ( exists $self->{ __PACKAGE__ . "score" }{$num} ) { return $self->{ __PACKAGE__ . "score" }{$num}, $self->{ __PACKAGE__ . "size" }{$num}, @{ $self->{ __PACKAGE__ . "labels" }{$num} }; } else { carp "No such group '$num'"; return undef; } } else { croak "No value passed to get_group"; } } =head1 @info = $oc->get_groups; Returns information about all of the groups. This is an array of arrays, the second array holds group id and then the information returned by the get_group method. =cut sub get_groups { my ($self) = @_; map { [ $_, $self->get_group($_) ] } sort keys %{ $self->{ __PACKAGE__ . "score" } }; } =head1 $oc->run(); =cut sub run { my ( $self, $file ) = @_; local $/ = undef; local $| = 1; my $pid = open3( \*WRT, \*RD, \*ERR, $self->path ); print WRT $file; close WRT; my @output = join "\n", split "\n", ; close RD; waitpid $pid, 0; check( $self->path, $? ) or die "OC was naughty"; return @output; } 1;