use warnings;
my %annotLines;
-
+my %featureids;
my @fields;
while (<>) {
+ ($_=~/^\#/) and next;
my @fields = split /\s+/, $_;
if (scalar @fields) {
(defined $annotLines{$fields[1]}) or $annotLines{$fields[1]}=[];
- my $line = [$fields[2],$fields[0], "ID_NOT_SPECIFIED", $fields[3], $fields[4], $fields[2]];
+ # this is the tab-separated set of fields forming a jalview annotation line
+ # we only use sequence IDs, not numbers
+ my $line = [$fields[2],$fields[0],"-1", $fields[3], $fields[4], $fields[2]];
+ $featureids{$fields[2]}="FF0000"; # red is the colour.
my $attribs = {};
if (scalar @fields>5) {
$attribs->{"gff:score"}=$fields[5];
push @{$annotLines{$fields[1]}}, [$line, $attribs];
}
}
-
+foreach my $labels (keys %featureids) {
+ print "$labels\t".$featureids{$labels}."\n";
+}
foreach my $labels (keys %annotLines) {
- print "startgroup\t".$labels."\n";
+ print "STARTGROUP\t".$labels."\n";
foreach my $annot (@{$annotLines{$labels}}) {
- print "".(join "\t",@{$annot->[0]})."\n";
+ # bare minimum is written - no attributes/links yet.
+ print "".(join "\t",@{$annot->[0]})."\n";
}
- print "endgroup\t".$labels."\n";
+ print "ENDGROUP\t".$labels."\n";
}
+
+=pod
+
+=head1 NAME
+
+gff2annot.pl
+
+=head2 SYNOPSIS
+
+
+ gff2annot.pl [one or more files containing gff annotation]
+
+Generates a nominally usable Jalview Annotation file on B<STDOUT> from arbitrary GFF annotation lines.
+
+=head2 DESCRIPTION
+
+This script will generate a jalview features file on standard out, from a set of GFF annotation lines input from STDIN and/or any provided filenames.
+
+For a series of GFF annotation lines looking like :
+
+E<lt>seqIdE<gt> E<lt>sourceE<gt> E<lt>nameE<gt> E<lt>startE<gt> E<lt>endE<gt> [E<lt>scoreE<gt> E<lt>strandE<gt> E<lt>frameE<gt> [E<lt>AttributeE<gt> E<lt>Attribute-Value<gt>]]
+
+The script will generate a seuqence features file on B<STDOUT> where annotation with a particular B<source> string will be grouped together under that name.
+
+=head2 Example
+
+Passing some GFF annotation through STDIN:
+
+ perl gff2annot.pl
+ Seq1 blastx significant_hsp 1 5 0.9 + 1 link http://mylink/
+ # a comment
+ Seq1 blasty significant_hsp 15 25 0.9 + 1 link http://mylink/
+ Seq1 blastz significant_hsp 32 43 0.9 + 1 link http://mylink/
+ Seq2 blastx significant_hsp 1 5 0.9 + 1 link http://mylink/
+ Seq2 blasty significant_hsp 1 5 0.9 + 1 link http://mylink/
+ Seq2 blastz significant_hsp 1 5 0.9
+ Seq3 blastx significant_hsp 50 70
+ <Control^D/Z>
+
+Produces
+
+ significant_hsp FF0000
+ STARTGROUP blasty
+ significant_hsp Seq1 -1 15 25 significant_hsp
+ significant_hsp Seq2 -1 1 5 significant_hsp
+ ENDGROUP blasty
+ STARTGROUP blastx
+ significant_hsp Seq1 -1 1 5 significant_hsp
+ significant_hsp Seq2 -1 1 5 significant_hsp
+ significant_hsp Seq3 -1 50 70 significant_hsp
+ ENDGROUP blastx
+ STARTGROUP blastz
+ significant_hsp Seq1 -1 32 43 significant_hsp
+ significant_hsp Seq2 -1 1 5 significant_hsp
+ ENDGROUP blastz
+
+=cut