X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=utils%2Fgff2annot.pl;h=4ee683b769339e958f278fea72118247906608a6;hb=4bbd068738ed2e685f580ea1b65a7b11e32d3f3e;hp=f144df1fb8fbad36fb9816651a835ea88a83a468;hpb=08ae2201710b9a5ef0732129efc4a4afd6c7d09a;p=jalview.git diff --git a/utils/gff2annot.pl b/utils/gff2annot.pl index f144df1..4ee683b 100755 --- a/utils/gff2annot.pl +++ b/utils/gff2annot.pl @@ -1,16 +1,38 @@ #!/usr/bin/perl +## +# Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) +# Copyright (C) $$Year-Rel$$ The Jalview Authors +# +# This file is part of Jalview. +# +# Jalview is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +# +# Jalview is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with Jalview. If not, see . +# The Jalview Authors are detailed in the 'AUTHORS' file. +## use strict; use warnings; my %annotLines; - +my %featureids; my @fields; while (<>) { + ($_=~/^\#/) and next; my @fields = split /\s+/, $_; if (scalar @fields) { (defined $annotLines{$fields[1]}) or $annotLines{$fields[1]}=[]; - my $line = [$fields[2],$fields[0], "ID_NOT_SPECIFIED", $fields[3], $fields[4], $fields[2]]; + # this is the tab-separated set of fields forming a jalview annotation line + # we only use sequence IDs, not numbers + my $line = [$fields[2],$fields[0],"-1", $fields[3], $fields[4], $fields[2]]; + $featureids{$fields[2]}="FF0000"; # red is the colour. 
my $attribs = {}; if (scalar @fields>5) { $attribs->{"gff:score"}=$fields[5]; @@ -25,11 +47,71 @@ while (<>) { push @{$annotLines{$fields[1]}}, [$line, $attribs]; } } - +foreach my $labels (keys %featureids) { + print "$labels\t".$featureids{$labels}."\n"; +} foreach my $labels (keys %annotLines) { - print "startgroup\t".$labels."\n"; + print "STARTGROUP\t".$labels."\n"; foreach my $annot (@{$annotLines{$labels}}) { - print "".(join "\t",@{$annot->[0]})."\n"; + # bare minimum is written - no attributes/links yet. + print "".(join "\t",@{$annot->[0]})."\n"; } - print "endgroup\t".$labels."\n"; + print "ENDGROUP\t".$labels."\n"; } + +=pod + +=head1 NAME + +gff2annot.pl + +=head2 SYNOPSIS + + + gff2annot.pl [one or more files containing gff annotation] + +Generates a nominally usable Jalview Annotation file on B<STDOUT> from arbitrary GFF annotation lines. + +=head2 DESCRIPTION + +This script will generate a jalview features file on standard out, from a set of GFF annotation lines input from STDIN and/or any provided filenames. + +For a series of GFF annotation lines looking like : + +E<lt>seqIdE<gt> E<lt>sourceE<gt> E<lt>nameE<gt> E<lt>startE<gt> E<lt>endE<gt> [E<lt>scoreE<gt> E<lt>strandE<gt> E<lt>frameE<gt> [E<lt>AttributeE<gt> E<lt>Attribute-ValueE<gt>]] + +The script will generate a sequence features file on B<STDOUT> where annotation with a particular B<source> string will be grouped together under that name. 
+ +=head2 Example + +Passing some GFF annotation through STDIN: + + perl gff2annot.pl + Seq1 blastx significant_hsp 1 5 0.9 + 1 link http://mylink/ + # a comment + Seq1 blasty significant_hsp 15 25 0.9 + 1 link http://mylink/ + Seq1 blastz significant_hsp 32 43 0.9 + 1 link http://mylink/ + Seq2 blastx significant_hsp 1 5 0.9 + 1 link http://mylink/ + Seq2 blasty significant_hsp 1 5 0.9 + 1 link http://mylink/ + Seq2 blastz significant_hsp 1 5 0.9 + Seq3 blastx significant_hsp 50 70 + + +Produces + + significant_hsp FF0000 + STARTGROUP blasty + significant_hsp Seq1 -1 15 25 significant_hsp + significant_hsp Seq2 -1 1 5 significant_hsp + ENDGROUP blasty + STARTGROUP blastx + significant_hsp Seq1 -1 1 5 significant_hsp + significant_hsp Seq2 -1 1 5 significant_hsp + significant_hsp Seq3 -1 50 70 significant_hsp + ENDGROUP blastx + STARTGROUP blastz + significant_hsp Seq1 -1 32 43 significant_hsp + significant_hsp Seq2 -1 1 5 significant_hsp + ENDGROUP blastz + +=cut