From: Sasha Sherstnev <a.sherstnev@dundee.ac.uk> Date: Fri, 2 Aug 2013 16:15:25 +0000 (+0100) Subject: Add converion Jpred output from align/jnet to fasta X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=2a6ee304bfebc0c2c4f4bf267ab529d056740314;p=jabaws.git Add converion Jpred output from align/jnet to fasta --- diff --git a/binaries/src/jpred/jpred.pl b/binaries/src/jpred/jpred.pl index 88c4f58..7d3c0d2 100755 --- a/binaries/src/jpred/jpred.pl +++ b/binaries/src/jpred/jpred.pl @@ -51,6 +51,10 @@ Default: NNN = 1 Toggle allowing Jpred to make predictions even when there are no PSI-BLAST hits. +=item -no-final + +Untoggle final step of Jpred prediction + =item -verbose Verbose mode. Print more information while running jpred. @@ -172,6 +176,7 @@ my $ncpu = 1; # number of CPUs for psiblast calculations my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found my $db_path = "."; my $db_entry = "unknown"; +my $nofinal; my ( $help, $man, $DEBUG, $VERBOSE ); @@ -183,6 +188,7 @@ GetOptions( "dbpath=s" => \$db_path, "ncpu=s" => \$ncpu, "pred-nohits" => \$predNoHits, + "no-final" => \$nofinal, "help" => \$help, "man" => \$man, "debug" => \$DEBUG, @@ -209,8 +215,8 @@ my $database = { ## database used for ported Jpred ported_db => { - database => $db_path . "/databases/uniref90.filt", - unfiltered => $db_path . "/databases/uniref90", + database => $db_path . "/uniref90.filt", + unfiltered => $db_path . "/uniref90", }, ## cluster-specific path for Jpred cluster => { @@ -221,6 +227,7 @@ my $database = { ## these other DBs are experimental ones used during development. ## check they exist before using them. swall => { + # generic entry for use with validate_jnet.pl # real db location defined by validate_jnet.pl database => $db_path . "/swall/swall.filt", @@ -228,6 +235,7 @@ my $database = { }, uniprot => { + # Path to PSIBLAST db database => $db_path . "/3/swall.filt", unfiltered => $db_path . "/3/swall", @@ -427,6 +435,23 @@ if ( @seqs == 0 ) { } print ">>100% complete\n"; + +unless ( defined $nofinal ) { + my $aligfile = $output.".align"; + my $jnetfile = $output.".jnet"; + my $jnetfastafile = $output.".jnet.fasta"; + my $resufile = $output.".res"; + concise2fasta($jnetfile, $jnetfastafile); + open( my $IN1, "<", $aligfile ) or die "ERROR! unable to open '$aligfile': ${!}\nDied"; + open( my $IN2, "<", $jnetfastafile ) or die "ERROR! unable to open '$jnetfastafile': ${!}\nDied"; + open( my $OUT, ">", $resufile ) or die "ERROR! unable to open '$resufile': ${!}\nDied"; + while (<$IN2>) {print $OUT $_;} + while (<$IN1>) {print $OUT $_;} + close($IN1); + close($IN2); + close($OUT); +} + print "Jpred Finished\n"; exit; @@ -434,6 +459,95 @@ exit; # Functions ##################################################################################################### +##################################################################################################### +sub fasta2concise { + my $infile = shift; + my $outfile = shift; + + open( my $IN, "<", $infile ) or die "ERROR! unable to open '$infile': ${!}\nDied"; + open( my $OUT, ">", $outfile ) or die "ERROR! unable to open '$outfile': ${!}\nDied"; + + my ( $seq, @seqs, @title ); + while (<$IN>) { + if (s/^>//) { + if ($seq) { + $seq =~ s/\n|\s//g; + $seq =~ s/(.)/$1,/g; + push @seqs, $seq; + $seq = ""; + } + chomp; + push @title, $_; + } else { + chomp; + $seq .= $_; + } + } + $seq =~ s/\n|\s//g; + $seq =~ s/(.)/$1,/g; + push @seqs, $seq; + + if ( @title != @seqs ) { die("non matching number of titles and sequences!\n"); } + + foreach ( 0 .. $#title ) { + print "align" . ( $_ + 1 ) . ";$title[$_]:$seqs[$_]\n"; + } + close($IN); + close($OUT); +} + +##################################################################################################### +sub concise2fasta { + my $infile = shift; + my $outfile = shift; + + my ( @seqs, %seq, @pred, %pred ); + + my @var = ( + "Lupas_21", "Lupas_14", "Lupas_28", "JNETPSSM", "MULTCOIL", "MULTCOIL_TRIMER", "MULTCOIL_DIMER", "JNETFREQ", + "JNETALIGN", "JNETHMM", "JNETSOL5", "JNETSOL25", "JNETSOL0", "jnetpred", "jpred" + ); + + open( my $IN, "<", $infile ) or die "ERROR! unable to open '$infile': ${!}\nDied"; + open( my $OUT, ">", $outfile ) or die "ERROR! unable to open '$outfile': ${!}\nDied"; + + while (<$IN>) { + if (/^\n/) { next; } + my ( $id, $seq ) = split( ":", $_ ); + if ( !$id || !$seq ) { next; } # Check we have proper values + $seq =~ s/,//g; + chomp($seq); + if ( $id =~ /;/ ) { # Then it's an alignment + @_ = split( ";", $id ); + push @seqs, $_[1]; + $seq{ $_[1] } = $seq; + } + foreach (@var) { + if ( $_ eq $id ) { + push @pred, $_; + $pred{$_} = $seq; + } + } + } + close($IN); + + foreach (@seqs) { + $seq{$_} =~ s/(.{72})/$1\n/g; + print $OUT ">$_\n$seq{$_}\n"; + } + + foreach (@pred) { + $pred{$_} =~ s/[TCYWXZSI\?_]/-/g; + $pred{$_} =~ s/B/E/g; + $pred{$_} =~ s/G/H/g; + + if (/SOL/) { $pred{$_} =~ s/E/B/g; } + $pred{$_} =~ s/(.{72})/$1\n/g; + print $OUT ">$_\n$pred{$_}\n"; + } + close($OUT); +} + =begin :private =head2 $PSISEQ = remove_seq_masks($PSISEQ)