X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fhmmscan_analysis.rb;h=0aaab8465f38f347167cba6c420d9cb711e96efe;hb=36ea5e3d816a5afe5042c28ff0c07394575521f5;hp=e0f9a86d61177c3ba1ce141b1dc14ba7d16c2b56;hpb=2dd62272307b5ebdb33fabdc27dabc00e3ad601d;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb b/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb index e0f9a86..0aaab84 100644 --- a/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb +++ b/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb @@ -7,8 +7,6 @@ # $Id: hmmscan_parser.rb,v 1.5 2010/12/13 19:00:11 cmzmasek Exp $ # -require 'set' - require 'lib/evo/util/constants' require 'lib/evo/util/util' require 'lib/evo/util/command_line_arguments' @@ -17,7 +15,6 @@ require 'lib/evo/msa/msa' require 'lib/evo/msa/msa_factory' require 'lib/evo/io/msa_io' require 'lib/evo/io/parser/fasta_parser' -require 'lib/evo/io/writer/fasta_writer' module Evoruby @@ -38,7 +35,7 @@ module Evoruby HELP_OPTION_1 = "help" HELP_OPTION_2 = "h" - USE_AVOID_HMMS = false + USE_AVOID_HMMS = true AVOID_HHMS = [ "RRM_1", "RRM_2", "RRM_3", "RRM_4", "RRM_5", "RRM_6" ] LIMIT_FOR_CLOSE_DOMAINS = 20 @@ -148,8 +145,6 @@ module Evoruby end end - - begin parse( inpath, i_e_value_threshold, @@ -164,6 +159,7 @@ module Evoruby end # def run + private def read_fasta_file( input ) @@ -241,7 +237,6 @@ module Evoruby end # def parse - def process_hmmscan_results_per_protein( hmmscan_results_per_protein, fs_e_value_threshold, target_hmms, @@ -266,7 +261,6 @@ module Evoruby end end - # dcs = [] hmmscan_results_per_protein_filtered = [] matched = Set.new @@ -314,8 +308,19 @@ module Evoruby end end + species = nil + ll = nil + if msa != nil + seq = get_sequence( query, msa ) + species = get_species( seq ) + raise StandardError, "could not get species" if species == nil || species.empty? + if x_models != nil && x_models.length > 0 + ll = extract_linker( hmmscan_results_per_protein_filtered, x_models, seq, extraction_output_file ) + end + end + s = query + "\t" - s << "species" + "\t" + s << species + "\t" owns.each do | own | s << own.fs_e_value.to_s + "\t" end @@ -327,29 +332,36 @@ module Evoruby s << r.model + " " end s << "\t" + if msa != nil + if ll != nil + s << ll.to_s + end + s << "\t" + end s << make_overview_da( hmmscan_results_per_protein_filtered ) s << "\t" s << make_detailed_da( hmmscan_results_per_protein_filtered, qlen ) puts s - if x_models != nil && x_models.length > 0 - extract_linkers( hmmscan_results_per_protein_filtered, x_models, query, "lizrd", msa, extraction_output_file ) - end + end - def extract_linkers( hmmscan_results_per_protein_filtered, x_models, seq, extraction_output_file ) + + def extract_linker( hmmscan_results_per_protein_filtered, x_models, seq, extraction_output_file ) raise StandardError, "extraction output file is nil" if extraction_output_file == nil prev_r = nil hmmscan_results_per_protein_filtered.each do | r | if prev_r != nil if ( x_models.length == 2 && prev_r.model == x_models[ 0 ] && r.model == x_models[ 1 ] ) - extract_linker( prev_r.env_to, r.env_from, seq, extraction_output_file ) + ll = output_linker( prev_r.env_to, r.env_from, seq, extraction_output_file ) + return ll end end prev_r = r end + return nil end - + def get_sequence( query, msa ) seq = nil indices = msa.find_by_name_start( query , true ) @@ -363,30 +375,27 @@ module Evoruby end seq end - - - def extract_linker( first, last , seq, output_file ) - + + + def get_species( seq ) + species = nil + if seq.get_name =~ /\[([A-Z0-9]{3,5})\]/ + species = $1 + end + species + end + + + def output_linker( first, last , seq, output_file ) if ( last - first >= 1 ) - indices = msa.find_by_name_start( query , true ) - if indices.length != 1 - if query[ -1, 1 ] == "|" - query.chop! - end - seq = msa.get_by_name_pattern( /\b#{Regexp.quote(query)}\b/ ) - else - seq = msa.get_sequence( indices[ 0 ] ) - end - species = nil - if seq.get_name =~ /\[([A-Z0-9]{3,5})\]/ - species = $1 - end - output_file.print( ">" + seq.get_name + " [" + first.to_s + "-" + last.to_s + "]" + "\n") output_file.print( seq.get_subsequence( first - 1 , last - 1 ).get_sequence_as_string + "\n" ) end + last - first + 1 + end + def make_detailed_da( hmmscan_results_per_protein_filtered , qlen ) s = "" prev_r = nil @@ -407,6 +416,7 @@ module Evoruby s end + def make_overview_da( hmmscan_results_per_protein_filtered ) overview = "" prev_r = nil @@ -427,24 +437,6 @@ module Evoruby end - - - - def calc_linkers( hmmscan_results_per_protein_filtered, hmm_for_protein_output ) - linkers = "" - prev_r = nil - hmmscan_results_per_protein_filtered.each do | r | - if r.model == hmm_for_protein_output - if prev_r != nil - linkers << ( r.env_from - prev_r.env_to - 1 ).to_s + " " - end - prev_r = r - end - end - linkers - end - - def make_interdomain_sequence( d, mark_short = true ) s = "" d /= 20