# Provenance: this script is the payload of a git patch that creates
# forester/python/clad_sum.py
#   commit:  ce05d243890402c51bed2f692b7eaae17a2e5f33
#   author:  cmzmasek
#   date:    Tue, 19 Sep 2017 10:34:51 -0700
#   subject: [PATCH] in progress...
"""Summarize the 'Matching Clades' rows of a tab-separated result file.

Usage: clad_sum.py <infile>

Every non-comment line is split on tabs.  Only rows whose second column is
exactly 'Matching Clades' are counted; each such row is put into one of five
categories by comparing the first column with the third column (see
``classify_line``).  Rows that could not be matched are echoed to stdout as
they are encountered, and a summary of all five counters is printed at the
end.
"""

from sys import argv
import re

# Counter names, in the order in which they are reported.
CATEGORIES = ('match', 'lessspecific', 'qu', 'na', 'no_match')

# Prefix printed in front of rows that are echoed while scanning the input.
# Rows in the 'match' and 'lessspecific' categories are counted silently.
DIAGNOSTIC_PREFIX = {
    'qu': '? : ',
    'na': 'NA : ',
    'no_match': 'no match: ',
}


def classify_line(line):
    """Return the category of one already-stripped input line.

    Returns one of the names in ``CATEGORIES``, or ``None`` when the line
    does not take part in the statistics: comment lines (starting with '#'),
    lines with fewer than three tab-separated fields (this includes blank
    lines), and rows whose second field is not 'Matching Clades'.
    """
    if line.startswith('#'):
        return None
    elements = line.split('\t')
    # Blank or truncated rows have no second/third field to inspect; skip
    # them instead of failing with an IndexError.
    if len(elements) < 3:
        return None
    if elements[1] != 'Matching Clades':
        return None

    first, third = elements[0], elements[2]
    if first.endswith(third):
        return 'match'
    # "Less specific": the third field appears in the first field right after
    # a '|' (which itself is preceded by at least one character) and is
    # immediately followed by a '.'.
    my_regex = r".+\|" + re.escape(third) + r"\."
    if re.search(my_regex, first):
        return 'lessspecific'
    if third == '?':
        return 'qu'
    if first.endswith('NA'):
        return 'na'
    return 'no_match'


def summarize(lines):
    """Count the categories of all *lines* and echo the unmatched rows.

    *lines* is any iterable of strings (for example an open text file).
    Leading and trailing whitespace is stripped from each line before it is
    classified.  Rows classified as 'qu', 'na' or 'no_match' are printed to
    stdout with their diagnostic prefix.

    Returns a dict mapping every name in ``CATEGORIES`` to its count.
    """
    counts = dict.fromkeys(CATEGORIES, 0)
    for raw in lines:
        line = raw.strip()
        category = classify_line(line)
        if category is None:
            continue
        counts[category] += 1
        prefix = DIAGNOSTIC_PREFIX.get(category)
        if prefix is not None:
            print(prefix, line)
    return counts


def main(args):
    """Run the summary for the input file named by ``args[1]``.

    Raises ``SystemExit`` with a usage message when no input file is given.
    """
    if len(args) < 2:
        raise SystemExit('Usage: clad_sum.py <infile>')
    infile = args[1]

    print('Infile: ', infile)

    with open(infile) as f:
        counts = summarize(f)

    print()
    print('Match :', counts['match'])
    print('Less specific match :', counts['lessspecific'])
    print('No match: result undeceided ("?"):', counts['qu'])
    print('No match: target is "NA" :', counts['na'])
    print('No match :', counts['no_match'])


if __name__ == '__main__':
    main(argv)