From f23edb1da574ceea56f2570996698ab29e61e129 Mon Sep 17 00:00:00 2001 From: Ben Soares Date: Wed, 16 Nov 2022 09:27:49 +0000 Subject: [PATCH] JAL-4019 finalised a boolean option and tidied some comments --- src/jalview/util/Comparison.java | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 68988c3..9fea705 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -42,6 +42,8 @@ public class Comparison private static final int NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE; + private static final boolean NUCLEOTIDE_AMBIGUITY_DETECTION; + public static final char GAP_SPACE = ' '; public static final char GAP_DOT = '.'; @@ -54,6 +56,7 @@ public class Comparison static { + // these options read only at start of session NUCLEOTIDE_COUNT_PERCENT = Cache.getDefault("NUCLEOTIDE_COUNT_PERCENT", 55); NUCLEOTIDE_COUNT_LONG_SEQUENCE_AMBIGUITY_PERCENT = Cache.getDefault( @@ -62,6 +65,8 @@ public class Comparison .getDefault("NUCLEOTIDE_COUNT_SHORT", 100); NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE = Cache .getDefault("NUCLEOTIDE_COUNT_VERY_SHORT", 4); + NUCLEOTIDE_AMBIGUITY_DETECTION = Cache + .getDefault("NUCLEOTIDE_AMBIGUITY_DETECTION", true); } /** @@ -324,7 +329,7 @@ public class Comparison } long allCount = ntCount + aaCount; - if (Cache.getDefault("NUCLEOTIDE_AMBIGUITY_DETECTION", true)) + if (NUCLEOTIDE_AMBIGUITY_DETECTION) { Console.debug("Performing new nucleotide detection routine"); if (allCount > NUCLEOTIDE_COUNT_SHORT_SEQUENCE) @@ -393,18 +398,19 @@ public class Comparison // vs := NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE // s := NUCLEOTIDE_COUNT_SHORT_SEQUENCE // lp := NUCLEOTIDE_COUNT_LOWER_PERCENT - // vsp := 1 - (1/a) # this is the proportion of required nucleotides in - // # a VERY_SHORT Sequence (4 bases). - // # should be all but one base is nucleotide. + // vsp := 1 - (1/a) # this is the proportion of required definite + // nucleotides + // # in a VERY_SHORT Sequence (4 bases). + // # This should be equivalent to all but one base in the sequence. // p := (a - vs)/(s - vs) # proportion of the way between // # VERY_SHORT and SHORT thresholds. - // tp := vsp + p * (lp/100 - vsp) # the proportion of nucleotides + // tp := vsp + p * (lp/100 - vsp) # the proportion of definite nucleotides // # required for this length of sequence. // minNt := tp * a # the minimum number of definite nucleotide bases - // # required for this length of sequences. + // # required for this length of sequence. // // We are then essentially returning: - // # ntCount >= 55% of allCount and the rest are all nucleotide ambiguity + // # ntCount >= 55% of allCount and the rest are all nucleotide ambiguity: // ntCount >= tp * allCount && nCount + ntaCount == aaCount // but without going into float/double land long LHS = 100 * allCount -- 1.7.10.2