X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FComparison.java;h=e9ff931fe2db8b5bdb0fddf12ad1120d005feaa7;hb=1f66bdde8f5379d17fe56001443e0e6fde1150d1;hp=af6052cf8172c9777e72372c3d82e9ef8fee7995;hpb=6fc54333dcca130b378144b067a9a016c9e1cc41;p=jalview.git diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index af6052c..e9ff931 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -42,6 +42,8 @@ public class Comparison private static final int NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE; + private static final boolean NUCLEOTIDE_AMBIGUITY_DETECTION; + public static final char GAP_SPACE = ' '; public static final char GAP_DOT = '.'; @@ -54,6 +56,7 @@ public class Comparison static { + // these options read only at start of session NUCLEOTIDE_COUNT_PERCENT = Cache.getDefault("NUCLEOTIDE_COUNT_PERCENT", 55); NUCLEOTIDE_COUNT_LONG_SEQUENCE_AMBIGUITY_PERCENT = Cache.getDefault( @@ -62,6 +65,8 @@ public class Comparison .getDefault("NUCLEOTIDE_COUNT_SHORT", 100); NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE = Cache .getDefault("NUCLEOTIDE_COUNT_VERY_SHORT", 4); + NUCLEOTIDE_AMBIGUITY_DETECTION = Cache + .getDefault("NUCLEOTIDE_AMBIGUITY_DETECTION", true); } /** @@ -324,17 +329,17 @@ public class Comparison } long allCount = ntCount + aaCount; - if (Cache.getDefault("NUCLEOTIDE_AMBIGUITY_DETECTION", true)) + if (NUCLEOTIDE_AMBIGUITY_DETECTION) { - Console.info("Performing new nucleotide detection routine"); + Console.debug("Performing new nucleotide detection routine"); if (allCount > NUCLEOTIDE_COUNT_SHORT_SEQUENCE) { // a long sequence. // check for at least 55% nucleotide, and nucleotide and ambiguity codes // (including N) must make up 95% - return ntCount * 100 > NUCLEOTIDE_COUNT_PERCENT * allCount + return ntCount * 100 >= NUCLEOTIDE_COUNT_PERCENT * allCount && 100 * (ntCount + nCount - + ntaCount) > NUCLEOTIDE_COUNT_LONG_SEQUENCE_AMBIGUITY_PERCENT + + ntaCount) >= NUCLEOTIDE_COUNT_LONG_SEQUENCE_AMBIGUITY_PERCENT * allCount; } else if (allCount > NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE) @@ -342,7 +347,7 @@ public class Comparison // a short sequence. // check if a short sequence is at least 55% nucleotide and the rest of // the symbols are all X or all N - if (ntCount * 100 > NUCLEOTIDE_COUNT_PERCENT * allCount + if (ntCount * 100 >= NUCLEOTIDE_COUNT_PERCENT * allCount && (nCount == aaCount || xCount == aaCount)) { return true; @@ -365,7 +370,7 @@ public class Comparison } else { - Console.info("Performing old nucleotide detection routine"); + Console.debug("Performing old nucleotide detection routine"); /* * Check for nucleotide count > 85% of total count (in a form that evades * int / float conversion or divide by zero). @@ -393,18 +398,19 @@ public class Comparison // vs := NUCLEOTIDE_COUNT_VERY_SHORT_SEQUENCE // s := NUCLEOTIDE_COUNT_SHORT_SEQUENCE // lp := NUCLEOTIDE_COUNT_LOWER_PERCENT - // vsp := 1 - (1/a) # this is the proportion of required nucleotides in - // # a VERY_SHORT Sequence (4 bases). - // # should be all but one base is nucleotide. + // vsp := 1 - (1/a) # this is the proportion of required definite + // nucleotides + // # in a VERY_SHORT Sequence (4 bases). + // # This should be equivalent to all but one base in the sequence. // p := (a - vs)/(s - vs) # proportion of the way between // # VERY_SHORT and SHORT thresholds. - // tp := vsp + p * (lp/100 - vsp) # the proportion of nucleotides + // tp := vsp + p * (lp/100 - vsp) # the proportion of definite nucleotides // # required for this length of sequence. // minNt := tp * a # the minimum number of definite nucleotide bases - // # required for this length of sequences. + // # required for this length of sequence. // // We are then essentially returning: - // # ntCount >= 55% of allCount and the rest are all nucleotide ambiguity + // # ntCount >= 55% of allCount and the rest are all nucleotide ambiguity: // ntCount >= tp * allCount && nCount + ntaCount == aaCount // but without going into float/double land long LHS = 100 * allCount