From 75ea8cab6b3f375b65a6146c7de37f4107294cc5 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Tue, 29 Sep 2020 15:26:34 +0100 Subject: [PATCH] JAL-3760 refactor nucleotide proportion test to per-sequence and return false if any sequence appears protein-like --- src/jalview/util/Comparison.java | 69 +++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index d4fc233..286bfb2 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -268,7 +268,38 @@ public class Comparison */ public static final boolean isNucleotide(SequenceI seq) { - return isNucleotide(new SequenceI[] { seq }); + if (seq==null) + { + return false; + } + long ntCount = 0; + long aaCount = 0; + + int len = seq.getLength(); + for (int i = 0; i < len; i++) + { + char c = seq.getCharAt(i); + if (isNucleotide(c)) + { + ntCount++; + } + else if (!isGap(c)) + { + aaCount++; + } + } + /* + * Check for nucleotide count > 85% of total count (in a form that evades + * int / float conversion or divide by zero). + */ + if (ntCount * 100 > EIGHTY_FIVE * (ntCount + aaCount)) + { + return true; + } + else + { + return false; + } } /** @@ -285,45 +316,23 @@ public class Comparison { return false; } - - int ntCount = 0; - int aaCount = 0; + // true if we have seen a nucleotide sequence + boolean na=false; for (SequenceI seq : seqs) { if (seq == null) { continue; } + na=true; // TODO could possibly make an informed guess just from the first sequence // to save a lengthy calculation - int len = seq.getLength(); - for (int i = 0; i < len; i++) - { - char c = seq.getCharAt(i); - if (isNucleotide(c)) - { - ntCount++; - } - else if (!isGap(c)) - { - aaCount++; - } + if (seq.isProtein()) { + // if even one looks like protein, the alignment is protein + return false; } } - - /* - * Check for nucleotide count > 85% of total count (in a form that evades - * int / float conversion or divide by zero). - */ - if (ntCount * 100 > EIGHTY_FIVE * (ntCount + aaCount)) - { - return true; - } - else - { - return false; - } - + return na; } /** -- 1.7.10.2