X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FComparison.java;h=2dcbeb5a1d2a076e9b118604510f332b6f3a26ff;hb=e134aeee6802ab52748920ec07220c6a6431b77c;hp=cd98ee74bf560bf426c948950cc1e094809ff3a4;hpb=04c8f7bff663aa469127e9eed4164e02933782f1;p=jalview.git diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index cd98ee7..2dcbeb5 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -20,11 +20,11 @@ */ package jalview.util; -import jalview.datamodel.SequenceI; - import java.util.ArrayList; import java.util.List; +import jalview.datamodel.SequenceI; + /** * Assorted methods for analysing or comparing sequences. */ @@ -32,6 +32,8 @@ public class Comparison { private static final int EIGHTY_FIVE = 85; + private static final int NINETY_NINE = 99; + private static final int TO_UPPER_CASE = 'a' - 'A'; public static final char GAP_SPACE = ' '; @@ -256,7 +258,7 @@ public class Comparison */ public static final boolean isGap(char c) { - return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false; + return c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE; } /** @@ -275,12 +277,13 @@ public class Comparison long ntCount = 0; long aaCount = 0; long nCount = 0; + long ntaCount = 0; int len = seq.getLength(); for (int i = 0; i < len; i++) { char c = seq.getCharAt(i); - if (isNucleotide(c) || isX(c)) + if (isNucleotide(c)) { ntCount++; } @@ -291,6 +294,13 @@ public class Comparison { nCount++; } + else + { + if (isNucleotideAmbiguity(c)) + { + ntaCount++; + } + } } } /* @@ -304,6 +314,12 @@ public class Comparison } else { + // check for very large proportion of nucleotide and all ambiguity codes + if ((ntCount + nCount + ntaCount) * 100 >= NINETY_NINE + * (ntCount + aaCount)) + { + return ntCount > 0; + } return false; } } @@ -350,11 +366,13 @@ public class Comparison */ public static boolean isNucleotide(char c) { - if ('a' <= c && c <= 'z') - { - c -= TO_UPPER_CASE; - } - switch (c) + return isNucleotide(c, false); + } + + public static boolean isNucleotide(char c, boolean countAmbiguity) + { + char C = Character.toUpperCase(c); + switch (C) { case 'A': case 'C': @@ -363,29 +381,45 @@ public class Comparison case 'U': return true; } + if (countAmbiguity) + { + boolean ambiguity = isNucleotideAmbiguity(C); + if (ambiguity) + return true; + } return false; } - public static boolean isN(char c) + public static boolean isNucleotideAmbiguity(char c) { - switch (c) + switch (Character.toUpperCase(c)) { - case 'N': - case 'n': + case 'I': + case 'X': + case 'R': + case 'Y': + case 'W': + case 'S': + case 'M': + case 'K': + case 'B': + case 'H': + case 'D': + case 'V': return true; + case 'N': // not counting N as nucleotide } return false; } + public static boolean isN(char c) + { + return 'n' == Character.toLowerCase(c); + } + public static boolean isX(char c) { - switch (c) - { - case 'X': - case 'x': - return true; - } - return false; + return 'x' == Character.toLowerCase(c); } /** @@ -456,13 +490,7 @@ public class Comparison public static boolean isSameResidue(char c1, char c2, boolean caseSensitive) { - if (caseSensitive) - { - return (c1 == c2); - } - else - { - return Character.toUpperCase(c1) == Character.toUpperCase(c2); - } + return caseSensitive ? c1 == c2 + : Character.toUpperCase(c1) == Character.toUpperCase(c2); } }