From e134aeee6802ab52748920ec07220c6a6431b77c Mon Sep 17 00:00:00 2001 From: Ben Soares Date: Thu, 10 Nov 2022 15:21:08 +0000 Subject: [PATCH] JAL-4019 Nucleotide Ambiguity Colour Scheme added. Test for nucleotide sequence adapted to recognise nucleotide sequences with (more than 15%) ambiguity codes. Two tests fixed. --- resources/lang/Messages.properties | 1 + resources/lang/Messages_es.properties | 1 + src/jalview/schemes/JalviewColourScheme.java | 4 +- .../schemes/NucleotideAmbiguityColourScheme.java | 40 ++++++ src/jalview/schemes/ResidueProperties.java | 133 ++++++++++---------- src/jalview/util/Comparison.java | 84 ++++++++----- 6 files changed, 170 insertions(+), 93 deletions(-) create mode 100644 src/jalview/schemes/NucleotideAmbiguityColourScheme.java diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 3843ddb..fff8ff4 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -201,6 +201,7 @@ label.colourScheme_turnpropensity = Turn Propensity label.colourScheme_buriedindex = Buried Index label.colourScheme_purine/pyrimidine = Purine/Pyrimidine label.colourScheme_nucleotide = Nucleotide +label.colourScheme_nucleotideambiguity = Nucleotide Ambiguity label.colourScheme_t-coffeescores = T-Coffee Scores label.colourScheme_rnahelices = By RNA Helices label.colourScheme_sequenceid = Sequence ID Colour diff --git a/resources/lang/Messages_es.properties b/resources/lang/Messages_es.properties index d0bfd65..ec090ae 100644 --- a/resources/lang/Messages_es.properties +++ b/resources/lang/Messages_es.properties @@ -194,6 +194,7 @@ label.colourScheme_turnpropensity = Tendencia de giro label.colourScheme_buriedindex = Índice de encubrimiento label.colourScheme_purine/pyrimidine = Purina/Pirimidina label.colourScheme_nucleotide = Nucleótido +label.colourScheme_nucleotideambiguity = Ambigüedad de nucleótido label.colourScheme_t-coffeescores = Puntuación del T-Coffee label.colourScheme_rnahelices = 
Por hélices de RNA label.colourScheme_sequenceid = Color de ID de secuencia diff --git a/src/jalview/schemes/JalviewColourScheme.java b/src/jalview/schemes/JalviewColourScheme.java index 965a26b..accdc8a 100644 --- a/src/jalview/schemes/JalviewColourScheme.java +++ b/src/jalview/schemes/JalviewColourScheme.java @@ -44,10 +44,12 @@ public enum JalviewColourScheme Turn("Turn Propensity", TurnColourScheme.class), Buried("Buried Index", BuriedColourScheme.class), Nucleotide("Nucleotide", NucleotideColourScheme.class), + NucleotideAmbiguity("Nucleotide Ambiguity", + NucleotideAmbiguityColourScheme.class), PurinePyrimidine("Purine/Pyrimidine", PurinePyrimidineColourScheme.class), RNAHelices("RNA Helices", RNAHelicesColour.class), TCoffee("T-Coffee Scores", TCoffeeColourScheme.class), - IdColour("Sequence ID", IdColourScheme.class); + IdColour("Sequence ID", IdColourScheme.class),; // RNAInteraction("RNA Interaction type", RNAInteractionColourScheme.class) private String name; diff --git a/src/jalview/schemes/NucleotideAmbiguityColourScheme.java b/src/jalview/schemes/NucleotideAmbiguityColourScheme.java new file mode 100644 index 0000000..dd42db3 --- /dev/null +++ b/src/jalview/schemes/NucleotideAmbiguityColourScheme.java @@ -0,0 +1,40 @@ +package jalview.schemes; + +import jalview.api.AlignViewportI; +import jalview.datamodel.AnnotatedCollectionI; + +public class NucleotideAmbiguityColourScheme extends ResidueColourScheme +{ + /** + * Creates a new NucleotideAmbiguityColourScheme object. 
+ */ + public NucleotideAmbiguityColourScheme() + { + super(ResidueProperties.nucleotideIndex, + ResidueProperties.nucleotideAmbiguity); + } + + @Override + public boolean isNucleotideSpecific() + { + return true; + } + + @Override + public String getSchemeName() + { + return JalviewColourScheme.NucleotideAmbiguity.toString(); + } + + /** + * Returns a new instance of this colour scheme with which the given data may + * be coloured + */ + @Override + public ColourSchemeI getInstance(AlignViewportI view, + AnnotatedCollectionI coll) + { + return new NucleotideAmbiguityColourScheme(); + } + +} diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 7ad35c3..73b48a9 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -20,10 +20,6 @@ */ package jalview.schemes; -import java.util.Locale; - -import jalview.analysis.GeneticCodes; - import java.awt.Color; import java.util.ArrayList; import java.util.Arrays; @@ -31,9 +27,12 @@ import java.util.Enumeration; import java.util.HashMap; import java.util.Hashtable; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Vector; +import jalview.analysis.GeneticCodes; + public class ResidueProperties { // Stores residue codes/names and colours and other things @@ -119,74 +118,50 @@ public class ResidueProperties /** * maximum (gap) index for matrices involving nucleotide alphabet */ - public final static int maxNucleotideIndex = 10; + // public final static int maxNucleotideIndex = 10; + public final static int maxNucleotideIndex; static { + + String[][] namesArray = { { "a", "Adenine" }, { "g", "Guanine" }, + { "c", "Cytosine" }, + { "t", "Thymine" }, + { "u", "Uracil" }, + { "i", "Inosine" }, + { "x", "Xanthine" }, + { "r", "Unknown Purine" }, + { "y", "Unknown Pyrimidine" }, + { "n", "Unknown" }, + { "w", "Weak nucleotide (A or T)" }, + { "s", "Strong nucleotide (G or C)" }, + { "m", "Amino 
(A or C)" }, + { "k", "Keto (G or T)" }, + { "b", "Not A (G or C or T)" }, + { "h", "Not G (A or C or T)" }, + { "d", "Not C (A or G or T)" }, + { "v", "Not T (A or G or C" } }; + + // "gap" index + maxNucleotideIndex = namesArray.length + 1; + nucleotideIndex = new int[255]; for (int i = 0; i < 255; i++) { - nucleotideIndex[i] = 10; // non-nucleotide symbols are all non-gap gaps. + nucleotideIndex[i] = maxNucleotideIndex; // non-nucleotide symbols are all + // non-gap gaps. } - nucleotideIndex['A'] = 0; - nucleotideIndex['a'] = 0; - nucleotideIndex['C'] = 1; - nucleotideIndex['c'] = 1; - nucleotideIndex['G'] = 2; - nucleotideIndex['g'] = 2; - nucleotideIndex['T'] = 3; - nucleotideIndex['t'] = 3; - nucleotideIndex['U'] = 4; - nucleotideIndex['u'] = 4; - nucleotideIndex['I'] = 5; - nucleotideIndex['i'] = 5; - nucleotideIndex['X'] = 6; - nucleotideIndex['x'] = 6; - nucleotideIndex['R'] = 7; - nucleotideIndex['r'] = 7; - nucleotideIndex['Y'] = 8; - nucleotideIndex['y'] = 8; - nucleotideIndex['N'] = 9; - nucleotideIndex['n'] = 9; - - nucleotideName.put("A", "Adenine"); - nucleotideName.put("a", "Adenine"); - nucleotideName.put("G", "Guanine"); - nucleotideName.put("g", "Guanine"); - nucleotideName.put("C", "Cytosine"); - nucleotideName.put("c", "Cytosine"); - nucleotideName.put("T", "Thymine"); - nucleotideName.put("t", "Thymine"); - nucleotideName.put("U", "Uracil"); - nucleotideName.put("u", "Uracil"); - nucleotideName.put("I", "Inosine"); - nucleotideName.put("i", "Inosine"); - nucleotideName.put("X", "Xanthine"); - nucleotideName.put("x", "Xanthine"); - nucleotideName.put("R", "Unknown Purine"); - nucleotideName.put("r", "Unknown Purine"); - nucleotideName.put("Y", "Unknown Pyrimidine"); - nucleotideName.put("y", "Unknown Pyrimidine"); - nucleotideName.put("N", "Unknown"); - nucleotideName.put("n", "Unknown"); - nucleotideName.put("W", "Weak nucleotide (A or T)"); - nucleotideName.put("w", "Weak nucleotide (A or T)"); - nucleotideName.put("S", "Strong nucleotide (G 
or C)"); - nucleotideName.put("s", "Strong nucleotide (G or C)"); - nucleotideName.put("M", "Amino (A or C)"); - nucleotideName.put("m", "Amino (A or C)"); - nucleotideName.put("K", "Keto (G or T)"); - nucleotideName.put("k", "Keto (G or T)"); - nucleotideName.put("B", "Not A (G or C or T)"); - nucleotideName.put("b", "Not A (G or C or T)"); - nucleotideName.put("H", "Not G (A or C or T)"); - nucleotideName.put("h", "Not G (A or C or T)"); - nucleotideName.put("D", "Not C (A or G or T)"); - nucleotideName.put("d", "Not C (A or G or T)"); - nucleotideName.put("V", "Not T (A or G or C"); - nucleotideName.put("v", "Not T (A or G or C"); - + for (int i = 0; i < namesArray.length; i++) + { + char c = namesArray[i][0].charAt(0); + nucleotideIndex[c] = i; + // Character.toUpperCase is Locale insensitive + nucleotideIndex[Character.toUpperCase(c)] = i; + nucleotideName.put(namesArray[i][0], namesArray[i][1]); + nucleotideName.put(namesArray[i][0].toUpperCase(Locale.ROOT), + namesArray[i][1]); + } } static @@ -363,6 +338,36 @@ public class ResidueProperties Color.white, // R Color.white, // Y Color.white, // N + Color.white, // w + Color.white, // s + Color.white, // m + Color.white, // k + Color.white, // b + Color.white, // h + Color.white, // d + Color.white, // v + Color.white, // Gap + }; + + public static final Color[] nucleotideAmbiguity = { + Color.decode("#f0fff0"), // a + Color.decode("#f0fff0"), // g + Color.decode("#f0fff0"), // c + Color.decode("#f0fff0"), // t + Color.decode("#f0fff0"), // u + Color.decode("#ffffff"), // i + Color.decode("#ffffff"), // x + Color.decode("#CD5C5C"), // r + Color.decode("#008000"), // y + Color.decode("#2f4f4f"), // n + Color.decode("#4682B4"), // w + Color.decode("#FF8C00"), // s + Color.decode("#9ACD32"), // m + Color.decode("#9932CC"), // k + Color.decode("#8b4513"), // b + Color.decode("#808080"), // h + Color.decode("#483D8B"), // d + Color.decode("#b8860b"), // v Color.white, // Gap }; diff --git 
a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index cd98ee7..2dcbeb5 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -20,11 +20,11 @@ */ package jalview.util; -import jalview.datamodel.SequenceI; - import java.util.ArrayList; import java.util.List; +import jalview.datamodel.SequenceI; + /** * Assorted methods for analysing or comparing sequences. */ @@ -32,6 +32,8 @@ public class Comparison { private static final int EIGHTY_FIVE = 85; + private static final int NINETY_NINE = 99; + private static final int TO_UPPER_CASE = 'a' - 'A'; public static final char GAP_SPACE = ' '; @@ -256,7 +258,7 @@ public class Comparison */ public static final boolean isGap(char c) { - return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false; + return c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE; } /** @@ -275,12 +277,13 @@ public class Comparison long ntCount = 0; long aaCount = 0; long nCount = 0; + long ntaCount = 0; int len = seq.getLength(); for (int i = 0; i < len; i++) { char c = seq.getCharAt(i); - if (isNucleotide(c) || isX(c)) + if (isNucleotide(c)) { ntCount++; } @@ -291,6 +294,13 @@ public class Comparison { nCount++; } + else + { + if (isNucleotideAmbiguity(c)) + { + ntaCount++; + } + } } } /* @@ -304,6 +314,12 @@ public class Comparison } else { + // check for very large proportion of nucleotide and all ambiguity codes + if ((ntCount + nCount + ntaCount) * 100 >= NINETY_NINE + * (ntCount + aaCount)) + { + return ntCount > 0; + } return false; } } @@ -350,11 +366,13 @@ public class Comparison */ public static boolean isNucleotide(char c) { - if ('a' <= c && c <= 'z') - { - c -= TO_UPPER_CASE; - } - switch (c) + return isNucleotide(c, false); + } + + public static boolean isNucleotide(char c, boolean countAmbiguity) + { + char C = Character.toUpperCase(c); + switch (C) { case 'A': case 'C': @@ -363,29 +381,45 @@ public class Comparison case 'U': return true; } + if (countAmbiguity) + { + 
boolean ambiguity = isNucleotideAmbiguity(C); + if (ambiguity) + return true; + } return false; } - public static boolean isN(char c) + public static boolean isNucleotideAmbiguity(char c) { - switch (c) + switch (Character.toUpperCase(c)) { - case 'N': - case 'n': + case 'I': + case 'X': + case 'R': + case 'Y': + case 'W': + case 'S': + case 'M': + case 'K': + case 'B': + case 'H': + case 'D': + case 'V': return true; + case 'N': // not counting N as nucleotide } return false; } + public static boolean isN(char c) + { + return 'n' == Character.toLowerCase(c); + } + public static boolean isX(char c) { - switch (c) - { - case 'X': - case 'x': - return true; - } - return false; + return 'x' == Character.toLowerCase(c); } /** @@ -456,13 +490,7 @@ public class Comparison public static boolean isSameResidue(char c1, char c2, boolean caseSensitive) { - if (caseSensitive) - { - return (c1 == c2); - } - else - { - return Character.toUpperCase(c1) == Character.toUpperCase(c2); - } + return caseSensitive ? c1 == c2 + : Character.toUpperCase(c1) == Character.toUpperCase(c2); } } -- 1.7.10.2