*/
package jalview.schemes;
-import java.util.Locale;
-
-import jalview.analysis.GeneticCodes;
-
import java.awt.Color;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Vector;
+import jalview.analysis.GeneticCodes;
+
public class ResidueProperties
{
// Stores residue codes/names and colours and other things
/**
* maximum (gap) index for matrices involving nucleotide alphabet
*/
- public final static int maxNucleotideIndex = 10;
+ // no longer the literal 10: the value is assigned in the static initialiser
+ // that follows, derived from the length of its nucleotide code table
+ public final static int maxNucleotideIndex;
static
{
+
+ // Table of { lower-case code, display name }. A code's position in this
+ // table becomes its nucleotide index, so the order of the rows matters.
+ // NOTE(review): the order is a, g, c, t whereas the removed assignments
+ // used A=0, C=1, G=2, T=3 - confirm no index-ordered table elsewhere still
+ // assumes the old order.
+ // NOTE(review): the last display name has no closing ')'; it is unchanged
+ // from the removed code and left as is because it is runtime text.
+ String[][] namesArray = { { "a", "Adenine" }, { "g", "Guanine" },
+ { "c", "Cytosine" },
+ { "t", "Thymine" },
+ { "u", "Uracil" },
+ { "i", "Inosine" },
+ { "x", "Xanthine" },
+ { "r", "Unknown Purine" },
+ { "y", "Unknown Pyrimidine" },
+ { "n", "Unknown" },
+ { "w", "Weak nucleotide (A or T)" },
+ { "s", "Strong nucleotide (G or C)" },
+ { "m", "Amino (A or C)" },
+ { "k", "Keto (G or T)" },
+ { "b", "Not A (G or C or T)" },
+ { "h", "Not G (A or C or T)" },
+ { "d", "Not C (A or G or T)" },
+ { "v", "Not T (A or G or C" } };
+
+ // "gap" index: the first index after the last code, i.e. the number of
+ // codes. This keeps the former convention (index 10 for the 10 codes 0..9)
+ // and lines up with the colour tables, which list one entry per code
+ // followed by a single Gap entry.
+ maxNucleotideIndex = namesArray.length;
+
nucleotideIndex = new int[255];
for (int i = 0; i < 255; i++)
{
- nucleotideIndex[i] = 10; // non-nucleotide symbols are all non-gap gaps.
+ nucleotideIndex[i] = maxNucleotideIndex; // non-nucleotide symbols are all
+ // non-gap gaps.
}
- nucleotideIndex['A'] = 0;
- nucleotideIndex['a'] = 0;
- nucleotideIndex['C'] = 1;
- nucleotideIndex['c'] = 1;
- nucleotideIndex['G'] = 2;
- nucleotideIndex['g'] = 2;
- nucleotideIndex['T'] = 3;
- nucleotideIndex['t'] = 3;
- nucleotideIndex['U'] = 4;
- nucleotideIndex['u'] = 4;
- nucleotideIndex['I'] = 5;
- nucleotideIndex['i'] = 5;
- nucleotideIndex['X'] = 6;
- nucleotideIndex['x'] = 6;
- nucleotideIndex['R'] = 7;
- nucleotideIndex['r'] = 7;
- nucleotideIndex['Y'] = 8;
- nucleotideIndex['y'] = 8;
- nucleotideIndex['N'] = 9;
- nucleotideIndex['n'] = 9;
-
- nucleotideName.put("A", "Adenine");
- nucleotideName.put("a", "Adenine");
- nucleotideName.put("G", "Guanine");
- nucleotideName.put("g", "Guanine");
- nucleotideName.put("C", "Cytosine");
- nucleotideName.put("c", "Cytosine");
- nucleotideName.put("T", "Thymine");
- nucleotideName.put("t", "Thymine");
- nucleotideName.put("U", "Uracil");
- nucleotideName.put("u", "Uracil");
- nucleotideName.put("I", "Inosine");
- nucleotideName.put("i", "Inosine");
- nucleotideName.put("X", "Xanthine");
- nucleotideName.put("x", "Xanthine");
- nucleotideName.put("R", "Unknown Purine");
- nucleotideName.put("r", "Unknown Purine");
- nucleotideName.put("Y", "Unknown Pyrimidine");
- nucleotideName.put("y", "Unknown Pyrimidine");
- nucleotideName.put("N", "Unknown");
- nucleotideName.put("n", "Unknown");
- nucleotideName.put("W", "Weak nucleotide (A or T)");
- nucleotideName.put("w", "Weak nucleotide (A or T)");
- nucleotideName.put("S", "Strong nucleotide (G or C)");
- nucleotideName.put("s", "Strong nucleotide (G or C)");
- nucleotideName.put("M", "Amino (A or C)");
- nucleotideName.put("m", "Amino (A or C)");
- nucleotideName.put("K", "Keto (G or T)");
- nucleotideName.put("k", "Keto (G or T)");
- nucleotideName.put("B", "Not A (G or C or T)");
- nucleotideName.put("b", "Not A (G or C or T)");
- nucleotideName.put("H", "Not G (A or C or T)");
- nucleotideName.put("h", "Not G (A or C or T)");
- nucleotideName.put("D", "Not C (A or G or T)");
- nucleotideName.put("d", "Not C (A or G or T)");
- nucleotideName.put("V", "Not T (A or G or C");
- nucleotideName.put("v", "Not T (A or G or C");
-
+ // register every code under both its lower-case and upper-case form
+ for (int i = 0; i < namesArray.length; i++)
+ {
+ char c = namesArray[i][0].charAt(0);
+ nucleotideIndex[c] = i;
+ // Character.toUpperCase is Locale insensitive
+ nucleotideIndex[Character.toUpperCase(c)] = i;
+ nucleotideName.put(namesArray[i][0], namesArray[i][1]);
+ nucleotideName.put(namesArray[i][0].toUpperCase(Locale.ROOT),
+ namesArray[i][1]);
+ }
}
static
Color.white, // R
Color.white, // Y
Color.white, // N
+ Color.white, // w
+ Color.white, // s
+ Color.white, // m
+ Color.white, // k
+ Color.white, // b
+ Color.white, // h
+ Color.white, // d
+ Color.white, // v
+ Color.white, // Gap
+ };
+
+ /**
+ * Colour table with one entry per nucleotide code, in the order noted beside
+ * each entry (a, g, c, t, u, i, x, r, y, n, w, s, m, k, b, h, d, v), followed
+ * by a final entry for gaps. The codes a, g, c, t and u share a single
+ * colour, i and x are white, and every other code has a colour of its own.
+ * NOTE(review): presumably looked up through nucleotideIndex - confirm
+ * against the colour scheme that reads this table.
+ */
+ public static final Color[] nucleotideAmbiguity = {
+ Color.decode("#f0fff0"), // a
+ Color.decode("#f0fff0"), // g
+ Color.decode("#f0fff0"), // c
+ Color.decode("#f0fff0"), // t
+ Color.decode("#f0fff0"), // u
+ Color.decode("#ffffff"), // i
+ Color.decode("#ffffff"), // x
+ Color.decode("#CD5C5C"), // r
+ Color.decode("#008000"), // y
+ Color.decode("#2f4f4f"), // n
+ Color.decode("#4682B4"), // w
+ Color.decode("#FF8C00"), // s
+ Color.decode("#9ACD32"), // m
+ Color.decode("#9932CC"), // k
+ Color.decode("#8b4513"), // b
+ Color.decode("#808080"), // h
+ Color.decode("#483D8B"), // d
+ Color.decode("#b8860b"), // v
Color.white, // Gap
};
*/
package jalview.util;
-import jalview.datamodel.SequenceI;
-
import java.util.ArrayList;
import java.util.List;
+import jalview.datamodel.SequenceI;
+
/**
* Assorted methods for analysing or comparing sequences.
*/
{
private static final int EIGHTY_FIVE = 85;
+ private static final int NINETY_NINE = 99;
+
private static final int TO_UPPER_CASE = 'a' - 'A';
public static final char GAP_SPACE = ' ';
*/
public static final boolean isGap(char c)
{
- return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false;
+ // any one of the three gap constants counts as a gap
+ if (c == GAP_DASH || c == GAP_DOT)
+ {
+ return true;
+ }
+ return c == GAP_SPACE;
}
/**
long ntCount = 0;
long aaCount = 0;
long nCount = 0;
+ long ntaCount = 0;
int len = seq.getLength();
for (int i = 0; i < len; i++)
{
char c = seq.getCharAt(i);
- if (isNucleotide(c) || isX(c))
+ if (isNucleotide(c))
{
ntCount++;
}
{
nCount++;
}
+ else
+ {
+ if (isNucleotideAmbiguity(c))
+ {
+ ntaCount++;
+ }
+ }
}
}
/*
}
else
{
+ // check for very large proportion of nucleotide and all ambiguity codes
+ if ((ntCount + nCount + ntaCount) * 100 >= NINETY_NINE
+ * (ntCount + aaCount))
+ {
+ return ntCount > 0;
+ }
return false;
}
}
*/
public static boolean isNucleotide(char c)
{
- if ('a' <= c && c <= 'z')
- {
- c -= TO_UPPER_CASE;
- }
- switch (c)
+ return isNucleotide(c, false);
+ }
+
+ public static boolean isNucleotide(char c, boolean countAmbiguity)
+ {
+ char C = Character.toUpperCase(c);
+ switch (C)
{
case 'A':
case 'C':
case 'U':
return true;
}
+ if (countAmbiguity)
+ {
+ boolean ambiguity = isNucleotideAmbiguity(C);
+ if (ambiguity)
+ return true;
+ }
return false;
}
- public static boolean isN(char c)
+ /**
+ * Answers true if the character is, ignoring case, one of the codes I, X, R,
+ * Y, W, S, M, K, B, H, D or V. N has a label in the switch but no action, so
+ * it falls out of the switch and gives false like any unlisted character.
+ *
+ * @param c
+ * the character to test
+ * @return true for the codes listed above, otherwise false
+ */
+ public static boolean isNucleotideAmbiguity(char c)
{
- switch (c)
+ switch (Character.toUpperCase(c))
{
- case 'N':
- case 'n':
+ case 'I':
+ case 'X':
+ case 'R':
+ case 'Y':
+ case 'W':
+ case 'S':
+ case 'M':
+ case 'K':
+ case 'B':
+ case 'H':
+ case 'D':
+ case 'V':
return true;
+ case 'N': // not counting N as nucleotide
}
return false;
}
+ /**
+ * Answers true if the character is 'N' or 'n'.
+ *
+ * @param c
+ * the character to test
+ * @return true for upper or lower case N, otherwise false
+ */
+ public static boolean isN(char c)
+ {
+ return c == 'N' || c == 'n';
+ }
+
+ /**
+ * Answers true if the character is 'X' or 'x'.
+ *
+ * @param c
+ * the character to test
+ * @return true for upper or lower case X, otherwise false
+ */
public static boolean isX(char c)
{
- switch (c)
- {
- case 'X':
- case 'x':
- return true;
- }
- return false;
+ return c == 'X' || c == 'x';
}
/**
public static boolean isSameResidue(char c1, char c2,
boolean caseSensitive)
{
- if (caseSensitive)
- {
- return (c1 == c2);
- }
- else
- {
- return Character.toUpperCase(c1) == Character.toUpperCase(c2);
- }
+ if (c1 == c2)
+ {
+ // identical characters match in either mode
+ return true;
+ }
+ // differing characters can only match when case is ignored
+ return !caseSensitive
+ && Character.toUpperCase(c1) == Character.toUpperCase(c2);
}
}