public static final char GAP_DASH = '-'; // one of the gap symbols; each GAP_* constant in this group is tested by isGap and listed in GapChars
+ public static final char GAP_TILDE = '~';
+
+ public static final char GAP_PIPE = '|';
+
+ public static final char GAP_COLON = ':';
+
+ public static final char GAP_LPAREN = '(';
+
+ public static final char GAP_RPAREN = ')';
+
+ public static final char GAP_LSQBR = '[';
+
+ public static final char GAP_RSQBR = ']';
+
+ public static final char GAP_LBRACE = '{';
+
+ public static final char GAP_RBRACE = '}';
+
public static final String GapChars = new String( // all gap symbols collected into one string (same set that isGap tests for)
new char[]
- { GAP_SPACE, GAP_DOT, GAP_DASH });
+ { GAP_SPACE, GAP_DOT, GAP_DASH, GAP_TILDE, GAP_PIPE, GAP_COLON,
+ GAP_LPAREN,
+ GAP_RPAREN, GAP_LSQBR, GAP_RSQBR, GAP_LBRACE, GAP_RBRACE });
/**
* Answers true if the given character is one of the recognised gap symbols
* (GAP_SPACE, GAP_DOT, GAP_DASH, GAP_TILDE, GAP_PIPE, GAP_COLON, GAP_LPAREN,
* GAP_RPAREN, GAP_LSQBR, GAP_RSQBR, GAP_LBRACE or GAP_RBRACE), else false.
*
* @param c
*          the character to test
* @return true if c equals one of the gap constants, otherwise false
*/
public static final boolean isGap(char c)
{
- return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false;
+ switch (c)
+ {
+ case GAP_SPACE:
+ case GAP_DOT:
+ case GAP_DASH:
+ case GAP_TILDE:
+ case GAP_PIPE:
+ case GAP_COLON:
+ case GAP_LPAREN:
+ case GAP_RPAREN:
+ case GAP_LSQBR:
+ case GAP_RSQBR:
+ case GAP_LBRACE:
+ case GAP_RBRACE:
+ return true;
+ default:
+ return false;
+ }
}
/**
{
return false;
}
- char[][] letters = new char[seqs.length][];
- for (int i = 0; i < seqs.length; i++)
- {
- if (seqs[i] != null)
- {
- char[] sequence = seqs[i].getSequence();
- if (sequence != null)
- {
- letters[i] = sequence;
- }
- }
- }
-
- return areNucleotide(letters);
- }
- /**
- * Answers true if more than 85% of the sequence residues (ignoring gaps) are
- * A, G, C, T or U, else false. This is just a heuristic guess and may give a
- * wrong answer (as AGCT are also amino acid codes).
- *
- * @param letters
- * @return
- */
- static final boolean areNucleotide(char[][] letters)
- {
int ntCount = 0;
int aaCount = 0;
- for (char[] seq : letters)
+ for (SequenceI seq : seqs)
{
if (seq == null)
{
}
// TODO could possibly make an informed guess just from the first sequence
// to save a lengthy calculation
- for (char c : seq)
+ int len = seq.getLength();
+ for (int i = 0; i < len; i++)
{
+ char c = seq.getCharAt(i);
if (isNucleotide(c))
{
ntCount++;
{
return false;
}
- List<SequenceI> flattened = new ArrayList<SequenceI>();
+ List<SequenceI> flattened = new ArrayList<>();
for (SequenceI[] ss : seqs)
{
for (SequenceI s : ss)