*/
package jalview.util;
+import jalview.datamodel.SequenceI;
+
import java.util.ArrayList;
import java.util.List;
-import jalview.datamodel.SequenceI;
-
/**
* Assorted methods for analysing or comparing sequences.
*/
private static final char GAP_DASH = '-';
// GapChars: the gap characters GAP_SPACE, GAP_DOT and GAP_DASH (in that
// order) joined into a single String
- public static final String GapChars = new String(new char[]
- { GAP_SPACE, GAP_DOT, GAP_DASH });
+ public static final String GapChars = new String(new char[] { GAP_SPACE,
+ GAP_DOT, GAP_DASH });
/**
* Answers true if the given character is one of the gap characters
* (GAP_DASH, GAP_DOT or GAP_SPACE), else false.
*
* @param c
*          the character to test
* @return true if c is a gap character, else false
*/
public static final boolean isGap(char c)
{
- return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false;
+ return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE);
}
/**
{
return false;
}
+ char[][] letters = new char[seqs.length][];
+ for (int i = 0; i < seqs.length; i++)
+ {
+ if (seqs[i] != null)
+ {
+ char[] sequence = seqs[i].getSequence();
+ if (sequence != null)
+ {
+ letters[i] = sequence;
+ }
+ }
+ }
+
+ return areNucleotide(letters);
+ }
+
+ /**
+ * Answers true if more than 85% of the sequence residues (ignoring gaps) are
+ * A, G, C, T or U, else false. This is just a heuristic guess and may give a
+ * wrong answer (as AGCT are also amino acid codes).
+ *
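+ * @param letters
+ *          one character array per sequence; entries may be null
+ * @return true if the residues are judged to be nucleotide, else false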
+ */
+ public static final boolean areNucleotide(char[][] letters)
+ {
int ntCount = 0;
int aaCount = 0;
- for (SequenceI seq : seqs)
+ for (char[] seq : letters)
{
if (seq == null)
{
}
// TODO could possibly make an informed guess just from the first sequence
// to save a lengthy calculation
- for (char c : seq.getSequence())
+ for (char c : seq)
{
if ('a' <= c && c <= 'z')
{
List<SequenceI> flattened = new ArrayList<SequenceI>();
for (SequenceI[] ss : seqs)
{
- for (SequenceI s : ss) {
- flattened.add(s);
+ for (SequenceI s : ss)
+ {
+ flattened.add(s);
}
}
- final SequenceI[] oneDArray = flattened.toArray(new SequenceI[flattened.size()]);
+ final SequenceI[] oneDArray = flattened.toArray(new SequenceI[flattened
+ .size()]);
return isNucleotide(oneDArray);
}
}