*/
package jalview.analysis;
+import java.util.Locale;
+
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Sequence;
import jalview.gui.JvOptionPane;
import jalview.io.FastaFile;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
import java.util.Arrays;
import java.util.Random;
import org.testng.annotations.BeforeClass;
/**
- * Generates, and outputs in Fasta format, a random DNA alignment for given
- * sequence length and count. Will regenerate the same alignment each time if
- * the same random seed is used (so may be used for reproducible unit tests).
- * Not guaranteed to reproduce the same results between versions, as the rules
- * may get tweaked to produce more 'realistic' results.
- *
- * Arguments:
- * <ul>
- * <li>length (number of bases in each sequence)</li>
- * <li>height (number of sequences)</li>
- * <li>a whole number random seed</li>
- * <li>percentage of gaps to include (0-100)</li>
- * <li>percentage chance of variation of each position (0-100)</li>
- * </ul>
+ * Generates, and outputs in Fasta format, a random peptide or nucleotide
+ * alignment for given sequence length and count. Will regenerate the same
+ * alignment each time if the same random seed is used (so may be used for
+ * reproducible unit tests). Not guaranteed to reproduce the same results
+ * between versions, as the rules may get tweaked to produce more 'realistic'
+ * results.
*
* @author gmcarstairs
- *
*/
public class AlignmentGenerator
{
-
- @BeforeClass(alwaysRun = true)
- public void setUpJvOptionPane()
- {
- JvOptionPane.setInteractiveMode(false);
- JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
- }
-
private static final char GAP = '-';
private static final char ZERO = '0';
private Random random;
+ private PrintStream ps;
/**
- * Outputs a DNA 'alignment' where each position is a random choice from
- * 'GTCA-'.
+ * Outputs a pseudo-randomly generated nucleotide or peptide alignment,
+ * preceded by a ';' comment line that summarises the parameters used. Prints
+ * usage help and returns if the wrong number of arguments is supplied.
+ * <p>
+ * Arguments:
+ * <ul>
+ * <li>n (for nucleotide) or p (for peptide)</li>
+ * <li>length (number of bases in each sequence)</li>
+ * <li>height (number of sequences)</li>
+ * <li>a whole number random seed</li>
+ * <li>percentage of gaps to include (0-100)</li>
+ * <li>percentage chance of variation of each position (0-100)</li>
+ * <li>(optional) path to a file to write the alignment to</li>
+ * </ul>
*
* @param args
+ * command line arguments, as listed above
+ * @throws FileNotFoundException
+ * if the optional output file cannot be created or opened for writing
+ * @throws NumberFormatException
+ * if one of the numeric arguments cannot be parsed
*/
- public static void main(String[] args)
+ public static void main(String[] args) throws FileNotFoundException
{
- if (args.length != 6)
+ if (args.length != 6 && args.length != 7)
{
usage();
return;
}
- BASES = args[0].toLowerCase().startsWith("n") ? NUCS : PEPS;
+
+ // Parse every argument before opening any file, so that a malformed
+ // number cannot leave behind an empty file or an unclosed stream.
+ boolean nucleotide = args[0].toLowerCase(Locale.ROOT).startsWith("n");
int width = Integer.parseInt(args[1]);
int height = Integer.parseInt(args[2]);
long randomSeed = Long.valueOf(args[3]);
int gapPercentage = Integer.valueOf(args[4]);
int changePercentage = Integer.valueOf(args[5]);
- AlignmentI al = new AlignmentGenerator().generate(width, height,
+
+ boolean toFile = args.length == 7;
+ PrintStream ps = toFile ? new PrintStream(new File(args[6]))
+ : System.out;
+ try
+ {
+ ps.println("; " + height + " sequences of " + width + " bases with "
+ + gapPercentage + "% gaps and " + changePercentage
+ + "% mutations (random seed = " + randomSeed + ")");
+
+ new AlignmentGenerator(nucleotide, ps).generate(width, height,
randomSeed, gapPercentage, changePercentage);
- System.out.println("; " + height + " sequences of " + width
- + " bases with " + gapPercentage + "% gaps and "
- + changePercentage + "% mutations (random seed = " + randomSeed
- + ")");
- System.out.println(new FastaFile().print(al.getSequencesArray(), true));
+ } finally
+ {
+ // Close only a stream that was opened here; System.out must stay open.
+ if (toFile)
+ {
+ ps.close();
+ }
+ }
}
/**
- * Print parameter help.
+ * Prints parameter help to sysout: one line describing each argument (arg0
+ * to arg6, the last being optional), followed by an example invocation and a
+ * description of what that example generates.
*/
private static void usage()
{
System.out.println("Usage:");
System.out.println("arg0: n (for nucleotide) or p (for peptide)");
System.out.println("arg1: number of (non-gap) bases per sequence");
- System.out.println("arg2: number sequences");
- System.out
- .println("arg3: an integer as random seed (same seed = same results)");
+ System.out.println("arg2: number of sequences");
+ System.out.println(
+ "arg3: an integer as random seed (same seed = same results)");
System.out.println("arg4: percentage of gaps to (randomly) generate");
- System.out
- .println("arg5: percentage of 'mutations' to (randomly) generate");
+ System.out.println(
+ "arg5: percentage of 'mutations' to (randomly) generate");
+ System.out.println(
+ "arg6: (optional) path to output file (default is sysout)");
System.out.println("Example: AlignmentGenerator n 12 15 387 10 5");
- System.out
- .println("- 15 nucleotide sequences of 12 bases each, approx 10% gaps and 5% mutations, random seed = 387");
+ System.out.println(
+ "- 15 nucleotide sequences of 12 bases each, approx 10% gaps and 5% mutations, random seed = 387");
}
/**
- * Default constructor
+ * Constructor that sets the nucleotide or peptide symbol set, and also writes
+ * the generated alignment to sysout. Delegates to the two-argument
+ * constructor, passing System.out as the output stream.
+ *
+ * @param nuc
+ * true to select the nucleotide symbol set, false to select peptide
*/
- public AlignmentGenerator()
+ public AlignmentGenerator(boolean nuc)
{
+ this(nuc, System.out);
+ }
+ /**
+ * Constructor that sets the nucleotide or peptide symbol set, and also writes
+ * the generated alignment to the specified output stream (if not null). This
+ * can be used to write the alignment to a file or sysout.
+ * <p>
+ * NOTE(review): BASES was assigned directly from the static main method in
+ * the code this change replaces, so it is presumably a static field shared by
+ * all instances - confirm that constructing a second generator with a
+ * different symbol set cannot affect one that is already in use.
+ *
+ * @param nucleotide
+ * true to draw symbols from NUCS, false to draw them from PEPS
+ * @param printStream
+ * stream the generated alignment is printed to; may be null, in which
+ * case nothing is printed
+ */
+ public AlignmentGenerator(boolean nucleotide, PrintStream printStream)
+ {
+ BASES = nucleotide ? NUCS : PEPS;
+ ps = printStream;
}
/**
- * Outputs a DNA 'alignment' of given width and height, where each position is
- * a random choice from 'GTCA-'.
+ * Outputs an 'alignment' of given width and height, where each position is a
+ * random choice from the symbol alphabet, or - for gap
*
* @param width
* @param height
seqno + 1, width, changePercentage);
}
AlignmentI al = new Alignment(seqs);
+
+ if (ps != null)
+ {
+ ps.println(new FastaFile().print(al.getSequencesArray(), true));
+ }
+
return al;
}
for (int count = 0; count < length;)
{
boolean addGap = random.nextInt(100) < gapPercentage;
- char c = addGap ? GAP : BASES[random.nextInt(Integer.MAX_VALUE)
- % BASES.length];
+ char c = addGap ? GAP
+ : BASES[random.nextInt(Integer.MAX_VALUE) % BASES.length];
seq.append(c);
if (!addGap)
{
* @param changePercentage
* @return
*/
- private SequenceI generateAnotherSequence(char[] ds, int seqno,
- int width, int changePercentage)
+ private SequenceI generateAnotherSequence(char[] ds, int seqno, int width,
+ int changePercentage)
{
int length = ds.length;
char[] seq = new char[length];