+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.analysis;
-
-import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.Sequence;
-import jalview.datamodel.SequenceI;
-import jalview.gui.JvOptionPane;
-import jalview.io.FastaFile;
-
-import java.util.Arrays;
-import java.util.Random;
-
-import org.testng.annotations.BeforeClass;
-
-/**
- * Generates, and outputs in Fasta format, a random DNA alignment for given
- * sequence length and count. Will regenerate the same alignment each time if
- * the same random seed is used (so may be used for reproducible unit tests).
- * Not guaranteed to reproduce the same results between versions, as the rules
- * may get tweaked to produce more 'realistic' results.
- *
- * Arguments:
- * <ul>
- * <li>length (number of bases in each sequence)</li>
- * <li>height (number of sequences)</li>
- * <li>a whole number random seed</li>
- * <li>percentage of gaps to include (0-100)</li>
- * <li>percentage chance of variation of each position (0-100)</li>
- * </ul>
- *
- * @author gmcarstairs
- *
- */
-public class DnaAlignmentGenerator
-{
-
- @BeforeClass(alwaysRun = true)
- public void setUpJvOptionPane()
- {
- JvOptionPane.setInteractiveMode(false);
- JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
- }
-
- private static final char GAP = '-';
-
- private static final char ZERO = '0';
-
- private static final char[] BASES = new char[] { 'G', 'T', 'C', 'A' };
-
- private Random random;
-
-
- /**
- * Outputs a DNA 'alignment' where each position is a random choice from
- * 'GTCA-'.
- *
- * @param args
- */
- public static void main(String[] args)
- {
- if (args.length != 5)
- {
- usage();
- return;
- }
- int width = Integer.parseInt(args[0]);
- int height = Integer.parseInt(args[1]);
- long randomSeed = Long.valueOf(args[2]);
- int gapPercentage = Integer.valueOf(args[3]);
- int changePercentage = Integer.valueOf(args[4]);
- AlignmentI al = new DnaAlignmentGenerator().generate(width, height,
- randomSeed, gapPercentage, changePercentage);
-
- System.out.println("; " + height + " sequences of " + width
- + " bases with " + gapPercentage + "% gaps and "
- + changePercentage + "% mutations (random seed = " + randomSeed
- + ")");
- System.out.println(new FastaFile().print(al.getSequencesArray(), true));
- }
-
- /**
- * Print parameter help.
- */
- private static void usage()
- {
- System.out.println("Usage:");
- System.out.println("arg0: number of (non-gap) bases per sequence");
- System.out.println("arg1: number sequences");
- System.out
- .println("arg2: an integer as random seed (same seed = same results)");
- System.out.println("arg3: percentage of gaps to (randomly) generate");
- System.out
- .println("arg4: percentage of 'mutations' to (randomly) generate");
- System.out.println("Example: DnaAlignmentGenerator 12 15 387 10 5");
- System.out
- .println("- 15 sequences of 12 bases each, approx 10% gaps and 5% mutations, random seed = 387");
-
- }
-
- /**
- * Default constructor
- */
- public DnaAlignmentGenerator()
- {
-
- }
-
- /**
- * Outputs a DNA 'alignment' of given width and height, where each position is
- * a random choice from 'GTCA-'.
- *
- * @param width
- * @param height
- * @param randomSeed
- * @param changePercentage
- * @param gapPercentage
- */
- public AlignmentI generate(int width, int height, long randomSeed,
- int gapPercentage, int changePercentage)
- {
- SequenceI[] seqs = new SequenceI[height];
- random = new Random(randomSeed);
- seqs[0] = generateSequence(1, width, gapPercentage);
- for (int seqno = 1; seqno < height; seqno++)
- {
- seqs[seqno] = generateAnotherSequence(seqs[0].getSequence(),
- seqno + 1, width, changePercentage);
- }
- AlignmentI al = new Alignment(seqs);
- return al;
- }
-
- /**
- * Outputs a DNA 'sequence' of given length, with some random gaps included.
- *
- * @param seqno
- * @param length
- * @param gapPercentage
- */
- private SequenceI generateSequence(int seqno, int length,
- int gapPercentage)
- {
- StringBuilder seq = new StringBuilder(length);
-
- /*
- * Loop till we've added 'length' bases (excluding gaps)
- */
- for (int count = 0; count < length;)
- {
- boolean addGap = random.nextInt(100) < gapPercentage;
- char c = addGap ? GAP : BASES[random.nextInt(Integer.MAX_VALUE) % 4];
- seq.append(c);
- if (!addGap)
- {
- count++;
- }
- }
- final String seqName = "SEQ" + seqno;
- final String seqString = seq.toString();
- SequenceI sq = new Sequence(seqName, seqString);
- sq.createDatasetSequence();
- return sq;
- }
-
- /**
- * Generate a sequence approximately aligned to the first one.
- *
- * @param ds
- * @param seqno
- * @param width
- * number of bases
- * @param changePercentage
- * @return
- */
- private SequenceI generateAnotherSequence(char[] ds, int seqno,
- int width, int changePercentage)
- {
- int length = ds.length;
- char[] seq = new char[length];
- Arrays.fill(seq, ZERO);
- int gapsWanted = length - width;
- int gapsAdded = 0;
-
- /*
- * First 'randomly' mimic gaps in model sequence.
- */
- for (int pos = 0; pos < length; pos++)
- {
- if (ds[pos] == GAP)
- {
- /*
- * Add a gap at the same position with changePercentage likelihood
- */
- seq[pos] = randomCharacter(GAP, changePercentage);
- if (seq[pos] == GAP)
- {
- gapsAdded++;
- }
- }
- }
-
- /*
- * Next scatter any remaining gaps (if any) at random. This gives an even
- * distribution.
- */
- while (gapsAdded < gapsWanted)
- {
- boolean added = false;
- while (!added)
- {
- int pos = random.nextInt(length);
- if (seq[pos] != GAP)
- {
- seq[pos] = GAP;
- added = true;
- gapsAdded++;
- }
- }
- }
-
- /*
- * Finally fill in the rest with randomly mutated bases.
- */
- for (int pos = 0; pos < length; pos++)
- {
- if (seq[pos] == ZERO)
- {
- char c = randomCharacter(ds[pos], changePercentage);
- seq[pos] = c;
- }
- }
- final String seqName = "SEQ" + seqno;
- final String seqString = new String(seq);
- SequenceI sq = new Sequence(seqName, seqString);
- sq.createDatasetSequence();
- return sq;
- }
-
- /**
- * Returns a random character that is changePercentage% likely to match the
- * given type (as base or gap).
- *
- * @param changePercentage
- *
- * @param c
- * @return
- */
- private char randomCharacter(char c, int changePercentage)
- {
- final boolean mutation = random.nextInt(100) < changePercentage;
-
- if (!mutation)
- {
- return c;
- }
-
- char newchar = c;
- while (newchar == c)
- {
- newchar = BASES[random.nextInt(Integer.MAX_VALUE) % 4];
- }
- return newchar;
- }
-}