--- /dev/null
+/* Copyright (c) 2009 Peter Troshin\r
+ * \r
+ * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
+ * \r
+ * This library is free software; you can redistribute it and/or modify it under the terms of the\r
+ * Apache License version 2 as published by the Apache Software Foundation\r
+ * \r
+ * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
+ * License for more details.\r
+ * \r
+ * A copy of the license is in apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
+ * \r
+ * Any republication or derived work distributed in source code form\r
+ * must include this copyright and license notice.\r
+ */\r
+\r
+package compbio.data.sequence;\r
+\r
+import java.util.ArrayList;\r
+import java.util.List;\r
+import java.util.Random;\r
+\r
+public class FastaSequenceGenerator {\r
+\r
+ enum SeqType {\r
+ DNA, PROTEIN\r
+ }\r
+\r
+ enum ProteinAlphabet {\r
+ A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, V\r
+ };\r
+\r
+ enum DNAAlphabet {\r
+ C, T, G, A, U\r
+ };\r
+\r
+ enum Letters {\r
+ q, w, e, r, t, y, u, i, o, p, a, s, d, f, g, h, j, k, l, z, x, c, v, b, n, m\r
+ };\r
+\r
+ final SeqType seqtype;\r
+ final int seqNumber;\r
+ final Random rand;\r
+\r
+ /**\r
+ * \r
+ * @param type\r
+ * of the sequence to be generated one of DNA or PROTEIN\r
+ * @param seqNumber\r
+ * number of sequences to be generated\r
+ */\r
+ public FastaSequenceGenerator(SeqType type, int seqNumber) {\r
+ this.seqtype = type;\r
+ this.seqNumber = seqNumber;\r
+ this.rand = new Random();\r
+ }\r
+\r
+ /**\r
+ * Generate a list of Fasta formatted sequences with sequence length between\r
+ * 0.5 to 1 of maxLenght. Name of the sequence as well as the sequence is\r
+ * generated randomly\r
+ * \r
+ * @param maxSeqLength\r
+ * maximum length of generated sequence\r
+ * @return\r
+ */\r
+ public List<FastaSequence> generateFasta(int maxSeqLength) {\r
+ List<FastaSequence> fastal = new ArrayList<FastaSequence>();\r
+ FastaSequence seq = null;\r
+ for (int i = 0; i < seqNumber; i++) {\r
+ switch (this.seqtype) {\r
+ case DNA:\r
+ seq = new FastaSequence(generateName(), generateDna(\r
+ maxSeqLength, getRandomNumber(0.5, 0.99)));\r
+ break;\r
+ case PROTEIN:\r
+ seq = new FastaSequence(generateName(), generateProtein(\r
+ maxSeqLength, getRandomNumber(0.5, 0.99)));\r
+ break;\r
+ default:\r
+ throw new AssertionError("Cannot recognise a type!");\r
+ }\r
+ fastal.add(seq);\r
+ }\r
+ return fastal;\r
+ }\r
+\r
+ private String generateName() {\r
+ Letters[] letters = Letters.values();\r
+ int max = letters.length - 1;\r
+ StringBuilder sb = new StringBuilder();\r
+ for (int i = 0; i < 10; i++) {\r
+ sb.append(letters[getRandomNumber(0, max)]);\r
+ }\r
+ return sb.toString();\r
+ }\r
+\r
+ private String generateProtein(int length, double variability) {\r
+ ProteinAlphabet[] proteinA = ProteinAlphabet.values();\r
+ int max = proteinA.length - 1;\r
+ StringBuilder sb = new StringBuilder();\r
+ for (int i = 0; i < length * variability; i++) {\r
+ sb.append(proteinA[getRandomNumber(max)]);\r
+ }\r
+ return sb.toString();\r
+ }\r
+\r
+ private String generateDna(int length, double variability) {\r
+ if (variability == 0) {\r
+ variability = 1;\r
+ }\r
+ DNAAlphabet[] dnaA = DNAAlphabet.values();\r
+ int max = dnaA.length - 1;\r
+ StringBuilder sb = new StringBuilder();\r
+ for (int i = 0; i < length * variability; i++) {\r
+ sb.append(dnaA[getRandomNumber(max)]);\r
+ }\r
+ return sb.toString();\r
+ }\r
+\r
+ /*\r
+ * Returns random integers in range from 0 to max\r
+ * \r
+ * @param max\r
+ * \r
+ * @return\r
+ */\r
+ private int getRandomNumber(int max) {\r
+ return rand.nextInt(max);\r
+ }\r
+\r
+ /*\r
+ * Returns random integers with value in range from min to max\r
+ * \r
+ * @param min\r
+ * \r
+ * @param max\r
+ * \r
+ * @return\r
+ */\r
+ private int getRandomNumber(int min, int max) {\r
+ return new Long(Math.round((max - min) * rand.nextDouble() + min))\r
+ .intValue();\r
+ }\r
+\r
+ private double getRandomNumber(double min, double max) {\r
+ return (max - min) * rand.nextDouble() + min;\r
+ }\r
+}\r