1 /* Copyright (c) 2009 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
16 * must include this copyright and license notice.
\r
19 package compbio.data.sequence;
\r
21 import java.util.ArrayList;
\r
22 import java.util.List;
\r
23 import java.util.Random;
\r
25 public class FastaSequenceGenerator {
\r
/**
 * One-letter codes of the 20 standard amino acids; the alphabet from which
 * random protein sequences are drawn.
 * <p>
 * Constants follow the conventional amino-acid table order (Ala, Arg, Asn,
 * Asp, Cys, Glu, Gln, Gly, His, Ile, Leu, Lys, Met, Phe, Pro, Ser, Thr, Trp,
 * Tyr, Val). Tyrosine (Y) was previously missing from the list.
 */
enum ProteinAlphabet {
    A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V
}
\r
40 q, w, e, r, t, y, u, i, o, p, a, s, d, f, g, h, j, k, l, z, x, c, v, b, n, m
\r
// Kind of sequence this generator produces; assigned once in the constructor
// and switched on by generateFasta.
43 final SeqType seqtype;
\r
// Number of sequences produced by each generateFasta call; assigned once in
// the constructor.
44 final int seqNumber;
\r
/**
 * Creates a generator of random FASTA sequences backed by its own
 * {@link Random} instance.
 *
 * @param type
 *            kind of the sequences to be generated, one of DNA or PROTEIN
 * @param seqNumber
 *            number of sequences to be generated per call
 */
public FastaSequenceGenerator(SeqType type, int seqNumber) {
    this.rand = new Random();
    this.seqNumber = seqNumber;
    this.seqtype = type;
}
\r
61 * Generate a list of Fasta formatted sequences with sequence length between
\r
62 * 0.5 to 1 of maxLenght. Name of the sequence as well as the sequence is
\r
63 * generated randomly
\r
65 * @param maxSeqLength
\r
66 * maximum length of generated sequence
\r
69 public List<FastaSequence> generateFasta(int maxSeqLength) {
\r
70 List<FastaSequence> fastal = new ArrayList<FastaSequence>();
\r
71 FastaSequence seq = null;
\r
72 for (int i = 0; i < seqNumber; i++) {
\r
73 switch (this.seqtype) {
\r
75 seq = new FastaSequence(generateName(), generateDna(
\r
76 maxSeqLength, getRandomNumber(0.5, 0.99)));
\r
79 seq = new FastaSequence(generateName(), generateProtein(
\r
80 maxSeqLength, getRandomNumber(0.5, 0.99)));
\r
83 throw new AssertionError("Cannot recognise a type!");
\r
90 private String generateName() {
\r
91 Letters[] letters = Letters.values();
\r
92 int max = letters.length - 1;
\r
93 StringBuilder sb = new StringBuilder();
\r
94 for (int i = 0; i < 10; i++) {
\r
95 sb.append(letters[getRandomNumber(0, max)]);
\r
97 return sb.toString();
\r
100 private String generateProtein(int length, double variability) {
\r
101 ProteinAlphabet[] proteinA = ProteinAlphabet.values();
\r
102 int max = proteinA.length - 1;
\r
103 StringBuilder sb = new StringBuilder();
\r
104 for (int i = 0; i < length * variability; i++) {
\r
105 sb.append(proteinA[getRandomNumber(max)]);
\r
107 return sb.toString();
\r
/*
 * Builds a random DNA sequence from the DNAAlphabet symbols.
 *
 * length      - upper bound of the sequence length
 * variability - fraction of length to generate; symbols are appended while
 *               their count is below length * variability
 * Returns the generated symbols as a string.
 */
110 private String generateDna(int length, double variability) {
\r
// Special-cases a variability of exactly zero.
// NOTE(review): the body of this guard is not visible in this excerpt -
// confirm what value or result it substitutes before relying on it.
111 if (variability == 0) {
\r
114 DNAAlphabet[] dnaA = DNAAlphabet.values();
\r
// Highest valid index into dnaA; handed to getRandomNumber(int) below.
115 int max = dnaA.length - 1;
\r
116 StringBuilder sb = new StringBuilder();
\r
// The bound is a double, so a fractional product still yields one extra symbol.
117 for (int i = 0; i < length * variability; i++) {
\r
// Appends the name of the randomly selected enum constant.
118 sb.append(dnaA[getRandomNumber(max)]);
\r
120 return sb.toString();
\r
124 * Returns random integers in range from 0 to max
\r
130 private int getRandomNumber(int max) {
\r
131 return rand.nextInt(max);
\r
135 * Returns random integers with value in range from min to max
\r
143 private int getRandomNumber(int min, int max) {
\r
144 return new Long(Math.round((max - min) * rand.nextDouble() + min))
\r
148 private double getRandomNumber(double min, double max) {
\r
149 return (max - min) * rand.nextDouble() + min;
\r