1 package compbio.cassandra;
3 import java.util.regex.Matcher;
4 import java.util.regex.Pattern;
6 import javax.xml.bind.annotation.XmlAccessType;
7 import javax.xml.bind.annotation.XmlAccessorType;
9 //import compbio.util.SysPrefs;
10 //import compbio.util.annotation.Immutable;
13 * A FASTA formatted sequence. Please note that this class does not make any
14 * assumptions as to what sequence it stores e.g. it could be nucleotide,
15 * protein or even gapped alignment sequence! The only guarantee it makes is
16 * that the sequence does not contain white space characters e.g. spaces, new
21 * @version 1.0 September 2009
24 @XmlAccessorType(XmlAccessType.FIELD)
26 public class FastaSequence {
33 // TODO what about gapped sequence here! should be indicated
35 * The string representation of the sequence
37 private String sequence;
40 // Default constructor for JaxB
44 * Upon construction, any whitespace characters are removed from the
50 public FastaSequence(String id, String sequence) {
// The sequence argument is stored exactly as received by this assignment.
// NOTE(review): the Javadoc for this constructor says whitespace characters
// are removed upon construction, but no stripping is applied in this
// assignment -- confirm whether callers rely on that guarantee.
52 this.sequence = sequence;
56 * Gets the value of id
58 * @return the value of id
60 public String getId() {
65 * Gets the value of sequence
67 * @return the value of sequence
69 public String getSequence() {
73 public static int countMatchesInSequence(final String theString,
74 final String theRegExp) {
// The pattern is compiled on every call; a syntactically invalid theRegExp
// makes Pattern.compile throw PatternSyntaxException.
75 final Pattern p = Pattern.compile(theRegExp);
// Matcher over the whole of theString.
76 final Matcher m = p.matcher(theString);
84 public String getFormattedFasta() {
// Delegates to getFormatedSequence with a fixed line width of 80 letters.
// NOTE(review): despite the method name, this statement returns only the
// wrapped sequence; no ">" + id header line is added here -- confirm that
// this is intended.
85 return getFormatedSequence(80);
90 * @return one line name, next line sequence, no matter what the sequence
93 /* public String getOnelineFasta() {
94 String fasta = ">" + getId() + SysPrefs.newlinechar;
95 fasta += getSequence() + SysPrefs.newlinechar;
100 * Formats the sequence width letters per line, in one string, without spaces.
102 * @return multi-line formatted sequence, each line width letters long
105 public String getFormatedSequence(final int width) {
// A sequence that was never set is handled separately from the wrapping
// logic below.
106 if (sequence == null) {
// NOTE(review): width == 0 satisfies this assertion, yet the modulo and
// division by width below would then throw ArithmeticException; the check
// probably should be width > 0. Assertions are also disabled unless the JVM
// runs with -ea, so a negative width is not rejected in production either.
110 assert width >= 0 : "Wrong width parameter ";
// Work on a mutable copy of the sequence so that newlines can be inserted
// in place.
112 final StringBuilder sb = new StringBuilder(sequence);
113 // int tail = nrOfWindows % WIN_SIZE;
114 // final int turns = (nrOfWindows - tail) / WIN_SIZE;
// Number of letters left over after the last full line of width letters.
116 int tailLen = sequence.length() % width;
117 // number of full lines; the counter below offsets positions by inserted newlines
118 int nchunks = (sequence.length() - tailLen) / width;
119 int nlineCharcounter = 0;
// One candidate insert position per full line.
121 for (int i = 1; i <= nchunks; i++) {
// End of the i-th full line in the original sequence, shifted by
// nlineCharcounter.
122 insPos = width * i + nlineCharcounter;
123 // Guard for the case where insPos is at or beyond the end of the buffer:
124 // no newline is wanted at the very end of the sequence.
125 if (sb.length() <= insPos) {
128 sb.insert(insPos, "\n");
131 // sb.insert(insPos + tailLen, "\n");
132 return sb.toString();
137 * @return sequence length
139 public int getLength() {
// NOTE(review): unlike getFormatedSequence, there is no null check here, so
// this throws NullPointerException when sequence is null -- confirm that
// callers never reach this with an unset sequence.
140 return this.sequence.length();
144 * Same as oneLineFasta
147 // public String toString() {
148 // return this.getOnelineFasta();
152 public int hashCode() {
// Multiplier used to combine the per-field hashes below.
153 final int prime = 17;
// A null id contributes 0 to the hash.
155 result = prime * result + ((id == null) ? 0 : id.hashCode());
// NOTE(review): the sequence is hashed case-sensitively here, but equals()
// in this class compares sequences with equalsIgnoreCase. Two objects with
// the same id and sequences differing only in letter case are therefore
// equal yet may have different hash codes, which breaks the equals/hashCode
// contract for HashMap/HashSet use. equals() also dereferences getId()
// without the null handling used here.
156 result = prime * result
157 + ((sequence == null) ? 0 : sequence.hashCode());
162 public boolean equals(Object obj) {
166 if (!(obj instanceof FastaSequence)) {
169 FastaSequence fs = (FastaSequence) obj;
170 if (!fs.getId().equals(this.getId())) {
173 if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {