1 /* Copyright (c) 2011 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
16 * must include this copyright and license notice.
\r
19 package compbio.data.sequence;
\r
21 import java.util.regex.Matcher;
\r
22 import java.util.regex.Pattern;
\r
24 import javax.xml.bind.annotation.XmlAccessType;
\r
25 import javax.xml.bind.annotation.XmlAccessorType;
\r
27 import compbio.util.SysPrefs;
\r
28 import compbio.util.annotation.Immutable;
\r
31 * A FASTA formatted sequence. Please note that this class does not make any
\r
32 * assumptions as to what sequence it stores e.g. it could be nucleotide,
\r
33 * protein or even gapped alignment sequence! The only guarantee it makes is
\r
34 * that the sequence does not contain white space characters e.g. spaces, new
\r
39 * @version 1.0 September 2009
\r
42 @XmlAccessorType(XmlAccessType.FIELD)
\r
44 public class FastaSequence {
\r
51 // TODO what about gapped sequence here! should be indicated
\r
53 * Returns the string representation of sequence
\r
55 private String sequence;
\r
58 // Default constructor for JAXB
\r
62 * Upon construction any whitespace characters are removed from the
\r
68 public FastaSequence(String id, String sequence) {
\r
70 this.sequence = SequenceUtil.cleanSequence(sequence);
\r
74 * Gets the value of id
\r
76 * @return the value of id
\r
78 public String getId() {
\r
83 * Gets the value of sequence
\r
85 * @return the value of sequence
\r
87 public String getSequence() {
\r
88 return this.sequence;
\r
91 public static int countMatchesInSequence(final String theString,
\r
92 final String theRegExp) {
\r
93 final Pattern p = Pattern.compile(theRegExp);
\r
94 final Matcher m = p.matcher(theString);
\r
102 public String getFormattedFasta() {
\r
103 return getFormatedSequence(80);
\r
108 * @return one line name, next line sequence, no matter what the sequence
\r
111 public String getOnelineFasta() {
\r
112 String fasta = ">" + getId() + SysPrefs.newlinechar;
\r
113 fasta += getSequence() + SysPrefs.newlinechar;
\r
118 * Formats the sequence as one string with a line break after every width letters. Without spaces.
\r
120 * @return multi-line formatted sequence, each line width letters long
\r
123 public String getFormatedSequence(final int width) {
\r
124 if (sequence == null) {
\r
128 assert width >= 0 : "Wrong width parameter ";
\r
130 final StringBuilder sb = new StringBuilder(sequence);
\r
131 // int tail = nrOfWindows % WIN_SIZE;
\r
132 // final int turns = (nrOfWindows - tail) / WIN_SIZE;
\r
134 int tailLen = sequence.length() % width;
\r
135 // add up inserted new line chars
\r
136 int nchunks = (sequence.length() - tailLen) / width;
\r
137 int nlineCharcounter = 0;
\r
139 for (int i = 1; i <= nchunks; i++) {
\r
140 insPos = width * i + nlineCharcounter;
\r
141 // to prevent inserting a new line at the very end of the sequence, since then
\r
142 // it would have failed.
\r
143 if (sb.length() <= insPos) {
\r
146 sb.insert(insPos, "\n");
\r
147 nlineCharcounter++;
\r
149 // sb.insert(insPos + tailLen, "\n");
\r
150 return sb.toString();
\r
155 * @return sequence length
\r
157 public int getLength() {
\r
158 return this.sequence.length();
\r
162 * Same as {@link #getOnelineFasta()}
\r
165 public String toString() {
\r
166 return this.getOnelineFasta();
\r
170 public int hashCode() {
\r
171 final int prime = 17;
\r
173 result = prime * result + ((id == null) ? 0 : id.hashCode());
\r
174 result = prime * result
\r
175 + ((sequence == null) ? 0 : sequence.hashCode());
\r
180 public boolean equals(Object obj) {
\r
184 if (!(obj instanceof FastaSequence)) {
\r
187 FastaSequence fs = (FastaSequence) obj;
\r
188 if (!fs.getId().equals(this.getId())) {
\r
191 if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {
\r