2 * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services
\r
3 * (JABAWS) @version: 1.0 This library is free software; you can redistribute it
\r
4 * and/or modify it under the terms of the Apache License version 2 as published
\r
5 * by the Apache Software Foundation This library is distributed in the hope
\r
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
\r
7 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
8 * Apache License for more details. A copy of the license is in
\r
9 * apache_license.txt. It is also available here:
\r
10 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or
\r
11 * derived work distributed in source code form must include this copyright and
\r
15 package compbio.data.sequence;
\r
17 import java.util.regex.Matcher;
\r
18 import java.util.regex.Pattern;
\r
20 import javax.xml.bind.annotation.XmlAccessType;
\r
21 import javax.xml.bind.annotation.XmlAccessorType;
\r
23 import compbio.util.SysPrefs;
\r
24 import compbio.util.annotation.Immutable;
\r
27 * A FASTA formatted sequence. Please note that this class does not make any
\r
28 * assumptions as to what sequence it stores e.g. it could be nucleotide,
\r
29 * protein or even gapped alignment sequence! The only guarantee it makes is
\r
30 * that the sequence does not contain white space characters e.g. spaces, new
\r
35 * @version 1.0 September 2009
\r
38 @XmlAccessorType(XmlAccessType.FIELD)
\r
40 public class FastaSequence {
\r
47 // TODO: what about gapped sequences here? This should be indicated.
\r
49 * The string representation of the sequence
\r
51 private String sequence;
\r
54 // Default constructor for JaxB
\r
58 * Upon construction any whitespace characters are removed from the
\r
64 public FastaSequence(String id, String sequence) {
\r
66 this.sequence = SequenceUtil.cleanSequence(sequence);
\r
70 * Gets the value of id
\r
72 * @return the value of id
\r
74 public String getId() {
\r
79 * Gets the value of sequence
\r
81 * @return the value of sequence
\r
83 public String getSequence() {
\r
84 return this.sequence;
\r
87 public static int countMatchesInSequence(final String theString,
\r
88 final String theRegExp) {
\r
89 final Pattern p = Pattern.compile(theRegExp);
\r
90 final Matcher m = p.matcher(theString);
\r
98 public String getFormattedFasta() {
\r
99 return getFormatedSequence(80);
\r
104 * @return one line name, next line sequence, no matter what the sequence
\r
107 public String getOnelineFasta() {
\r
108 String fasta = ">" + getId() + SysPrefs.newlinechar;
\r
109 fasta += getSequence() + SysPrefs.newlinechar;
\r
114 * Formats the sequence into lines of width letters, as one string without spaces.
\r
116 * @return multi-line formatted sequence, with lines of width letters each
\r
119 public String getFormatedSequence(final int width) {
\r
120 if (sequence == null) {
\r
124 assert width >= 0 : "Wrong width parameter ";
\r
126 final StringBuilder sb = new StringBuilder(sequence);
\r
127 // int tail = nrOfWindows % WIN_SIZE;
\r
128 // final int turns = (nrOfWindows - tail) / WIN_SIZE;
\r
130 int tailLen = sequence.length() % width;
\r
131 // add up inserted new line chars
\r
132 int nchunks = (sequence.length() - tailLen) / width;
\r
133 int nlineCharcounter = 0;
\r
135 for (int i = 1; i <= nchunks; i++) {
\r
136 insPos = width * i + nlineCharcounter;
\r
137 // to prevent inserting new line in the very end of a sequence then
\r
138 // it would have failed.
\r
139 if (sb.length() <= insPos) {
\r
142 sb.insert(insPos, "\n");
\r
143 nlineCharcounter++;
\r
145 // sb.insert(insPos + tailLen, "\n");
\r
146 return sb.toString();
\r
151 * @return sequence length
\r
153 public int getLength() {
\r
154 return this.sequence.length();
\r
158 * Same as {@link #getOnelineFasta()}
\r
161 public String toString() {
\r
162 return this.getOnelineFasta();
\r
166 public int hashCode() {
\r
167 final int prime = 17;
\r
169 result = prime * result + ((id == null) ? 0 : id.hashCode());
\r
170 result = prime * result
\r
171 + ((sequence == null) ? 0 : sequence.hashCode());
\r
176 public boolean equals(Object obj) {
\r
180 if (!(obj instanceof FastaSequence)) {
\r
183 FastaSequence fs = (FastaSequence) obj;
\r
184 if (!fs.getId().equals(this.getId())) {
\r
187 if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {
\r