1 /* Copyright (c) 2009 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
16 * must include this copyright and license notice.
\r
19 package compbio.data.sequence;
\r
21 import java.util.regex.Matcher;
\r
22 import java.util.regex.Pattern;
\r
24 import javax.xml.bind.annotation.XmlAccessType;
\r
25 import javax.xml.bind.annotation.XmlAccessorType;
\r
27 import compbio.util.SysPrefs;
\r
28 import compbio.util.annotation.Immutable;
\r
31 * A FASTA formatted sequence. Please note that this class does not make any
\r
32 * assumptions as to what sequence it stores, e.g. it could be nucleotide, protein
\r
33 * or even gapped alignment sequence! The only guarantee it makes is that the
\r
34 * sequence does not contain white space characters e.g. spaces, new lines etc
\r
38 * Date September 2009
\r
41 @XmlAccessorType(XmlAccessType.FIELD)
\r
43 public class FastaSequence {
\r
50 // TODO what about gapped sequence here! should be indicated
\r
52 * Returns the string representation of sequence
\r
54 private String sequence;
\r
56 private FastaSequence() {
\r
57 // Default constructor for JaxB
\r
61 * Upon construction the any whitespace characters are removed from the
\r
67 public FastaSequence(String id, String sequence) {
\r
69 this.sequence = SequenceUtil.cleanSequence(sequence);
\r
73 * Gets the value of id
\r
75 * @return the value of id
\r
77 public String getId() {
\r
82 * Gets the value of sequence
\r
84 * @return the value of sequence
\r
86 public String getSequence() {
\r
87 return this.sequence;
\r
90 public static int countMatchesInSequence(final String theString,
\r
91 final String theRegExp) {
\r
92 final Pattern p = Pattern.compile(theRegExp);
\r
93 final Matcher m = p.matcher(theString);
\r
101 public String getFormattedFasta() {
\r
102 return getFormatedSequence(80);
\r
107 * @return one line name, next line sequence, no matter what the sequence
\r
110 public String getOnelineFasta() {
\r
111 String fasta = ">" + getId() + SysPrefs.newlinechar;
\r
112 fasta += getSequence() + SysPrefs.newlinechar;
\r
117 * Format sequence per width letter in one string. Without spaces.
\r
119 * @return multiple line formated sequence, one line width letters length
\r
122 public String getFormatedSequence(final int width) {
\r
123 if (sequence == null) {
\r
127 assert width >= 0 : "Wrong width parameter ";
\r
129 final StringBuilder sb = new StringBuilder(sequence);
\r
130 int nchunks = sequence.length() / width;
\r
131 // add up inserted new line chars
\r
132 nchunks = (nchunks + sequence.length()) / width;
\r
133 int nlineCharcounter = 0;
\r
134 for (int i = 1; i <= nchunks; i++) {
\r
135 int insPos = width * i + nlineCharcounter;
\r
136 // to prevent inserting new line in the very end of a sequence then
\r
137 // it would have failed.
\r
138 // Also covers the case when the sequences shorter than width
\r
139 if (sb.length() <= insPos) {
\r
142 sb.insert(insPos, "\n");
\r
143 nlineCharcounter++;
\r
145 return sb.toString();
\r
150 * @return sequence length
\r
152 public int getLength() {
\r
153 return this.sequence.length();
\r
157 * Same as oneLineFasta
\r
160 public String toString() {
\r
161 return this.getOnelineFasta();
\r
165 public int hashCode() {
\r
166 final int prime = 17;
\r
168 result = prime * result + ((id == null) ? 0 : id.hashCode());
\r
169 result = prime * result
\r
170 + ((sequence == null) ? 0 : sequence.hashCode());
\r
175 public boolean equals(Object obj) {
\r
179 if (!(obj instanceof FastaSequence)) {
\r
182 FastaSequence fs = (FastaSequence) obj;
\r
183 if (!fs.getId().equals(this.getId())) {
\r
186 if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {
\r