2 * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services
\r
3 * (JABAWS) @version: 1.0 This library is free software; you can redistribute it
\r
4 * and/or modify it under the terms of the Apache License version 2 as published
\r
5 * by the Apache Software Foundation This library is distributed in the hope
\r
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
\r
7 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
8 * Apache License for more details. A copy of the license is in
\r
9 * apache_license.txt. It is also available here:
\r
10 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or
\r
11 * derived work distributed in source code form must include this copyright and
\r
15 package compbio.data.sequence;
\r
17 import java.util.regex.Matcher;
\r
18 import java.util.regex.Pattern;
\r
20 import javax.xml.bind.annotation.XmlAccessType;
\r
21 import javax.xml.bind.annotation.XmlAccessorType;
\r
23 import compbio.util.SysPrefs;
\r
24 import compbio.util.annotation.Immutable;
\r
27 * A FASTA formatted sequence. Please note that this class does not make any
\r
28 * assumptions as to what sequence it stores e.g. it could be nucleotide, protein
\r
29 * or even gapped alignment sequence! The only guarantee it makes is that the
\r
30 * sequence does not contain white space characters e.g. spaces, new lines etc
\r
34 * Date September 2009
\r
37 @XmlAccessorType(XmlAccessType.FIELD)
\r
39 public class FastaSequence {
\r
46 // TODO what about gapped sequence here! should be indicated
\r
48 * The string representation of the sequence
\r
50 private String sequence;
\r
52 private FastaSequence() {
\r
53 // Default constructor for JaxB
\r
57 * Upon construction any whitespace characters are removed from the
\r
63 public FastaSequence(String id, String sequence) {
\r
65 this.sequence = SequenceUtil.cleanSequence(sequence);
\r
69 * Gets the value of id
\r
71 * @return the value of id
\r
73 public String getId() {
\r
78 * Gets the value of sequence
\r
80 * @return the value of sequence
\r
82 public String getSequence() {
\r
83 return this.sequence;
\r
86 public static int countMatchesInSequence(final String theString,
\r
87 final String theRegExp) {
\r
88 final Pattern p = Pattern.compile(theRegExp);
\r
89 final Matcher m = p.matcher(theString);
\r
97 public String getFormattedFasta() {
\r
98 return getFormatedSequence(80);
\r
103 * @return one line name, next line sequence, no matter what the sequence
\r
106 public String getOnelineFasta() {
\r
107 String fasta = ">" + getId() + SysPrefs.newlinechar;
\r
108 fasta += getSequence() + SysPrefs.newlinechar;
\r
113 * Formats the sequence as one string with width letters per line, without spaces.
\r
115 * @return multiple line formatted sequence, each line width letters long
\r
118 public String getFormatedSequence(final int width) {
\r
119 if (sequence == null) {
\r
123 assert width >= 0 : "Wrong width parameter ";
\r
125 final StringBuilder sb = new StringBuilder(sequence);
\r
126 // int tail = nrOfWindows % WIN_SIZE;
\r
127 // final int turns = (nrOfWindows - tail) / WIN_SIZE;
\r
129 int tailLen = sequence.length() % width;
\r
130 // add up inserted new line chars
\r
131 int nchunks = (sequence.length() - tailLen) / width;
\r
132 int nlineCharcounter = 0;
\r
134 for (int i = 1; i <= nchunks; i++) {
\r
135 insPos = width * i + nlineCharcounter;
\r
136 // to prevent inserting a new line at the very end of a sequence, since then
\r
137 // it would have failed.
\r
138 if (sb.length() <= insPos) {
\r
141 sb.insert(insPos, "\n");
\r
142 nlineCharcounter++;
\r
144 // sb.insert(insPos + tailLen, "\n");
\r
145 return sb.toString();
\r
150 * @return sequence length
\r
152 public int getLength() {
\r
153 return this.sequence.length();
\r
157 * Same as {@link #getOnelineFasta()}
\r
160 public String toString() {
\r
161 return this.getOnelineFasta();
\r
165 public int hashCode() {
\r
166 final int prime = 17;
\r
168 result = prime * result + ((id == null) ? 0 : id.hashCode());
\r
169 result = prime * result
\r
170 + ((sequence == null) ? 0 : sequence.hashCode());
\r
175 public boolean equals(Object obj) {
\r
179 if (!(obj instanceof FastaSequence)) {
\r
182 FastaSequence fs = (FastaSequence) obj;
\r
183 if (!fs.getId().equals(this.getId())) {
\r
186 if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {
\r