Replace old testng jar with new one
[proteocache.git] / datadb / compbio / cassandra / FastaSequence.java
1 package compbio.cassandra;
2
3 import java.util.regex.Matcher;
4 import java.util.regex.Pattern;
5
6 import javax.xml.bind.annotation.XmlAccessType;
7 import javax.xml.bind.annotation.XmlAccessorType;
8
9 //import compbio.util.SysPrefs;
10 //import compbio.util.annotation.Immutable;
11
12 /**
13  * A FASTA formatted sequence. Please note that this class does not make any
14  * assumptions as to what sequence it stores e.g. it could be nucleotide,
15  * protein or even gapped alignment sequence! The only guarantee it makes is
16  * that the sequence does not contain white space characters e.g. spaces, new
17  * lines etc
18  * 
19  * @author pvtroshin
20  * 
21  * @version 1.0 September 2009
22  */
23
24 @XmlAccessorType(XmlAccessType.FIELD)
25 //@Immutable
public class FastaSequence {

        /**
         * Line width used by {@link #getFormattedFasta()}.
         */
        private static final int DEFAULT_LINE_WIDTH = 80;

        /**
         * Matches any run of whitespace characters (spaces, tabs, new lines etc).
         */
        private static final Pattern WHITE_SPACE = Pattern.compile("\\s+");

        /**
         * Sequence id
         */
        private String id;

        // TODO what about gapped sequence here! should be indicated
        /**
         * The sequence itself, stored without whitespace characters. May be
         * {@code null} when the instance was created through the no-argument
         * constructor or constructed with a {@code null} sequence.
         */
        private String sequence;

        FastaSequence() {
                // Default constructor for JaxB
        }

        /**
         * Creates a sequence with the given id. Upon construction any whitespace
         * characters are removed from the sequence.
         * 
         * @param id
         *            the sequence id, stored as given
         * @param sequence
         *            the sequence letters; {@code null} is stored as {@code null}
         */
        public FastaSequence(String id, String sequence) {
                this.id = id;
                this.sequence = (sequence == null) ? null : WHITE_SPACE.matcher(
                                sequence).replaceAll("");
        }

        /**
         * Gets the value of id
         * 
         * @return the value of id
         */
        public String getId() {
                return this.id;
        }

        /**
         * Gets the value of sequence
         * 
         * @return the value of sequence
         */
        public String getSequence() {
                return this.sequence;
        }

        /**
         * Counts the non-overlapping matches of a regular expression in a string,
         * scanning from left to right.
         * 
         * @param theString
         *            the text to search in
         * @param theRegExp
         *            the regular expression to look for
         * @return the number of matches found
         * @throws java.util.regex.PatternSyntaxException
         *             if the regular expression is not valid
         */
        public static int countMatchesInSequence(final String theString,
                        final String theRegExp) {
                final Pattern p = Pattern.compile(theRegExp);
                final Matcher m = p.matcher(theString);
                int cnt = 0;
                while (m.find()) {
                        cnt++;
                }
                return cnt;
        }

        /**
         * Formats the sequence into lines of 80 letters. Only the sequence is
         * returned; no id line is included.
         * 
         * @return the sequence wrapped at 80 letters per line
         */
        public String getFormattedFasta() {
                return getFormatedSequence(DEFAULT_LINE_WIDTH);
        }

        /**
         * Format sequence per width letter in one string. Without spaces.
         * 
         * @param width
         *            the number of letters per line, must be positive
         * @return multiple line formatted sequence, each line but the last is
         *         width letters long; lines are separated by a new line character
         *         and no new line is added after the last one. An empty string if
         *         there is no sequence
         * @throws IllegalArgumentException
         *             if there is a sequence to format and width is not positive
         */
        public String getFormatedSequence(final int width) {
                if (sequence == null) {
                        return "";
                }
                if (width <= 0) {
                        throw new IllegalArgumentException(
                                        "Wrong width parameter: " + width);
                }

                final int length = sequence.length();
                // room for the letters plus one separator per full line
                final StringBuilder sb = new StringBuilder(length + length / width);
                int start = 0;
                while (start < length) {
                        // compare the remainder with width instead of adding first, so
                        // that a very large width cannot overflow
                        final int end = (length - start > width) ? start + width
                                        : length;
                        sb.append(sequence, start, end);
                        if (end < length) {
                                // never put a new line at the very end of the sequence
                                sb.append('\n');
                        }
                        start = end;
                }
                return sb.toString();
        }

        /**
         * 
         * @return sequence length, 0 if there is no sequence
         */
        public int getLength() {
                return (this.sequence == null) ? 0 : this.sequence.length();
        }

        /**
         * The sequence contributes a case insensitive hash, because
         * {@link #equals(Object)} compares sequences ignoring case.
         */
        @Override
        public int hashCode() {
                final int prime = 17;
                int result = 1;
                result = prime * result + ((id == null) ? 0 : id.hashCode());
                result = prime * result + caseInsensitiveHash(sequence);
                return result;
        }

        /**
         * Two sequences are equal when their ids are equal and their sequences
         * are equal ignoring case. A {@code null} id or sequence only equals
         * another {@code null}.
         */
        @Override
        public boolean equals(Object obj) {
                if (this == obj) {
                        return true;
                }
                if (!(obj instanceof FastaSequence)) {
                        // also covers obj == null
                        return false;
                }
                final FastaSequence fs = (FastaSequence) obj;
                if (id == null ? fs.id != null : !id.equals(fs.id)) {
                        return false;
                }
                if (sequence == null) {
                        return fs.sequence == null;
                }
                // equalsIgnoreCase returns false for a null argument
                return sequence.equalsIgnoreCase(fs.sequence);
        }

        /**
         * Hashes the text so that strings which are equal ignoring case get the
         * same value: every code point is folded to upper and then to lower case,
         * the same folding String.equalsIgnoreCase applies.
         */
        private static int caseInsensitiveHash(final String text) {
                if (text == null) {
                        return 0;
                }
                int hash = 0;
                int i = 0;
                while (i < text.length()) {
                        final int cp = text.codePointAt(i);
                        hash = 31 * hash
                                        + Character.toLowerCase(Character.toUpperCase(cp));
                        i += Character.charCount(cp);
                }
                return hash;
        }

}