2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.ws.seqdb;
28 import java.util.List;
29 import java.util.SortedSet;
30 import java.util.TreeSet;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
34 import org.forester.go.GoTerm;
35 import org.forester.phylogeny.data.Accession;
36 import org.forester.phylogeny.data.Annotation;
37 import org.forester.sequence.MolecularSequence;
38 import org.forester.util.ForesterUtil;
40 public final class EbiDbEntry implements SequenceDatabaseEntry {
// Annotations collected for this entry; the set is created lazily by addAnnotation().
42 private SortedSet<Annotation> _annotations;
// Chromosome label as stored by setChromosome().
43 private String _chromosome;
// Cross-references to other databases; the set is created lazily by addCrossReference().
44 private SortedSet<Accession> _cross_references;
// Gene name; setGeneName() only stores a value while this is still null.
46 private String _gene_name;
49 // FIXME: this is actually an NCBI entry
50 //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
// Provider name, assigned through the public setProvider().
52 private String _provider;
// Sequence symbol; presumably written by setSequenceSymbol() -- TODO confirm.
53 private String _symbol;
// Taxonomy identifier; setTaxId() tests this for null first (first value presumably wins -- TODO confirm).
54 private String _tax_id;
56 // TODO PUBMED 15798186
58 // source /db_xref="taxon:9606"
62 // /db_xref="MIM:604739"
65 // /db_xref="MIM:604739"
66 // /db_xref="InterPro:IPR002475"
68 // /db_xref="UniProtKB/TrEMBL:Q5J7V1" <- reparse?
72 LOCUS NM_184234 2881 bp mRNA linear PRI 16-JUN-2013
73 DEFINITION Homo sapiens RNA binding motif protein 39 (RBM39), transcript
76 VERSION NM_184234.2 GI:336176061
78 SOURCE Homo sapiens (human)
80 Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
81 Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
82 Catarrhini; Hominidae; Homo.
83 REFERENCE 1 (bases 1 to 2881)
84 AUTHORS Sillars-Hardebol,A.H., Carvalho,B., Belien,J.A., de Wit,M.,
85 Delis-van Diemen,P.M., Tijssen,M., van de Wiel,M.A., Ponten,F.,
86 Meijer,G.A. and Fijneman,R.J.
87 TITLE CSE1L, DIDO1 and RBM39 in colorectal adenoma to carcinoma
89 JOURNAL Cell Oncol (Dordr) 35 (4), 293-300 (2012)
91 REMARK GeneRIF: Data show that CSE1L, DIDO1 and RBM39 mRNA expression
92 levels correlated with chromosome 20q DNA copy number status.
93 REFERENCE 2 (bases 1 to 2881)
94 AUTHORS Huang,G., Zhou,Z., Wang,H. and Kleinerman,E.S.
95 TITLE CAPER-alpha alternative splicing regulates the expression of
96 vascular endothelial growth factor(1)(6)(5) in Ewing sarcoma cells
97 JOURNAL Cancer 118 (8), 2106-2116 (2012)
99 REMARK GeneRIF: Increased VEGF(165) expression is secondary to the
100 down-regulation of CAPER-alpha by EWS/FLI-1. CAPER-alpha mediates
101 alternative splicing and controls the shift from VEGF(189) to
103 REFERENCE 3 (bases 1 to 2881)
104 AUTHORS Han,B., Stockwin,L.H., Hancock,C., Yu,S.X., Hollingshead,M.G. and
106 TITLE Proteomic analysis of nuclei isolated from cancer cell lines
107 treated with indenoisoquinoline NSC 724998, a novel topoisomerase I
109 JOURNAL J. Proteome Res. 9 (8), 4016-4027 (2010)
111 REMARK Erratum:[J Proteome Res. 2011 Apr 1;10(4):2128]
112 REFERENCE 4 (bases 1 to 2881)
113 AUTHORS Zhang,J.Y., Looi,K.S. and Tan,E.M.
114 TITLE Identification of tumor-associated antigens as diagnostic and
115 predictive biomarkers in cancer
116 JOURNAL Methods Mol. Biol. 520, 1-10 (2009)
118 REFERENCE 5 (bases 1 to 2881)
119 AUTHORS Dutta,J., Fan,G. and Gelinas,C.
120 TITLE CAPERalpha is a novel Rel-TAD-interacting factor that inhibits
121 lymphocyte transformation by the potent Rel/NF-kappaB oncoprotein
123 JOURNAL J. Virol. 82 (21), 10792-10802 (2008)
125 REMARK GeneRIF: this study identifies CAPERalpha (RNA binding motif
126 protein 39) as a new transcriptional coregulator for v-Rel and
127 reveals an important role in modulating Rel's oncogenic activity.
128 REFERENCE 6 (bases 1 to 2881)
129 AUTHORS Cazalla,D., Newton,K. and Caceres,J.F.
130 TITLE A novel SR-related protein is required for the second step of
132 JOURNAL Mol. Cell. Biol. 25 (8), 2969-2980 (2005)
134 REFERENCE 7 (bases 1 to 2881)
135 AUTHORS Dowhan,D.H., Hong,E.P., Auboeuf,D., Dennis,A.P., Wilson,M.M.,
136 Berget,S.M. and O'Malley,B.W.
137 TITLE Steroid hormone receptor coactivation and alternative RNA splicing
138 by U2AF65-related proteins CAPERalpha and CAPERbeta
139 JOURNAL Mol. Cell 17 (3), 429-439 (2005)
141 REFERENCE 8 (bases 1 to 2881)
142 AUTHORS Sun,N.N., Fastje,C.D., Wong,S.S., Sheppard,P.R., Macdonald,S.J.,
143 Ridenour,G., Hyde,J.D. and Witten,M.L.
144 TITLE Dose-dependent transcriptome changes by metal ores on a human acute
145 lymphoblastic leukemia cell line
146 JOURNAL Toxicol Ind Health 19 (7-10), 157-163 (2003)
148 REMARK GeneRIF: 10 genes were down-regulated following treatment of the
149 T-ALL cells with 0.15 and 1.5 microg/mL of metal ores at 72 h
150 REFERENCE 9 (bases 1 to 2881)
151 AUTHORS Jung,D.J., Na,S.Y., Na,D.S. and Lee,J.W.
152 TITLE Molecular cloning and characterization of CAPER, a novel
153 coactivator of activating protein-1 and estrogen receptors
154 JOURNAL J. Biol. Chem. 277 (2), 1229-1234 (2002)
156 REMARK GeneRIF: This paper describes the mouse gene.
157 REFERENCE 10 (bases 1 to 2881)
158 AUTHORS Imai,H., Chan,E.K., Kiyosawa,K., Fu,X.D. and Tan,E.M.
159 TITLE Novel nuclear autoantigen with splicing factor motifs identified
160 with antibody from hepatocellular carcinoma
161 JOURNAL J. Clin. Invest. 92 (5), 2419-2426 (1993)
163 COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The
164 reference sequence was derived from DC346351.1, BC141835.1 and
166 On Jun 16, 2011 this sequence version replaced gi:35493810.
168 Summary: This gene encodes a member of the U2AF65 family of
169 proteins. The encoded protein is found in the nucleus, where it
170 co-localizes with core spliceosomal proteins. It has been shown to
171 play a role in both steroid hormone receptor-mediated transcription
172 and alternative splicing, and it is also a transcriptional
173 coregulator of the viral oncoprotein v-Rel. Multiple transcript
174 variants have been observed for this gene. A related pseudogene has
175 been identified on chromosome X. [provided by RefSeq, Aug 2011].
177 Transcript Variant: This variant (1) encodes the longest isoform
178 (a, also called CC1.4).
180 Publication Note: This RefSeq record includes a subset of the
181 publications that are available for this gene. Please see the Gene
182 record to access additional publications.
184 ##Evidence-Data-START##
185 Transcript exon combination :: BC141835.1, L10911.1 [ECO:0000332]
186 RNAseq introns :: mixed/partial sample support
187 ERS025081, ERS025082 [ECO:0000350]
188 ##Evidence-Data-END##
189 COMPLETENESS: complete on the 3' end.
190 PRIMARY REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP
191 1-578 DC346351.1 3-580
192 579-2872 BC141835.1 429-2722
193 2873-2881 C75555.1 1-9 c
194 FEATURES Location/Qualifiers
196 /organism="Homo sapiens"
198 /db_xref="taxon:9606"
203 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
204 /note="RNA binding motif protein 39"
205 /db_xref="GeneID:9584"
206 /db_xref="HGNC:15923"
207 /db_xref="HPRD:09201"
208 /db_xref="MIM:604739"
211 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
212 /inference="alignment:Splign:1.39.8"
215 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
216 /standard_name="REN58946"
217 /db_xref="UniSTS:383746"
218 misc_feature 221..223
220 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
221 /note="upstream in-frame stop codon"
224 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
225 /standard_name="G64285"
226 /db_xref="UniSTS:158667"
229 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
230 /inference="alignment:Splign:1.39.8"
233 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
234 /note="isoform a is encoded by transcript variant 1;
235 coactivator of activating protein-1 and estrogen
236 receptors; functional spliceosome-associated protein 59;
237 RNA-binding region (RNP1, RRM) containing 2;
238 hepatocellular carcinoma protein 1; splicing factor HCC1"
240 /product="RNA-binding protein 39 isoform a"
241 /protein_id="NP_909122.1"
242 /db_xref="GI:35493811"
243 /db_xref="CCDS:CCDS13266.1"
244 /db_xref="GeneID:9584"
245 /db_xref="HGNC:15923"
246 /db_xref="HPRD:09201"
247 /db_xref="MIM:604739"
248 /translation="MADDIDIEAMLEAPYKKDENKLSSANGHEERSKKRKKSKSRSRS
249 HERKRSKSKERKRSRDRERKKSKSRERKRSRSKERRRSRSRSRDRRFRGRYRSPYSGP
250 KFNSAIRGKIGLPHSIKLSRRRSRSKSPFRKDKSPVREPIDNLTPEERDARTVFCMQL
251 AARIRPRDLEEFFSTVGKVRDVRMISDRNSRRSKGIAYVEFVDVSSVPLAIGLTGQRV
252 LGVPIIVQASQAEKNRAAAMANNLQKGSAGPMRLYVGSLHFNITEDMLRGIFEPFGRI
253 ESIQLMMDSETGRSKGYGFITFSDSECAKKALEQLNGFELAGRPMKVGHVTERTDASS
254 ASSFLDSDELERTGIDLGTTGRLQLMARLAEGTGLQIPPAAQQALQMSGSLAFGAVAE
255 FSFVIDLQTRLSQQTEASALAAAASVQPLATQCFQLSNMFNPQTEEEVGWDTEIKDDV
256 IEECNKHGGVIHIYVDKNSAQGNVYVKCPSIAAAIAAVNALHGRWFAGKMITAAYVPL
257 PTYHNLFPDSMTATQLLVPSRR"
258 misc_feature 413..415
260 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
261 /experiment="experimental evidence, no additional details
263 /note="N-acetylalanine; propagated from
264 UniProtKB/Swiss-Prot (Q14498.2); acetylation site"
268 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
269 /inference="alignment:Splign:1.39.8"
273 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
274 /inference="alignment:Splign:1.39.8"
277 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
278 /standard_name="REN58786"
279 /db_xref="UniSTS:383586"
282 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
283 /standard_name="D19S1033"
284 /db_xref="UniSTS:154759"
287 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
288 /standard_name="REN58785"
289 /db_xref="UniSTS:383585"
291 polyA_signal 2851..2856
293 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
296 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
298 1 atttggagct tggggcagct tctcgcgaga gcccgtgctg agggctctgt gaggccccgt
299 61 gtgtttgtgt gtgtgtatgt gtgctggtga atgtgagtac agggaagcag cggccgccat
300 121 ttcagggagc ttgtcgacgc tgtcgcaggg gtggatcctg agctgccgaa gccgccgtcc
301 181 tgctctcccg cgtgggcttc tctaattcca ttgttttttt tagattctct cgggcctagc
302 241 cgtccttgga acccgatatt cgggctgggc ggttccgcgg cctgggccta ggggcttaac
// Private constructor: within this class, instances are created by the static
// factory createInstanceFromPlainTextForRefSeq().
307 private EbiDbEntry() {
// Cloning is not supported for this class: the method unconditionally throws
// CloneNotSupportedException.
311 public Object clone() throws CloneNotSupportedException {
312 throw new CloneNotSupportedException();
316 public String getAccession() {
321 public SortedSet<Annotation> getAnnotations() {
326 public String getChromosome() {
// Returns the internal cross-reference set itself (no defensive copy).
// The set is created lazily in addCrossReference(), so this presumably returns
// null when no cross-reference was ever added -- TODO confirm against the constructor.
331 public SortedSet<Accession> getCrossReferences() {
332 return _cross_references;
336 public String getGeneName() {
341 public SortedSet<GoTerm> getGoTerms() {
346 public String getMap() {
351 public String getProvider() {
356 public String getSequenceName() {
361 public String getSequenceSymbol() {
366 public String getTaxonomyIdentifier() {
371 public String getTaxonomyScientificName() {
// Reports whether this entry carries no identifying data. The result is true only
// if accession, sequence name, taxonomy scientific name, taxonomy identifier and
// sequence symbol are ALL empty according to ForesterUtil.isEmpty().
// Other data (gene name, chromosome, annotations, cross-references, provider) is
// not consulted here.
376 public boolean isEmpty() {
377 return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
378 && ForesterUtil.isEmpty( getTaxonomyScientificName() )
379 && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
// Stores the given provider string, overwriting any previously set value.
// No validation is performed; null is accepted as-is.
382 public void setProvider( final String provider ) {
383 _provider = provider;
// Adds one annotation to this entry.
// The backing TreeSet is allocated on first use, so _annotations stays null
// until the first annotation is added here.
386 private void addAnnotation( final Annotation annotation ) {
387 if ( _annotations == null ) {
388 _annotations = new TreeSet<Annotation>();
390 _annotations.add( annotation );
// Adds one cross-reference (an Accession) to this entry.
// The backing TreeSet is allocated on first use, so _cross_references stays null
// until the first cross-reference is added here.
393 private void addCrossReference( final Accession accession ) {
394 if ( _cross_references == null ) {
395 _cross_references = new TreeSet<Accession>();
// NOTE(review): writes every added accession to standard output; this looks like
// leftover debug output in library code -- consider removing it or using a logger.
397 System.out.println( "XREF ADDED: " + accession );
398 _cross_references.add( accession );
401 private void setAccession( final String pa ) {
// Stores the chromosome label, overwriting any previous value (unlike
// setGeneName(), there is no first-value-wins guard here).
407 private void setChromosome( final String chromosome ) {
408 _chromosome = chromosome;
// Stores the gene name only if none has been stored yet: the first gene name
// passed in wins and later calls leave the field unchanged.
411 private void setGeneName( final String gene_name ) {
412 if ( _gene_name == null ) {
413 _gene_name = gene_name;
417 private void setMap( final String map ) {
421 private void setSequenceName( final String rec_name ) {
427 private void setSequenceSymbol( final String symbol ) {
// Sets the taxonomy identifier. The update is guarded by a null check on _tax_id,
// mirroring setGeneName(); presumably the first identifier seen is kept and later
// ones are ignored -- TODO confirm.
431 private void setTaxId( final String tax_id ) {
432 if ( _tax_id == null ) {
437 private void setTaxonomyScientificName( final String os ) {
443 // public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
444 // final EbiDbEntry e = new EbiDbEntry();
445 // for( final String line : lines ) {
446 // if ( line.startsWith( "PA" ) ) {
447 // e.setPA( SequenceDbWsTools.extractFrom( line, "PA" ) );
449 // else if ( line.startsWith( "DE" ) ) {
450 // e.setDe( SequenceDbWsTools.extractFrom( line, "DE" ) );
452 // else if ( line.startsWith( "OS" ) ) {
453 // if ( line.indexOf( "(" ) > 0 ) {
454 // e.setOs( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
457 // e.setOs( SequenceDbWsTools.extractFrom( line, "OS" ) );
460 // else if ( line.startsWith( "OX" ) ) {
461 // if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
462 // e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
// Builds an EbiDbEntry from the lines of a plain-text sequence record.
//
// Two keyword styles are recognized, as the startsWith() checks below show:
// the long keywords (ACCESSION, DEFINITION, ORGANISM, FEATURES) and the
// two-letter line codes (ID, DE, OS, FT).
//
// Data collected: accession, definition text (stored as the sequence name),
// taxonomy scientific name, taxonomy id, chromosome, map, gene name, product
// (stored as the sequence symbol), EC numbers (as "EC" annotations) and
// cross-references tagged "hgnc", "geneid", "uniprot", "interpro", "mim", "pdb".
//
// lines: the record, one list element per text line.
// Presumably returns the populated entry e -- TODO confirm.
468 public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List<String> lines ) {
// Qualifier patterns; each one captures the value of interest in group 1.
// NOTE(review): these are compiled again on every call; they could be hoisted
// into static final constants.
// X_PATTERN: a line that begins with one or more upper-case ASCII letters.
469 final Pattern X_PATTERN = Pattern.compile( "^[A-Z]+" );
470 final Pattern chromosome_PATTERN = Pattern.compile( "\\s+/chromosome=\"(\\w+)\"" );
// NOTE(review): the character class below has no quantifier, so group 1 can only
// ever be a single character; a multi-character map value will not match.
// Probably meant to be followed by '+' -- confirm before changing.
471 final Pattern map_PATTERN = Pattern.compile( "\\s+/map=\"([\\w+\\.])\"" );
472 final Pattern gene_PATTERN = Pattern.compile( "\\s+/gene=\"(.+)\"" );
473 final Pattern mim_PATTERN = Pattern.compile( "\\s+/db_xref=\"MIM:(\\d+)\"" );
474 final Pattern taxon_PATTERN = Pattern.compile( "\\s+/db_xref=\"taxon:(\\d+)\"" );
475 final Pattern interpro_PATTERN = Pattern.compile( "\\s+/db_xref=\"InterPro:([A-Z0-9]+)\"" );
476 final Pattern uniprot_PATTERN = Pattern.compile( "\\s+/db_xref=\"UniProtKB/[A-Za-z-]*:(\\w+)\"" );
477 final Pattern hgnc_PATTERN = Pattern.compile( "\\s+/db_xref=\"[A-Z:]*HGNC:(\\d+)\"" );
478 final Pattern geneid_PATTERN = Pattern.compile( "\\s+/db_xref=\"GeneID:(\\d+)\"" );
479 final Pattern pdb_PATTERN = Pattern.compile( "\\s+/db_xref=\"PDB:([A-Z0-9]+)\"" );
480 final Pattern ec_PATTERN = Pattern.compile( "\\s+/EC_number=\"([\\.\\-\\d]+)\"" );
// Only accepts a product value made of 1 to 10 word characters (no spaces or
// hyphens), so longer descriptive product names are not captured.
481 final Pattern product_PATTERN = Pattern.compile( "\\s+/product=\"(\\w{1,10})\"" );
482 final EbiDbEntry e = new EbiDbEntry();
// Accumulates the definition text, which may span several lines; it becomes the
// sequence name once all lines have been processed.
483 final StringBuilder def = new StringBuilder();
// Parser state: in_definition marks that following space-prefixed lines continue
// the definition; the remaining flags track the feature-table section in effect.
484 boolean in_definition = false;
485 boolean in_features = false;
486 boolean in_source = false;
487 boolean in_gene = false;
488 boolean in_cds = false;
489 boolean in_mrna = false;
490 boolean in_protein = false;
491 for( final String line : lines ) {
// Accession: taken from an "ACCESSION" line ...
492 if ( line.startsWith( "ACCESSION " ) ) {
493 e.setAccession( SequenceDbWsTools.extractFrom( line, "ACCESSION" ) );
494 in_definition = false;
// ... or from an "ID" line, using ";" as the end delimiter.
496 else if ( line.startsWith( "ID " ) ) {
497 e.setAccession( SequenceDbWsTools.extractFromTo( line, "ID", ";" ) );
498 in_definition = false;
// First line of the definition ("DEFINITION" or "DE"). The text is cut at the
// first '[' if the line has one, otherwise at the first '.', otherwise it is
// taken whole. Note that indexOf(...) > 0 ignores a delimiter at position 0.
500 else if ( line.startsWith( "DEFINITION " ) || ( line.startsWith( "DE " ) ) ) {
501 boolean definiton = false;
502 if ( line.startsWith( "DEFINITION " ) ) {
505 if ( line.indexOf( "[" ) > 0 ) {
507 x( def, ( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "[" ) ) );
510 x( def, ( SequenceDbWsTools.extractFromTo( line, "DE", "[" ) ) );
513 else if ( line.indexOf( "." ) > 0 ) {
515 x( def, ( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "." ) ) );
518 x( def, ( SequenceDbWsTools.extractFromTo( line, "DE", "." ) ) );
523 x( def, ( SequenceDbWsTools.extractFrom( line, "DEFINITION" ) ) );
526 x( def, ( SequenceDbWsTools.extractFrom( line, "DE" ) ) );
530 in_definition = true;
// Scientific name from an "ORGANISM" line; when the line contains '(' the
// extraction uses it as the end delimiter.
533 else if ( line.startsWith( " ORGANISM " ) ) {
534 if ( line.indexOf( "(" ) > 0 ) {
535 e.setTaxonomyScientificName( SequenceDbWsTools.extractFromTo( line, " ORGANISM", "(" ) );
538 e.setTaxonomyScientificName( SequenceDbWsTools.extractFrom( line, " ORGANISM" ) );
// Same as above for the two-letter "OS" line code.
542 else if ( line.startsWith( "OS " ) ) {
543 if ( line.indexOf( "(" ) > 0 ) {
544 e.setTaxonomyScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
547 e.setTaxonomyScientificName( SequenceDbWsTools.extractFrom( line, "OS" ) );
// Continuation line of a definition: starts with a space while in_definition is
// set. The same '[' / '.' cut-off rules as for the first definition line apply.
550 else if ( line.startsWith( " " ) && in_definition ) {
552 if ( line.indexOf( "[" ) > 0 ) {
553 def.append( SequenceDbWsTools.extractTo( line, "[" ) );
555 else if ( line.indexOf( "." ) > 0 ) {
556 def.append( SequenceDbWsTools.extractTo( line, "." ) );
559 def.append( line.trim() );
563 in_definition = false;
// A non-"FT" line that begins with upper-case letters; presumably this marks a
// new top-level keyword and resets the section flags -- TODO confirm.
565 if ( !line.startsWith( "FT " ) && X_PATTERN.matcher( line ).find() ) {
// Start of the feature table, followed by detection of the individual feature
// keys (source, gene, CDS, Protein, mRNA) in either keyword style; presumably
// each check sets the matching in_* flag -- TODO confirm.
574 if ( line.startsWith( "FEATURES " ) || line.startsWith( "FT " ) ) {
577 if ( in_features && ( line.startsWith( " source " ) || line.startsWith( "FT source " ) ) ) {
584 if ( in_features && ( line.startsWith( " gene " ) || line.startsWith( "FT gene " ) ) ) {
591 if ( in_features && ( line.startsWith( " CDS " ) || line.startsWith( "FT CDS " ) ) ) {
598 if ( in_features && ( line.startsWith( " Protein " ) || line.startsWith( "FT Protein " ) ) ) {
605 if ( in_features && ( line.startsWith( " mRNA " ) || line.startsWith( "FT mRNA " ) ) ) {
// Taxonomy id, chromosome and map qualifiers (presumably only looked for inside
// the source feature -- TODO confirm).
613 final Matcher ti = taxon_PATTERN.matcher( line );
615 e.setTaxId( ti.group( 1 ) );
617 final Matcher chr = chromosome_PATTERN.matcher( line );
619 e.setChromosome( chr.group( 1 ) );
621 final Matcher map = map_PATTERN.matcher( line );
623 e.setMap( map.group( 1 ) );
// HGNC and GeneID cross-references are checked under the CDS-or-gene condition.
626 if ( in_cds || in_gene ) {
627 final Matcher hgnc = hgnc_PATTERN.matcher( line );
629 e.addCrossReference( new Accession( hgnc.group( 1 ), "hgnc" ) );
631 final Matcher geneid = geneid_PATTERN.matcher( line );
632 if ( geneid.find() ) {
633 e.addCrossReference( new Accession( geneid.group( 1 ), "geneid" ) );
// Qualifiers checked under the Protein/CDS/gene/mRNA condition: EC number
// (annotation), gene name, product (sequence symbol) and the UniProt, InterPro,
// MIM and PDB cross-references.
636 if ( in_protein || in_cds || in_gene || in_mrna ) {
637 final Matcher ec = ec_PATTERN.matcher( line );
639 e.addAnnotation( new Annotation( "EC", ec.group( 1 ) ) );
641 final Matcher gene = gene_PATTERN.matcher( line );
643 e.setGeneName( gene.group( 1 ) );
645 final Matcher uniprot = uniprot_PATTERN.matcher( line );
646 if ( uniprot.find() ) {
647 e.addCrossReference( new Accession( uniprot.group( 1 ), "uniprot" ) );
649 final Matcher interpro = interpro_PATTERN.matcher( line );
650 if ( interpro.find() ) {
651 e.addCrossReference( new Accession( interpro.group( 1 ), "interpro" ) );
653 final Matcher mim = mim_PATTERN.matcher( line );
655 e.addCrossReference( new Accession( mim.group( 1 ), "mim" ) );
657 final Matcher product = product_PATTERN.matcher( line );
658 if ( product.find() ) {
659 e.setSequenceSymbol( product.group( 1 ) );
661 final Matcher pdb = pdb_PATTERN.matcher( line );
663 e.addCrossReference( new Accession( pdb.group( 1 ), "pdb" ) );
// After all lines: the accumulated definition, trimmed, becomes the sequence
// name. Nothing is set when no definition text was collected.
667 if ( def.length() > 0 ) {
668 e.setSequenceName( def.toString().trim() );
// Helper used while assembling the definition text: appends the trimmed string s
// to sb. When sb already holds content an extra step runs first (presumably a
// separator is inserted between fragments -- TODO confirm).
// s must not be null, since s.trim() is called unconditionally.
// NOTE(review): the name "x" does not describe the purpose; a descriptive name
// would make the call sites easier to read.
673 private static void x( final StringBuilder sb, final String s ) {
674 if ( sb.length() > 0 ) {
677 sb.append( s.trim() );
681 public MolecularSequence getMolecularSequence() {
682 // TODO Auto-generated method stub