2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.ws.seqdb;
28 import java.util.List;
29 import java.util.SortedSet;
30 import java.util.TreeSet;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
34 import org.forester.go.GoTerm;
35 import org.forester.phylogeny.data.Accession;
36 import org.forester.phylogeny.data.Annotation;
37 import org.forester.sequence.MolecularSequence;
38 import org.forester.util.ForesterUtil;
40 public final class EbiDbEntry implements SequenceDatabaseEntry {
42 private final static boolean DEBUG = false;
43 private SortedSet<Annotation> _annotations;
44 private String _chromosome;
45 private SortedSet<Accession> _cross_references;
47 private String _gene_name;
50 // FIXME: this is actually an NCBI entry
51 //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
53 private String _provider;
54 private String _symbol;
55 private String _tax_id;
57 // TODO PUBMED 15798186
59 // source /db_xref="taxon:9606"
63 // /db_xref="MIM:604739"
66 // /db_xref="MIM:604739"
67 // /db_xref="InterPro:IPR002475"
69 // /db_xref="UniProtKB/TrEMBL:Q5J7V1" <- reparse?
73 LOCUS NM_184234 2881 bp mRNA linear PRI 16-JUN-2013
74 DEFINITION Homo sapiens RNA binding motif protein 39 (RBM39), transcript
77 VERSION NM_184234.2 GI:336176061
79 SOURCE Homo sapiens (human)
81 Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
82 Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
83 Catarrhini; Hominidae; Homo.
84 REFERENCE 1 (bases 1 to 2881)
85 AUTHORS Sillars-Hardebol,A.H., Carvalho,B., Belien,J.A., de Wit,M.,
86 Delis-van Diemen,P.M., Tijssen,M., van de Wiel,M.A., Ponten,F.,
87 Meijer,G.A. and Fijneman,R.J.
88 TITLE CSE1L, DIDO1 and RBM39 in colorectal adenoma to carcinoma
90 JOURNAL Cell Oncol (Dordr) 35 (4), 293-300 (2012)
92 REMARK GeneRIF: Data show that CSE1L, DIDO1 and RBM39 mRNA expression
93 levels correlated with chromosome 20q DNA copy number status.
94 REFERENCE 2 (bases 1 to 2881)
95 AUTHORS Huang,G., Zhou,Z., Wang,H. and Kleinerman,E.S.
96 TITLE CAPER-alpha alternative splicing regulates the expression of
97 vascular endothelial growth factor(1)(6)(5) in Ewing sarcoma cells
98 JOURNAL Cancer 118 (8), 2106-2116 (2012)
100 REMARK GeneRIF: Increased VEGF(165) expression is secondary to the
101 down-regulation of CAPER-alpha by EWS/FLI-1. CAPER-alpha mediates
102 alternative splicing and controls the shift from VEGF(189) to
104 REFERENCE 3 (bases 1 to 2881)
105 AUTHORS Han,B., Stockwin,L.H., Hancock,C., Yu,S.X., Hollingshead,M.G. and
107 TITLE Proteomic analysis of nuclei isolated from cancer cell lines
108 treated with indenoisoquinoline NSC 724998, a novel topoisomerase I
110 JOURNAL J. Proteome Res. 9 (8), 4016-4027 (2010)
112 REMARK Erratum:[J Proteome Res. 2011 Apr 1;10(4):2128]
113 REFERENCE 4 (bases 1 to 2881)
114 AUTHORS Zhang,J.Y., Looi,K.S. and Tan,E.M.
115 TITLE Identification of tumor-associated antigens as diagnostic and
116 predictive biomarkers in cancer
117 JOURNAL Methods Mol. Biol. 520, 1-10 (2009)
119 REFERENCE 5 (bases 1 to 2881)
120 AUTHORS Dutta,J., Fan,G. and Gelinas,C.
121 TITLE CAPERalpha is a novel Rel-TAD-interacting factor that inhibits
122 lymphocyte transformation by the potent Rel/NF-kappaB oncoprotein
124 JOURNAL J. Virol. 82 (21), 10792-10802 (2008)
126 REMARK GeneRIF: this study identifies CAPERalpha (RNA binding motif
127 protein 39) as a new transcriptional coregulator for v-Rel and
128 reveals an important role in modulating Rel's oncogenic activity.
129 REFERENCE 6 (bases 1 to 2881)
130 AUTHORS Cazalla,D., Newton,K. and Caceres,J.F.
131 TITLE A novel SR-related protein is required for the second step of
133 JOURNAL Mol. Cell. Biol. 25 (8), 2969-2980 (2005)
135 REFERENCE 7 (bases 1 to 2881)
136 AUTHORS Dowhan,D.H., Hong,E.P., Auboeuf,D., Dennis,A.P., Wilson,M.M.,
137 Berget,S.M. and O'Malley,B.W.
138 TITLE Steroid hormone receptor coactivation and alternative RNA splicing
139 by U2AF65-related proteins CAPERalpha and CAPERbeta
140 JOURNAL Mol. Cell 17 (3), 429-439 (2005)
142 REFERENCE 8 (bases 1 to 2881)
143 AUTHORS Sun,N.N., Fastje,C.D., Wong,S.S., Sheppard,P.R., Macdonald,S.J.,
144 Ridenour,G., Hyde,J.D. and Witten,M.L.
145 TITLE Dose-dependent transcriptome changes by metal ores on a human acute
146 lymphoblastic leukemia cell line
147 JOURNAL Toxicol Ind Health 19 (7-10), 157-163 (2003)
149 REMARK GeneRIF: 10 genes were down-regulated following treatment of the
150 T-ALL cells with 0.15 and 1.5 microg/mL of metal ores at 72 h
151 REFERENCE 9 (bases 1 to 2881)
152 AUTHORS Jung,D.J., Na,S.Y., Na,D.S. and Lee,J.W.
153 TITLE Molecular cloning and characterization of CAPER, a novel
154 coactivator of activating protein-1 and estrogen receptors
155 JOURNAL J. Biol. Chem. 277 (2), 1229-1234 (2002)
157 REMARK GeneRIF: This paper describes the mouse gene.
158 REFERENCE 10 (bases 1 to 2881)
159 AUTHORS Imai,H., Chan,E.K., Kiyosawa,K., Fu,X.D. and Tan,E.M.
160 TITLE Novel nuclear autoantigen with splicing factor motifs identified
161 with antibody from hepatocellular carcinoma
162 JOURNAL J. Clin. Invest. 92 (5), 2419-2426 (1993)
164 COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The
165 reference sequence was derived from DC346351.1, BC141835.1 and
167 On Jun 16, 2011 this sequence version replaced gi:35493810.
169 Summary: This gene encodes a member of the U2AF65 family of
170 proteins. The encoded protein is found in the nucleus, where it
171 co-localizes with core spliceosomal proteins. It has been shown to
172 play a role in both steroid hormone receptor-mediated transcription
173 and alternative splicing, and it is also a transcriptional
174 coregulator of the viral oncoprotein v-Rel. Multiple transcript
175 variants have been observed for this gene. A related pseudogene has
176 been identified on chromosome X. [provided by RefSeq, Aug 2011].
178 Transcript Variant: This variant (1) encodes the longest isoform
179 (a, also called CC1.4).
181 Publication Note: This RefSeq record includes a subset of the
182 publications that are available for this gene. Please see the Gene
183 record to access additional publications.
185 ##Evidence-Data-START##
186 Transcript exon combination :: BC141835.1, L10911.1 [ECO:0000332]
187 RNAseq introns :: mixed/partial sample support
188 ERS025081, ERS025082 [ECO:0000350]
189 ##Evidence-Data-END##
190 COMPLETENESS: complete on the 3' end.
191 PRIMARY REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP
192 1-578 DC346351.1 3-580
193 579-2872 BC141835.1 429-2722
194 2873-2881 C75555.1 1-9 c
195 FEATURES Location/Qualifiers
197 /organism="Homo sapiens"
199 /db_xref="taxon:9606"
204 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
205 /note="RNA binding motif protein 39"
206 /db_xref="GeneID:9584"
207 /db_xref="HGNC:15923"
208 /db_xref="HPRD:09201"
209 /db_xref="MIM:604739"
212 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
213 /inference="alignment:Splign:1.39.8"
216 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
217 /standard_name="REN58946"
218 /db_xref="UniSTS:383746"
219 misc_feature 221..223
221 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
222 /note="upstream in-frame stop codon"
225 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
226 /standard_name="G64285"
227 /db_xref="UniSTS:158667"
230 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
231 /inference="alignment:Splign:1.39.8"
234 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
235 /note="isoform a is encoded by transcript variant 1;
236 coactivator of activating protein-1 and estrogen
237 receptors; functional spliceosome-associated protein 59;
238 RNA-binding region (RNP1, RRM) containing 2;
239 hepatocellular carcinoma protein 1; splicing factor HCC1"
241 /product="RNA-binding protein 39 isoform a"
242 /protein_id="NP_909122.1"
243 /db_xref="GI:35493811"
244 /db_xref="CCDS:CCDS13266.1"
245 /db_xref="GeneID:9584"
246 /db_xref="HGNC:15923"
247 /db_xref="HPRD:09201"
248 /db_xref="MIM:604739"
249 /translation="MADDIDIEAMLEAPYKKDENKLSSANGHEERSKKRKKSKSRSRS
250 HERKRSKSKERKRSRDRERKKSKSRERKRSRSKERRRSRSRSRDRRFRGRYRSPYSGP
251 KFNSAIRGKIGLPHSIKLSRRRSRSKSPFRKDKSPVREPIDNLTPEERDARTVFCMQL
252 AARIRPRDLEEFFSTVGKVRDVRMISDRNSRRSKGIAYVEFVDVSSVPLAIGLTGQRV
253 LGVPIIVQASQAEKNRAAAMANNLQKGSAGPMRLYVGSLHFNITEDMLRGIFEPFGRI
254 ESIQLMMDSETGRSKGYGFITFSDSECAKKALEQLNGFELAGRPMKVGHVTERTDASS
255 ASSFLDSDELERTGIDLGTTGRLQLMARLAEGTGLQIPPAAQQALQMSGSLAFGAVAE
256 FSFVIDLQTRLSQQTEASALAAAASVQPLATQCFQLSNMFNPQTEEEVGWDTEIKDDV
257 IEECNKHGGVIHIYVDKNSAQGNVYVKCPSIAAAIAAVNALHGRWFAGKMITAAYVPL
258 PTYHNLFPDSMTATQLLVPSRR"
259 misc_feature 413..415
261 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
262 /experiment="experimental evidence, no additional details
264 /note="N-acetylalanine; propagated from
265 UniProtKB/Swiss-Prot (Q14498.2); acetylation site"
269 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
270 /inference="alignment:Splign:1.39.8"
274 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
275 /inference="alignment:Splign:1.39.8"
278 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
279 /standard_name="REN58786"
280 /db_xref="UniSTS:383586"
283 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
284 /standard_name="D19S1033"
285 /db_xref="UniSTS:154759"
288 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
289 /standard_name="REN58785"
290 /db_xref="UniSTS:383585"
292 polyA_signal 2851..2856
294 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
297 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
299 1 atttggagct tggggcagct tctcgcgaga gcccgtgctg agggctctgt gaggccccgt
300 61 gtgtttgtgt gtgtgtatgt gtgctggtga atgtgagtac agggaagcag cggccgccat
301 121 ttcagggagc ttgtcgacgc tgtcgcaggg gtggatcctg agctgccgaa gccgccgtcc
302 181 tgctctcccg cgtgggcttc tctaattcca ttgttttttt tagattctct cgggcctagc
303 241 cgtccttgga acccgatatt cgggctgggc ggttccgcgg cctgggccta ggggcttaac
308 private EbiDbEntry() {
// Cloning is not supported for this entry type: the method unconditionally
// throws CloneNotSupportedException instead of returning a copy.
312 public Object clone() throws CloneNotSupportedException {
313 throw new CloneNotSupportedException();
317 public String getAccession() {
322 public SortedSet<Annotation> getAnnotations() {
327 public String getChromosome() {
332 public SortedSet<Accession> getCrossReferences() {
333 return _cross_references;
337 public String getGeneName() {
342 public SortedSet<GoTerm> getGoTerms() {
347 public String getMap() {
352 public String getProvider() {
357 public String getSequenceName() {
362 public String getSequenceSymbol() {
367 public String getTaxonomyIdentifier() {
372 public String getTaxonomyScientificName() {
// Returns true only when ALL of the following are empty according to
// ForesterUtil.isEmpty: accession, sequence name, taxonomy scientific name,
// taxonomy identifier and sequence symbol. Other data held by the entry
// (e.g. gene name, chromosome, cross-references) is not consulted here.
377 public boolean isEmpty() {
378 return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
379 && ForesterUtil.isEmpty( getTaxonomyScientificName() )
380 && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
383 public void setProvider( final String provider ) {
384 _provider = provider;
// Adds one annotation to this entry. The backing TreeSet is only created
// on the first call (while _annotations is still null).
387 private void addAnnotation( final Annotation annotation ) {
388 if ( _annotations == null ) {
389 _annotations = new TreeSet<Annotation>();
391 _annotations.add( annotation );
// Adds one cross-reference accession to this entry. The backing TreeSet is
// only created on the first call (while _cross_references is still null).
394 private void addCrossReference( final Accession accession ) {
395 if ( _cross_references == null ) {
396 _cross_references = new TreeSet<Accession>();
// NOTE(review): the condition guarding this trace output is not visible in
// this excerpt -- presumably the DEBUG flag; confirm before relying on it.
399 System.out.println( "XREF ADDED: " + accession );
401 _cross_references.add( accession );
404 private void setAccession( final String pa ) {
410 private void setChromosome( final String chromosome ) {
411 _chromosome = chromosome;
// First value wins: the gene name is only assigned while _gene_name is
// still null, so later calls do not overwrite an already stored name.
414 private void setGeneName( final String gene_name ) {
415 if ( _gene_name == null ) {
416 _gene_name = gene_name;
420 private void setMap( final String map ) {
424 private void setSequenceName( final String rec_name ) {
430 private void setSequenceSymbol( final String symbol ) {
434 private void setTaxId( final String tax_id ) {
435 if ( _tax_id == null ) {
440 private void setTaxonomyScientificName( final String os ) {
446 // public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
447 // final EbiDbEntry e = new EbiDbEntry();
448 // for( final String line : lines ) {
449 // if ( line.startsWith( "PA" ) ) {
450 // e.setPA( SequenceDbWsTools.extractFrom( line, "PA" ) );
452 // else if ( line.startsWith( "DE" ) ) {
453 // e.setDe( SequenceDbWsTools.extractFrom( line, "DE" ) );
455 // else if ( line.startsWith( "OS" ) ) {
456 // if ( line.indexOf( "(" ) > 0 ) {
457 // e.setOs( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
460 // e.setOs( SequenceDbWsTools.extractFrom( line, "OS" ) );
463 // else if ( line.startsWith( "OX" ) ) {
464 // if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
465 // e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
471 public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List<String> lines ) {
472 final Pattern X_PATTERN = Pattern.compile( "^[A-Z]+" );
473 final Pattern chromosome_PATTERN = Pattern.compile( "\\s+/chromosome=\"(\\w+)\"" );
474 final Pattern map_PATTERN = Pattern.compile( "\\s+/map=\"([\\w+\\.]+)\"" ); // '+' after the class: without it the group matched exactly ONE character, so multi-character /map values never matched
475 final Pattern gene_PATTERN = Pattern.compile( "\\s+/gene=\"(.+)\"" );
476 final Pattern mim_PATTERN = Pattern.compile( "\\s+/db_xref=\"MIM:(\\d+)\"" );
477 final Pattern taxon_PATTERN = Pattern.compile( "\\s+/db_xref=\"taxon:(\\d+)\"" );
478 final Pattern interpro_PATTERN = Pattern.compile( "\\s+/db_xref=\"InterPro:([A-Z0-9]+)\"" );
479 final Pattern uniprot_PATTERN = Pattern.compile( "\\s+/db_xref=\"UniProtKB/[A-Za-z-]*:(\\w+)\"" );
480 final Pattern hgnc_PATTERN = Pattern.compile( "\\s+/db_xref=\"[A-Z:]*HGNC:(\\d+)\"" );
481 final Pattern geneid_PATTERN = Pattern.compile( "\\s+/db_xref=\"GeneID:(\\d+)\"" );
482 final Pattern pdb_PATTERN = Pattern.compile( "\\s+/db_xref=\"PDB:([A-Z0-9]+)\"" );
483 final Pattern ec_PATTERN = Pattern.compile( "\\s+/EC_number=\"([\\.\\-\\d]+)\"" );
484 final Pattern product_PATTERN = Pattern.compile( "\\s+/product=\"(\\w{1,10})\"" );
485 final EbiDbEntry e = new EbiDbEntry();
486 final StringBuilder def = new StringBuilder();
487 boolean in_definition = false;
488 boolean in_features = false;
489 boolean in_source = false;
490 boolean in_gene = false;
491 boolean in_cds = false;
492 boolean in_mrna = false;
493 boolean in_protein = false;
494 for( final String line : lines ) {
495 if ( line.startsWith( "ACCESSION " ) ) {
496 e.setAccession( SequenceDbWsTools.extractFrom( line, "ACCESSION" ) );
497 in_definition = false;
499 else if ( line.startsWith( "ID " ) ) {
500 e.setAccession( SequenceDbWsTools.extractFromTo( line, "ID", ";" ) );
501 in_definition = false;
503 else if ( line.startsWith( "DEFINITION " ) || ( line.startsWith( "DE " ) ) ) {
504 boolean definiton = false;
505 if ( line.startsWith( "DEFINITION " ) ) {
508 if ( line.indexOf( "[" ) > 0 ) {
510 x( def, ( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "[" ) ) );
513 x( def, ( SequenceDbWsTools.extractFromTo( line, "DE", "[" ) ) );
516 else if ( line.indexOf( "." ) > 0 ) {
518 x( def, ( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "." ) ) );
521 x( def, ( SequenceDbWsTools.extractFromTo( line, "DE", "." ) ) );
526 x( def, ( SequenceDbWsTools.extractFrom( line, "DEFINITION" ) ) );
529 x( def, ( SequenceDbWsTools.extractFrom( line, "DE" ) ) );
533 in_definition = true;
536 else if ( line.startsWith( " ORGANISM " ) ) {
537 if ( line.indexOf( "(" ) > 0 ) {
538 e.setTaxonomyScientificName( SequenceDbWsTools.extractFromTo( line, " ORGANISM", "(" ) );
541 e.setTaxonomyScientificName( SequenceDbWsTools.extractFrom( line, " ORGANISM" ) );
545 else if ( line.startsWith( "OS " ) ) {
546 if ( line.indexOf( "(" ) > 0 ) {
547 e.setTaxonomyScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
550 e.setTaxonomyScientificName( SequenceDbWsTools.extractFrom( line, "OS" ) );
553 else if ( line.startsWith( " " ) && in_definition ) {
555 if ( line.indexOf( "[" ) > 0 ) {
556 def.append( SequenceDbWsTools.extractTo( line, "[" ) );
558 else if ( line.indexOf( "." ) > 0 ) {
559 def.append( SequenceDbWsTools.extractTo( line, "." ) );
562 def.append( line.trim() );
566 in_definition = false;
568 if ( !line.startsWith( "FT " ) && X_PATTERN.matcher( line ).find() ) {
577 if ( line.startsWith( "FEATURES " ) || line.startsWith( "FT " ) ) {
580 if ( in_features && ( line.startsWith( " source " ) || line.startsWith( "FT source " ) ) ) {
587 if ( in_features && ( line.startsWith( " gene " ) || line.startsWith( "FT gene " ) ) ) {
594 if ( in_features && ( line.startsWith( " CDS " ) || line.startsWith( "FT CDS " ) ) ) {
601 if ( in_features && ( line.startsWith( " Protein " ) || line.startsWith( "FT Protein " ) ) ) {
608 if ( in_features && ( line.startsWith( " mRNA " ) || line.startsWith( "FT mRNA " ) ) ) {
616 final Matcher ti = taxon_PATTERN.matcher( line );
618 e.setTaxId( ti.group( 1 ) );
620 final Matcher chr = chromosome_PATTERN.matcher( line );
622 e.setChromosome( chr.group( 1 ) );
624 final Matcher map = map_PATTERN.matcher( line );
626 e.setMap( map.group( 1 ) );
629 if ( in_cds || in_gene ) {
630 final Matcher hgnc = hgnc_PATTERN.matcher( line );
632 e.addCrossReference( new Accession( hgnc.group( 1 ), "hgnc" ) );
634 final Matcher geneid = geneid_PATTERN.matcher( line );
635 if ( geneid.find() ) {
636 e.addCrossReference( new Accession( geneid.group( 1 ), "geneid" ) );
639 if ( in_protein || in_cds || in_gene || in_mrna ) {
640 final Matcher ec = ec_PATTERN.matcher( line );
642 e.addAnnotation( new Annotation( "EC", ec.group( 1 ) ) );
644 final Matcher gene = gene_PATTERN.matcher( line );
646 e.setGeneName( gene.group( 1 ) );
648 final Matcher uniprot = uniprot_PATTERN.matcher( line );
649 if ( uniprot.find() ) {
650 e.addCrossReference( new Accession( uniprot.group( 1 ), "uniprot" ) );
652 final Matcher interpro = interpro_PATTERN.matcher( line );
653 if ( interpro.find() ) {
654 e.addCrossReference( new Accession( interpro.group( 1 ), "interpro" ) );
656 final Matcher mim = mim_PATTERN.matcher( line );
658 e.addCrossReference( new Accession( mim.group( 1 ), "mim" ) );
660 final Matcher product = product_PATTERN.matcher( line );
661 if ( product.find() ) {
662 e.setSequenceSymbol( product.group( 1 ) );
664 final Matcher pdb = pdb_PATTERN.matcher( line );
666 e.addCrossReference( new Accession( pdb.group( 1 ), "pdb" ) );
670 if ( def.length() > 0 ) {
671 e.setSequenceName( def.toString().trim() );
676 private static void x( final StringBuilder sb, final String s ) {
677 if ( sb.length() > 0 ) {
680 sb.append( s.trim() );
684 public MolecularSequence getMolecularSequence() {
685 // TODO Auto-generated method stub