2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.ws.seqdb;
28 import java.util.List;
30 import org.forester.go.GoTerm;
31 import org.forester.phylogeny.data.Accession;
32 import org.forester.util.ForesterUtil;
34 public final class EbiDbEntry implements SequenceDatabaseEntry {
36 // FIXME: despite the class name, this actually represents an NCBI entry
37 //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
// Checked by setTaxId(String), which only proceeds while this is still null.
// NOTE(review): presumably holds the NCBI taxonomy id parsed from the record -- confirm.
41 private String _tax_id;
// NOTE(review): no reader or writer of _symbol is visible in this excerpt;
// presumably it backs getSequenceSymbol() -- confirm.
42 private String _symbol;
// Assigned by setProvider(String).
43 private String _provider;
45 // TODO PUBMED 15798186
47 // source /db_xref="taxon:9606"
51 // /db_xref="MIM:604739"
54 // /db_xref="MIM:604739"
55 // /db_xref="InterPro:IPR002475"
57 // /protein_id="NP_909122.1"
58 // /db_xref="UniProtKB/TrEMBL:Q5J7V1" <- reparse?
62 LOCUS NM_184234 2881 bp mRNA linear PRI 16-JUN-2013
63 DEFINITION Homo sapiens RNA binding motif protein 39 (RBM39), transcript
66 VERSION NM_184234.2 GI:336176061
68 SOURCE Homo sapiens (human)
70 Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
71 Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
72 Catarrhini; Hominidae; Homo.
73 REFERENCE 1 (bases 1 to 2881)
74 AUTHORS Sillars-Hardebol,A.H., Carvalho,B., Belien,J.A., de Wit,M.,
75 Delis-van Diemen,P.M., Tijssen,M., van de Wiel,M.A., Ponten,F.,
76 Meijer,G.A. and Fijneman,R.J.
77 TITLE CSE1L, DIDO1 and RBM39 in colorectal adenoma to carcinoma
79 JOURNAL Cell Oncol (Dordr) 35 (4), 293-300 (2012)
81 REMARK GeneRIF: Data show that CSE1L, DIDO1 and RBM39 mRNA expression
82 levels correlated with chromosome 20q DNA copy number status.
83 REFERENCE 2 (bases 1 to 2881)
84 AUTHORS Huang,G., Zhou,Z., Wang,H. and Kleinerman,E.S.
85 TITLE CAPER-alpha alternative splicing regulates the expression of
86 vascular endothelial growth factor(1)(6)(5) in Ewing sarcoma cells
87 JOURNAL Cancer 118 (8), 2106-2116 (2012)
89 REMARK GeneRIF: Increased VEGF(165) expression is secondary to the
90 down-regulation of CAPER-alpha by EWS/FLI-1. CAPER-alpha mediates
91 alternative splicing and controls the shift from VEGF(189) to
93 REFERENCE 3 (bases 1 to 2881)
94 AUTHORS Han,B., Stockwin,L.H., Hancock,C., Yu,S.X., Hollingshead,M.G. and
96 TITLE Proteomic analysis of nuclei isolated from cancer cell lines
97 treated with indenoisoquinoline NSC 724998, a novel topoisomerase I
99 JOURNAL J. Proteome Res. 9 (8), 4016-4027 (2010)
101 REMARK Erratum:[J Proteome Res. 2011 Apr 1;10(4):2128]
102 REFERENCE 4 (bases 1 to 2881)
103 AUTHORS Zhang,J.Y., Looi,K.S. and Tan,E.M.
104 TITLE Identification of tumor-associated antigens as diagnostic and
105 predictive biomarkers in cancer
106 JOURNAL Methods Mol. Biol. 520, 1-10 (2009)
108 REFERENCE 5 (bases 1 to 2881)
109 AUTHORS Dutta,J., Fan,G. and Gelinas,C.
110 TITLE CAPERalpha is a novel Rel-TAD-interacting factor that inhibits
111 lymphocyte transformation by the potent Rel/NF-kappaB oncoprotein
113 JOURNAL J. Virol. 82 (21), 10792-10802 (2008)
115 REMARK GeneRIF: this study identifies CAPERalpha (RNA binding motif
116 protein 39) as a new transcriptional coregulator for v-Rel and
117 reveals an important role in modulating Rel's oncogenic activity.
118 REFERENCE 6 (bases 1 to 2881)
119 AUTHORS Cazalla,D., Newton,K. and Caceres,J.F.
120 TITLE A novel SR-related protein is required for the second step of
122 JOURNAL Mol. Cell. Biol. 25 (8), 2969-2980 (2005)
124 REFERENCE 7 (bases 1 to 2881)
125 AUTHORS Dowhan,D.H., Hong,E.P., Auboeuf,D., Dennis,A.P., Wilson,M.M.,
126 Berget,S.M. and O'Malley,B.W.
127 TITLE Steroid hormone receptor coactivation and alternative RNA splicing
128 by U2AF65-related proteins CAPERalpha and CAPERbeta
129 JOURNAL Mol. Cell 17 (3), 429-439 (2005)
131 REFERENCE 8 (bases 1 to 2881)
132 AUTHORS Sun,N.N., Fastje,C.D., Wong,S.S., Sheppard,P.R., Macdonald,S.J.,
133 Ridenour,G., Hyde,J.D. and Witten,M.L.
134 TITLE Dose-dependent transcriptome changes by metal ores on a human acute
135 lymphoblastic leukemia cell line
136 JOURNAL Toxicol Ind Health 19 (7-10), 157-163 (2003)
138 REMARK GeneRIF: 10 genes were down-regulated following treatment of the
139 T-ALL cells with 0.15 and 1.5 microg/mL of metal ores at 72 h
140 REFERENCE 9 (bases 1 to 2881)
141 AUTHORS Jung,D.J., Na,S.Y., Na,D.S. and Lee,J.W.
142 TITLE Molecular cloning and characterization of CAPER, a novel
143 coactivator of activating protein-1 and estrogen receptors
144 JOURNAL J. Biol. Chem. 277 (2), 1229-1234 (2002)
146 REMARK GeneRIF: This paper describes the mouse gene.
147 REFERENCE 10 (bases 1 to 2881)
148 AUTHORS Imai,H., Chan,E.K., Kiyosawa,K., Fu,X.D. and Tan,E.M.
149 TITLE Novel nuclear autoantigen with splicing factor motifs identified
150 with antibody from hepatocellular carcinoma
151 JOURNAL J. Clin. Invest. 92 (5), 2419-2426 (1993)
153 COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The
154 reference sequence was derived from DC346351.1, BC141835.1 and
156 On Jun 16, 2011 this sequence version replaced gi:35493810.
158 Summary: This gene encodes a member of the U2AF65 family of
159 proteins. The encoded protein is found in the nucleus, where it
160 co-localizes with core spliceosomal proteins. It has been shown to
161 play a role in both steroid hormone receptor-mediated transcription
162 and alternative splicing, and it is also a transcriptional
163 coregulator of the viral oncoprotein v-Rel. Multiple transcript
164 variants have been observed for this gene. A related pseudogene has
165 been identified on chromosome X. [provided by RefSeq, Aug 2011].
167 Transcript Variant: This variant (1) encodes the longest isoform
168 (a, also called CC1.4).
170 Publication Note: This RefSeq record includes a subset of the
171 publications that are available for this gene. Please see the Gene
172 record to access additional publications.
174 ##Evidence-Data-START##
175 Transcript exon combination :: BC141835.1, L10911.1 [ECO:0000332]
176 RNAseq introns :: mixed/partial sample support
177 ERS025081, ERS025082 [ECO:0000350]
178 ##Evidence-Data-END##
179 COMPLETENESS: complete on the 3' end.
180 PRIMARY REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP
181 1-578 DC346351.1 3-580
182 579-2872 BC141835.1 429-2722
183 2873-2881 C75555.1 1-9 c
184 FEATURES Location/Qualifiers
186 /organism="Homo sapiens"
188 /db_xref="taxon:9606"
193 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
194 /note="RNA binding motif protein 39"
195 /db_xref="GeneID:9584"
196 /db_xref="HGNC:15923"
197 /db_xref="HPRD:09201"
198 /db_xref="MIM:604739"
201 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
202 /inference="alignment:Splign:1.39.8"
205 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
206 /standard_name="REN58946"
207 /db_xref="UniSTS:383746"
208 misc_feature 221..223
210 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
211 /note="upstream in-frame stop codon"
214 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
215 /standard_name="G64285"
216 /db_xref="UniSTS:158667"
219 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
220 /inference="alignment:Splign:1.39.8"
223 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
224 /note="isoform a is encoded by transcript variant 1;
225 coactivator of activating protein-1 and estrogen
226 receptors; functional spliceosome-associated protein 59;
227 RNA-binding region (RNP1, RRM) containing 2;
228 hepatocellular carcinoma protein 1; splicing factor HCC1"
230 /product="RNA-binding protein 39 isoform a"
231 /protein_id="NP_909122.1"
232 /db_xref="GI:35493811"
233 /db_xref="CCDS:CCDS13266.1"
234 /db_xref="GeneID:9584"
235 /db_xref="HGNC:15923"
236 /db_xref="HPRD:09201"
237 /db_xref="MIM:604739"
238 /translation="MADDIDIEAMLEAPYKKDENKLSSANGHEERSKKRKKSKSRSRS
239 HERKRSKSKERKRSRDRERKKSKSRERKRSRSKERRRSRSRSRDRRFRGRYRSPYSGP
240 KFNSAIRGKIGLPHSIKLSRRRSRSKSPFRKDKSPVREPIDNLTPEERDARTVFCMQL
241 AARIRPRDLEEFFSTVGKVRDVRMISDRNSRRSKGIAYVEFVDVSSVPLAIGLTGQRV
242 LGVPIIVQASQAEKNRAAAMANNLQKGSAGPMRLYVGSLHFNITEDMLRGIFEPFGRI
243 ESIQLMMDSETGRSKGYGFITFSDSECAKKALEQLNGFELAGRPMKVGHVTERTDASS
244 ASSFLDSDELERTGIDLGTTGRLQLMARLAEGTGLQIPPAAQQALQMSGSLAFGAVAE
245 FSFVIDLQTRLSQQTEASALAAAASVQPLATQCFQLSNMFNPQTEEEVGWDTEIKDDV
246 IEECNKHGGVIHIYVDKNSAQGNVYVKCPSIAAAIAAVNALHGRWFAGKMITAAYVPL
247 PTYHNLFPDSMTATQLLVPSRR"
248 misc_feature 413..415
250 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
251 /experiment="experimental evidence, no additional details
253 /note="N-acetylalanine; propagated from
254 UniProtKB/Swiss-Prot (Q14498.2); acetylation site"
255 misc_feature 692..694
257 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
258 /experiment="experimental evidence, no additional details
260 /note="Phosphotyrosine; propagated from
261 UniProtKB/Swiss-Prot (Q14498.2); phosphorylation site"
262 misc_feature 698..700
264 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
265 /experiment="experimental evidence, no additional details
267 /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
268 (Q14498.2); phosphorylation site"
269 misc_feature 707..709
271 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
272 /experiment="experimental evidence, no additional details
274 /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
275 (Q14498.2); phosphorylation site"
276 misc_feature 815..817
278 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
279 /experiment="experimental evidence, no additional details
281 /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
282 (Q14498.2); phosphorylation site"
283 misc_feature 845..847
285 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
286 /experiment="experimental evidence, no additional details
288 /note="Phosphothreonine; propagated from
289 UniProtKB/Swiss-Prot (Q14498.2); phosphorylation site"
290 misc_feature 1280..1627
292 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
293 /inference="non-experimental evidence, no additional
295 /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
296 Region: Interaction with JUN (By similarity)"
297 misc_feature 1280..1474
299 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
300 /inference="non-experimental evidence, no additional
302 /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
303 Region: Activating domain (By similarity)"
304 misc_feature 1409..1411
306 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
307 /experiment="experimental evidence, no additional details
309 /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
310 (Q14498.2); phosphorylation site"
311 misc_feature 1418..1420
313 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
314 /experiment="experimental evidence, no additional details
316 /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
317 (Q14498.2); phosphorylation site"
318 misc_feature 1430..1432
320 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
321 /experiment="experimental evidence, no additional details
323 /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
324 (Q14498.2); phosphorylation site"
325 misc_feature 1472..1627
327 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
328 /inference="non-experimental evidence, no additional
330 /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
331 Region: Interaction with ESR1 and ESR2 (By similarity)"
332 misc_feature 1625..1999
334 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
335 /inference="non-experimental evidence, no additional
337 /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
338 Region: Interaction with NCOA6 (By similarity)"
341 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
342 /inference="alignment:Splign:1.39.8"
345 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
346 /inference="alignment:Splign:1.39.8"
349 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
350 /inference="alignment:Splign:1.39.8"
353 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
354 /inference="alignment:Splign:1.39.8"
357 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
358 /inference="alignment:Splign:1.39.8"
361 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
362 /inference="alignment:Splign:1.39.8"
365 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
366 /inference="alignment:Splign:1.39.8"
369 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
370 /inference="alignment:Splign:1.39.8"
373 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
374 /inference="alignment:Splign:1.39.8"
377 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
378 /inference="alignment:Splign:1.39.8"
381 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
382 /inference="alignment:Splign:1.39.8"
385 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
386 /inference="alignment:Splign:1.39.8"
389 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
390 /inference="alignment:Splign:1.39.8"
393 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
394 /inference="alignment:Splign:1.39.8"
397 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
398 /inference="alignment:Splign:1.39.8"
401 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
402 /standard_name="REN58786"
403 /db_xref="UniSTS:383586"
406 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
407 /standard_name="D19S1033"
408 /db_xref="UniSTS:154759"
411 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
412 /standard_name="REN58785"
413 /db_xref="UniSTS:383585"
416 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
417 /standard_name="REN58784"
418 /db_xref="UniSTS:383584"
421 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
422 /standard_name="RH69003"
423 /db_xref="UniSTS:85360"
426 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
427 /standard_name="REN58783"
428 /db_xref="UniSTS:383583"
431 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
432 /standard_name="RH67917"
433 /db_xref="UniSTS:84037"
434 polyA_signal 2851..2856
436 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
439 /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
441 1 atttggagct tggggcagct tctcgcgaga gcccgtgctg agggctctgt gaggccccgt
442 61 gtgtttgtgt gtgtgtatgt gtgctggtga atgtgagtac agggaagcag cggccgccat
443 121 ttcagggagc ttgtcgacgc tgtcgcaggg gtggatcctg agctgccgaa gccgccgtcc
444 181 tgctctcccg cgtgggcttc tctaattcca ttgttttttt tagattctct cgggcctagc
445 241 cgtccttgga acccgatatt cgggctgggc ggttccgcgg cctgggccta ggggcttaac
446 301 agtagcaaca gaagcggcgg cggcggcagc agcagcagca gcagcagcaa tctcttcccg
447 361 aacacgagca ccacaggcgc ccgaaggccg gaacaggcgt ttagagaaaa tggcagacga
448 421 tattgatatt gaagcaatgc ttgaggctcc ttacaagaag gatgagaaca agttgagcag
449 481 tgccaacggc catgaagaac gtagcaaaaa gaggaaaaaa agcaagagca gaagtcgtag
450 541 tcatgaacga aagagaagca aaagtaagga acggaagcga agtagagaca gagaaaggaa
451 601 aaagagcaaa agccgtgaaa gaaagcgaag tagaagcaaa gagaggcgac ggagccgctc
452 661 aagaagtcga gatcgaagat ttagaggccg ctacagaagt ccttactccg gaccaaaatt
453 721 taacagtgcc atccgaggaa agattgggtt gcctcatagc atcaaattaa gcagacgacg
454 781 ttcccgaagc aaaagtccat tcagaaaaga caagagccct gtgagagaac ctattgataa
455 841 tttaactcct gaggaaagag atgcaaggac agtcttctgt atgcagctgg cggcaagaat
456 901 tcgaccaagg gatttggaag agtttttctc tacagtagga aaggttcgag atgtgaggat
457 961 gatttctgac agaaattcaa gacgttccaa aggaattgct tatgtggagt tcgtcgatgt
458 1021 tagctcagtg cctctagcaa taggattaac tggccaacga gttttaggcg tgccaatcat
459 1081 agtacaggca tcacaggcag aaaaaaacag agctgcagca atggcaaaca atttacaaaa
460 1141 gggaagtgct ggacctatga ggctttatgt gggctcatta cacttcaaca taactgaaga
461 1201 tatgcttcgt gggatctttg agccttttgg aagaattgaa agtatccagc tgatgatgga
462 1261 cagtgaaact ggtcgatcca agggatatgg atttattaca ttttctgact cagaatgtgc
463 1321 caaaaaggct ttggaacaac ttaatggatt tgaactagca ggaagaccaa tgaaagttgg
464 1381 tcatgttact gaacgtactg atgcttcgag tgctagttca tttttggaca gtgatgaact
465 1441 ggaaaggact ggaattgatt tgggaacaac tggtcgtctt cagttaatgg caagacttgc
466 1501 agagggtaca ggtttgcaga ttccgccagc agcacagcaa gctctacaga tgagtggctc
467 1561 tttggcattt ggtgctgtgg cagaattctc ttttgttata gatttgcaaa caagactttc
468 1621 ccagcagact gaagcttcag ctttagctgc agctgcctct gttcagccac ttgcaacaca
469 1681 atgtttccaa ctctctaaca tgtttaaccc tcaaacagaa gaagaagttg gatgggatac
470 1741 cgagattaag gatgatgtga ttgaagaatg taataaacat ggaggagtta ttcatattta
471 1801 tgttgacaaa aattcagctc agggcaatgt gtatgtgaag tgcccatcaa ttgctgcagc
472 1861 tattgctgct gtcaatgcat tgcatggcag gtggtttgct ggtaaaatga taacagcagc
473 1921 atatgtacct cttccaactt accacaacct gtttcctgat tctatgacag caacacagct
474 1981 actggttcca agtagacgat gaaggaagat atagtccctt atgtatatag ctttttttct
475 2041 ttcttgagaa ttcatcttga gttatctttt atttagataa aaataaagag gcaaggatct
476 2101 actgtcattt gtatgcaatt tcctgttacc ttgaaaaaat aaaaatgtta acaggaatgc
477 2161 agtgtgctca ttctccctaa atagtaaatc ccactgtata caaaactgtt ctcttgttct
478 2221 gccttttaaa atgttcatgt agaaaattaa tgaactatag gaatagctct aggagaacaa
479 2281 atgtgctttc tgtaaaaagg cagaccaggg atgtaatgtt tttaatgttt cagaagccta
480 2341 actttttaca cagtggttac atttcacatt tcactaatgt tgatatttgg ctgatggttg
481 2401 agcagtttct gaaatacaca tttagtgtat ggaaatacaa gacagctaaa gggctgtttg
482 2461 gttagcatct catcttgcat tctgatcaat tggcaagaaa gggagatttc aaaattatat
483 2521 ttcttgatgg tatcttttca attaatgtat ctgtaaaagt ttctttgtaa atactatgtg
484 2581 ttctggtgtg tcttaaaatt ccaaacaaaa tgatccctgc atttcctgaa gatgtttaaa
485 2641 cgtgagagtc tggtaggcaa agcagtctga gaaagaaata ggaaatgcag aaataggttt
486 2701 tgtctggttg catataatct ttgctctttt taagctctgt gagctctgaa atatattttt
487 2761 gggttacttc agtgtgtttg acaagacagc ttgatatttc tatcaaacaa atgactttca
488 2821 tattgcaaca atctttgtaa gaaccactca aataaaagtc tcttaaaaag gccaaaaaaa
493 private EbiDbEntry() {
/**
 * Cloning is not supported for this class.
 *
 * @return never returns normally
 * @throws CloneNotSupportedException always
 */
497 public Object clone() throws CloneNotSupportedException {
498 throw new CloneNotSupportedException();
/**
 * Builds an entry from the lines of a flat-file record whose fields are
 * introduced by the tags ACCESSION, DEFINITION and SOURCE.
 * <p>
 * The ACCESSION value is handed to setPA and the SOURCE value to setOs. The
 * DEFINITION text, which may continue on following lines that begin with a
 * space, is accumulated in a StringBuilder and, if non-empty, trimmed and
 * handed to setDe. All text extraction is delegated to SequenceDbWsTools
 * (extractFrom / extractFromTo / extractTo), whose exact semantics are not
 * shown here.
 *
 * @param lines the record, one text line per list element
 * @return NOTE(review): the return statement is not visible in this excerpt;
 *         presumably the populated entry -- confirm
 */
501 public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List<String> lines ) {
502 final EbiDbEntry e = new EbiDbEntry();
// Collects the definition text across the DEFINITION line and its continuation lines.
503 final StringBuilder def = new StringBuilder();
// Gates the continuation-line branch below. NOTE(review): the statements that
// update this flag are not visible in this excerpt.
504 boolean in_def = false;
505 for( final String line : lines ) {
506 // System.out.println( "-" + line );
507 if ( line.startsWith( "ACCESSION" ) ) {
508 e.setPA( SequenceDbWsTools.extractFrom( line, "ACCESSION" ) );
511 else if ( line.startsWith( "DEFINITION" ) ) {
// "> 0" is enough as a presence test: the line starts with the tag, so the
// marker can never be found at index 0.
// A "[" takes precedence over a "." as the end marker of the definition text.
512 if ( line.indexOf( "[" ) > 0 ) {
513 def.append( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "[" ) );
515 else if ( line.indexOf( "." ) > 0 ) {
516 def.append( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "." ) );
// NOTE(review): presumably the fallback when neither marker is present (the
// enclosing else is not visible in this excerpt).
519 def.append( SequenceDbWsTools.extractFrom( line, "DEFINITION" ) );
523 else if ( line.startsWith( "SOURCE" ) ) {
// When the line contains "(", the value is extracted with "(" as end marker.
524 if ( line.indexOf( "(" ) > 0 ) {
525 e.setOs( SequenceDbWsTools.extractFromTo( line, "SOURCE", "(" ) );
528 e.setOs( SequenceDbWsTools.extractFrom( line, "SOURCE" ) );
// A line starting with a space while in_def is set is handled as further
// definition text, using the same "[" / "." end markers as above.
532 else if ( line.startsWith( " " ) && in_def ) {
534 if ( line.indexOf( "[" ) > 0 ) {
535 def.append( SequenceDbWsTools.extractTo( line, "[" ) );
537 else if ( line.indexOf( "." ) > 0 ) {
538 def.append( SequenceDbWsTools.extractTo( line, "." ) );
541 def.append( line.trim() );
// setDe is only called when some definition text was collected.
548 if ( def.length() > 0 ) {
549 e.setDe( def.toString().trim() );
/**
 * Builds an entry from the lines of a flat-file record whose fields are
 * introduced by the two-letter line codes PA, DE, OS and OX.
 * <p>
 * PA is handed to setPA, DE to setDe, OS to setOs and the NCBI_TaxID value of
 * an OX line to setTaxId. All text extraction is delegated to
 * SequenceDbWsTools (extractFrom / extractFromTo), whose exact semantics are
 * not shown here.
 *
 * @param lines the record, one text line per list element
 * @return NOTE(review): the return statement is not visible in this excerpt;
 *         presumably the populated entry -- confirm
 */
554 public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
555 final EbiDbEntry e = new EbiDbEntry();
556 for( final String line : lines ) {
// Dispatch on the line code at the start of each line; the first matching
// prefix wins.
557 if ( line.startsWith( "PA" ) ) {
558 e.setPA( SequenceDbWsTools.extractFrom( line, "PA" ) );
560 else if ( line.startsWith( "DE" ) ) {
561 e.setDe( SequenceDbWsTools.extractFrom( line, "DE" ) );
563 else if ( line.startsWith( "OS" ) ) {
// "> 0" is enough as a presence test: the line starts with "OS", so "(" can
// never be found at index 0.
564 if ( line.indexOf( "(" ) > 0 ) {
565 e.setOs( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
568 e.setOs( SequenceDbWsTools.extractFrom( line, "OS" ) );
571 else if ( line.startsWith( "OX" ) ) {
// An OX line is only used when it contains "NCBI_TaxID="; the value is
// extracted with ";" as end marker.
572 if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
573 e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
581 public String getAccession() {
585 private void setPA( final String pa ) {
592 public String getSequenceName() {
596 private void setDe( final String rec_name ) {
603 public String getTaxonomyScientificName() {
607 private void setOs( final String os ) {
614 public String getTaxonomyIdentifier() {
/**
 * Handles a taxonomy identifier, but only while _tax_id is still null.
 * NOTE(review): the guarded statement is not visible in this excerpt;
 * presumably it assigns tax_id to _tax_id, so that the first value seen
 * is kept -- confirm.
 *
 * @param tax_id the taxonomy identifier extracted from the record
 */
618 private void setTaxId( final String tax_id ) {
619 if ( _tax_id == null ) {
625 public String getSequenceSymbol() {
/**
 * Tells whether this entry carries no data.
 *
 * @return true only if accession, sequence name, taxonomy scientific name,
 *         taxonomy identifier and sequence symbol are all empty according to
 *         ForesterUtil.isEmpty; the provider is not part of the check
 */
630 public boolean isEmpty() {
631 return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
632 && ForesterUtil.isEmpty( getTaxonomyScientificName() )
633 && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
637 public String getProvider() {
/**
 * Stores the given provider in the _provider field.
 *
 * @param provider the value to store; it is assigned as given
 */
641 public void setProvider( final String provider ) {
642 _provider = provider;
646 public String getGeneName() {
651 public List<GoTerm> getGoTerms() {
656 public List<Accession> getCrossReferences() {