inprogress
[jalview.git] / forester / java / src / org / forester / ws / seqdb / EbiDbEntry.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
4 //
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.ws.seqdb;
27
28 import java.util.List;
29
30 import org.forester.go.GoTerm;
31 import org.forester.phylogeny.data.Accession;
32 import org.forester.util.ForesterUtil;
33
34 public final class EbiDbEntry implements SequenceDatabaseEntry {
35
36     // FIXME actually this is NCBI entry
37     //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/
38     private String _pa;
39     private String _de;
40     private String _os;
41     private String _tax_id;
42     private String _symbol;
43     private String _provider;
44
45     // TODO  PUBMED   15798186
46     //TODO  (FEATURES) 
47     // source /db_xref="taxon:9606"
48     // gene            1..2881  
49     // /gene="RBM39" 
50     //
51     // /db_xref="MIM:604739"  
52     // CDS
53     // /gene="RBM39"
54     // /db_xref="MIM:604739"
55     // /db_xref="InterPro:IPR002475"
56     // /product="Bcl-2"
57     // /protein_id="NP_909122.1"
58     // /db_xref="UniProtKB/TrEMBL:Q5J7V1" <- reparse?
59     //
60     // Protein
61     /*
62     LOCUS       NM_184234               2881 bp    mRNA    linear   PRI 16-JUN-2013
63     DEFINITION  Homo sapiens RNA binding motif protein 39 (RBM39), transcript
64             variant 1, mRNA.
65     ACCESSION   NM_184234
66     VERSION     NM_184234.2  GI:336176061
67     KEYWORDS    RefSeq.
68     SOURCE      Homo sapiens (human)
69     ORGANISM  Homo sapiens
70             Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
71             Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
72             Catarrhini; Hominidae; Homo.
73     REFERENCE   1  (bases 1 to 2881)
74     AUTHORS   Sillars-Hardebol,A.H., Carvalho,B., Belien,J.A., de Wit,M.,
75             Delis-van Diemen,P.M., Tijssen,M., van de Wiel,M.A., Ponten,F.,
76             Meijer,G.A. and Fijneman,R.J.
77     TITLE     CSE1L, DIDO1 and RBM39 in colorectal adenoma to carcinoma
78             progression
79     JOURNAL   Cell Oncol (Dordr) 35 (4), 293-300 (2012)
80     PUBMED   22711543
81     REMARK    GeneRIF: Data show that CSE1L, DIDO1 and RBM39 mRNA expression
82             levels correlated with chromosome 20q DNA copy number status.
83     REFERENCE   2  (bases 1 to 2881)
84     AUTHORS   Huang,G., Zhou,Z., Wang,H. and Kleinerman,E.S.
85     TITLE     CAPER-alpha alternative splicing regulates the expression of
86             vascular endothelial growth factor(1)(6)(5) in Ewing sarcoma cells
87     JOURNAL   Cancer 118 (8), 2106-2116 (2012)
88     PUBMED   22009261
89     REMARK    GeneRIF: Increased VEGF(165) expression is secondary to the
90             down-regulation of CAPER-alpha by EWS/FLI-1. CAPER-alpha mediates
91             alternative splicing and controls the shift from VEGF(189) to
92             VEGF(165) .
93     REFERENCE   3  (bases 1 to 2881)
94     AUTHORS   Han,B., Stockwin,L.H., Hancock,C., Yu,S.X., Hollingshead,M.G. and
95             Newton,D.L.
96     TITLE     Proteomic analysis of nuclei isolated from cancer cell lines
97             treated with indenoisoquinoline NSC 724998, a novel topoisomerase I
98             inhibitor
99     JOURNAL   J. Proteome Res. 9 (8), 4016-4027 (2010)
100     PUBMED   20515076
101     REMARK    Erratum:[J Proteome Res. 2011 Apr 1;10(4):2128]
102     REFERENCE   4  (bases 1 to 2881)
103     AUTHORS   Zhang,J.Y., Looi,K.S. and Tan,E.M.
104     TITLE     Identification of tumor-associated antigens as diagnostic and
105             predictive biomarkers in cancer
106     JOURNAL   Methods Mol. Biol. 520, 1-10 (2009)
107     PUBMED   19381943
108     REFERENCE   5  (bases 1 to 2881)
109     AUTHORS   Dutta,J., Fan,G. and Gelinas,C.
110     TITLE     CAPERalpha is a novel Rel-TAD-interacting factor that inhibits
111             lymphocyte transformation by the potent Rel/NF-kappaB oncoprotein
112             v-Rel
113     JOURNAL   J. Virol. 82 (21), 10792-10802 (2008)
114     PUBMED   18753212
115     REMARK    GeneRIF: this study identifies CAPERalpha (RNA binding motif
116             protein 39) as a new transcriptional coregulator for v-Rel and
117             reveals an important role in modulating Rel's oncogenic activity.
118     REFERENCE   6  (bases 1 to 2881)
119     AUTHORS   Cazalla,D., Newton,K. and Caceres,J.F.
120     TITLE     A novel SR-related protein is required for the second step of
121             Pre-mRNA splicing
122     JOURNAL   Mol. Cell. Biol. 25 (8), 2969-2980 (2005)
123     PUBMED   15798186
124     REFERENCE   7  (bases 1 to 2881)
125     AUTHORS   Dowhan,D.H., Hong,E.P., Auboeuf,D., Dennis,A.P., Wilson,M.M.,
126             Berget,S.M. and O'Malley,B.W.
127     TITLE     Steroid hormone receptor coactivation and alternative RNA splicing
128             by U2AF65-related proteins CAPERalpha and CAPERbeta
129     JOURNAL   Mol. Cell 17 (3), 429-439 (2005)
130     PUBMED   15694343
131     REFERENCE   8  (bases 1 to 2881)
132     AUTHORS   Sun,N.N., Fastje,C.D., Wong,S.S., Sheppard,P.R., Macdonald,S.J.,
133             Ridenour,G., Hyde,J.D. and Witten,M.L.
134     TITLE     Dose-dependent transcriptome changes by metal ores on a human acute
135             lymphoblastic leukemia cell line
136     JOURNAL   Toxicol Ind Health 19 (7-10), 157-163 (2003)
137     PUBMED   15747776
138     REMARK    GeneRIF: 10 genes were down-regulated following treatment of the
139             T-ALL cells with 0.15 and 1.5 microg/mL of metal ores at 72 h
140     REFERENCE   9  (bases 1 to 2881)
141     AUTHORS   Jung,D.J., Na,S.Y., Na,D.S. and Lee,J.W.
142     TITLE     Molecular cloning and characterization of CAPER, a novel
143             coactivator of activating protein-1 and estrogen receptors
144     JOURNAL   J. Biol. Chem. 277 (2), 1229-1234 (2002)
145     PUBMED   11704680
146     REMARK    GeneRIF: This paper describes the mouse gene.
147     REFERENCE   10 (bases 1 to 2881)
148     AUTHORS   Imai,H., Chan,E.K., Kiyosawa,K., Fu,X.D. and Tan,E.M.
149     TITLE     Novel nuclear autoantigen with splicing factor motifs identified
150             with antibody from hepatocellular carcinoma
151     JOURNAL   J. Clin. Invest. 92 (5), 2419-2426 (1993)
152     PUBMED   8227358
153     COMMENT     REVIEWED REFSEQ: This record has been curated by NCBI staff. The
154             reference sequence was derived from DC346351.1, BC141835.1 and
155             C75555.1.
156             On Jun 16, 2011 this sequence version replaced gi:35493810.
157             
158             Summary: This gene encodes a member of the U2AF65 family of
159             proteins. The encoded protein is found in the nucleus, where it
160             co-localizes with core spliceosomal proteins. It has been shown to
161             play a role in both steroid hormone receptor-mediated transcription
162             and alternative splicing, and it is also a transcriptional
163             coregulator of the viral oncoprotein v-Rel. Multiple transcript
164             variants have been observed for this gene. A related pseudogene has
165             been identified on chromosome X. [provided by RefSeq, Aug 2011].
166             
167             Transcript Variant: This variant (1) encodes the longest isoform
168             (a, also called CC1.4).
169             
170             Publication Note:  This RefSeq record includes a subset of the
171             publications that are available for this gene. Please see the Gene
172             record to access additional publications.
173             
174             ##Evidence-Data-START##
175             Transcript exon combination :: BC141835.1, L10911.1 [ECO:0000332]
176             RNAseq introns              :: mixed/partial sample support
177                                            ERS025081, ERS025082 [ECO:0000350]
178             ##Evidence-Data-END##
179             COMPLETENESS: complete on the 3' end.
180     PRIMARY     REFSEQ_SPAN         PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP
181             1-578               DC346351.1         3-580
182             579-2872            BC141835.1         429-2722
183             2873-2881           C75555.1           1-9                 c
184     FEATURES             Location/Qualifiers
185      source          1..2881
186                      /organism="Homo sapiens"
187                      /mol_type="mRNA"
188                      /db_xref="taxon:9606"
189                      /chromosome="20"
190                      /map="20q11.22"
191      gene            1..2881
192                      /gene="RBM39"
193                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
194                      /note="RNA binding motif protein 39"
195                      /db_xref="GeneID:9584"
196                      /db_xref="HGNC:15923"
197                      /db_xref="HPRD:09201"
198                      /db_xref="MIM:604739"
199      exon            1..396
200                      /gene="RBM39"
201                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
202                      /inference="alignment:Splign:1.39.8"
203      STS             35..262
204                      /gene="RBM39"
205                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
206                      /standard_name="REN58946"
207                      /db_xref="UniSTS:383746"
208      misc_feature    221..223
209                      /gene="RBM39"
210                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
211                      /note="upstream in-frame stop codon"
212      STS             299..453
213                      /gene="RBM39"
214                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
215                      /standard_name="G64285"
216                      /db_xref="UniSTS:158667"
217      exon            397..460
218                      /gene="RBM39"
219                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
220                      /inference="alignment:Splign:1.39.8"
221      CDS             410..2002
222                      /gene="RBM39"
223                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
224                      /note="isoform a is encoded by transcript variant 1;
225                      coactivator of activating protein-1 and estrogen
226                      receptors; functional spliceosome-associated protein 59;
227                      RNA-binding region (RNP1, RRM) containing 2;
228                      hepatocellular carcinoma protein 1; splicing factor HCC1"
229                      /codon_start=1
230                      /product="RNA-binding protein 39 isoform a"
231                      /protein_id="NP_909122.1"
232                      /db_xref="GI:35493811"
233                      /db_xref="CCDS:CCDS13266.1"
234                      /db_xref="GeneID:9584"
235                      /db_xref="HGNC:15923"
236                      /db_xref="HPRD:09201"
237                      /db_xref="MIM:604739"
238                      /translation="MADDIDIEAMLEAPYKKDENKLSSANGHEERSKKRKKSKSRSRS
239                      HERKRSKSKERKRSRDRERKKSKSRERKRSRSKERRRSRSRSRDRRFRGRYRSPYSGP
240                      KFNSAIRGKIGLPHSIKLSRRRSRSKSPFRKDKSPVREPIDNLTPEERDARTVFCMQL
241                      AARIRPRDLEEFFSTVGKVRDVRMISDRNSRRSKGIAYVEFVDVSSVPLAIGLTGQRV
242                      LGVPIIVQASQAEKNRAAAMANNLQKGSAGPMRLYVGSLHFNITEDMLRGIFEPFGRI
243                      ESIQLMMDSETGRSKGYGFITFSDSECAKKALEQLNGFELAGRPMKVGHVTERTDASS
244                      ASSFLDSDELERTGIDLGTTGRLQLMARLAEGTGLQIPPAAQQALQMSGSLAFGAVAE
245                      FSFVIDLQTRLSQQTEASALAAAASVQPLATQCFQLSNMFNPQTEEEVGWDTEIKDDV
246                      IEECNKHGGVIHIYVDKNSAQGNVYVKCPSIAAAIAAVNALHGRWFAGKMITAAYVPL
247                      PTYHNLFPDSMTATQLLVPSRR"
248      misc_feature    413..415
249                      /gene="RBM39"
250                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
251                      /experiment="experimental evidence, no additional details
252                      recorded"
253                      /note="N-acetylalanine; propagated from
254                      UniProtKB/Swiss-Prot (Q14498.2); acetylation site"
255      misc_feature    692..694
256                      /gene="RBM39"
257                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
258                      /experiment="experimental evidence, no additional details
259                      recorded"
260                      /note="Phosphotyrosine; propagated from
261                      UniProtKB/Swiss-Prot (Q14498.2); phosphorylation site"
262      misc_feature    698..700
263                      /gene="RBM39"
264                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
265                      /experiment="experimental evidence, no additional details
266                      recorded"
267                      /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
268                      (Q14498.2); phosphorylation site"
269      misc_feature    707..709
270                      /gene="RBM39"
271                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
272                      /experiment="experimental evidence, no additional details
273                      recorded"
274                      /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
275                      (Q14498.2); phosphorylation site"
276      misc_feature    815..817
277                      /gene="RBM39"
278                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
279                      /experiment="experimental evidence, no additional details
280                      recorded"
281                      /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
282                      (Q14498.2); phosphorylation site"
283      misc_feature    845..847
284                      /gene="RBM39"
285                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
286                      /experiment="experimental evidence, no additional details
287                      recorded"
288                      /note="Phosphothreonine; propagated from
289                      UniProtKB/Swiss-Prot (Q14498.2); phosphorylation site"
290      misc_feature    1280..1627
291                      /gene="RBM39"
292                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
293                      /inference="non-experimental evidence, no additional
294                      details recorded"
295                      /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
296                      Region: Interaction with JUN (By similarity)"
297      misc_feature    1280..1474
298                      /gene="RBM39"
299                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
300                      /inference="non-experimental evidence, no additional
301                      details recorded"
302                      /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
303                      Region: Activating domain (By similarity)"
304      misc_feature    1409..1411
305                      /gene="RBM39"
306                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
307                      /experiment="experimental evidence, no additional details
308                      recorded"
309                      /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
310                      (Q14498.2); phosphorylation site"
311      misc_feature    1418..1420
312                      /gene="RBM39"
313                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
314                      /experiment="experimental evidence, no additional details
315                      recorded"
316                      /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
317                      (Q14498.2); phosphorylation site"
318      misc_feature    1430..1432
319                      /gene="RBM39"
320                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
321                      /experiment="experimental evidence, no additional details
322                      recorded"
323                      /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot
324                      (Q14498.2); phosphorylation site"
325      misc_feature    1472..1627
326                      /gene="RBM39"
327                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
328                      /inference="non-experimental evidence, no additional
329                      details recorded"
330                      /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
331                      Region: Interaction with ESR1 and ESR2 (By similarity)"
332      misc_feature    1625..1999
333                      /gene="RBM39"
334                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
335                      /inference="non-experimental evidence, no additional
336                      details recorded"
337                      /note="propagated from UniProtKB/Swiss-Prot (Q14498.2);
338                      Region: Interaction with NCOA6 (By similarity)"
339      exon            461..510
340                      /gene="RBM39"
341                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
342                      /inference="alignment:Splign:1.39.8"
343      exon            511..705
344                      /gene="RBM39"
345                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
346                      /inference="alignment:Splign:1.39.8"
347      exon            706..771
348                      /gene="RBM39"
349                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
350                      /inference="alignment:Splign:1.39.8"
351      exon            772..825
352                      /gene="RBM39"
353                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
354                      /inference="alignment:Splign:1.39.8"
355      exon            826..943
356                      /gene="RBM39"
357                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
358                      /inference="alignment:Splign:1.39.8"
359      exon            944..1096
360                      /gene="RBM39"
361                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
362                      /inference="alignment:Splign:1.39.8"
363      exon            1097..1234
364                      /gene="RBM39"
365                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
366                      /inference="alignment:Splign:1.39.8"
367      exon            1235..1300
368                      /gene="RBM39"
369                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
370                      /inference="alignment:Splign:1.39.8"
371      exon            1301..1505
372                      /gene="RBM39"
373                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
374                      /inference="alignment:Splign:1.39.8"
375      exon            1506..1583
376                      /gene="RBM39"
377                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
378                      /inference="alignment:Splign:1.39.8"
379      exon            1584..1634
380                      /gene="RBM39"
381                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
382                      /inference="alignment:Splign:1.39.8"
383      exon            1635..1716
384                      /gene="RBM39"
385                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
386                      /inference="alignment:Splign:1.39.8"
387      exon            1717..1822
388                      /gene="RBM39"
389                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
390                      /inference="alignment:Splign:1.39.8"
391      exon            1823..1901
392                      /gene="RBM39"
393                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
394                      /inference="alignment:Splign:1.39.8"
395      exon            1902..2874
396                      /gene="RBM39"
397                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
398                      /inference="alignment:Splign:1.39.8"
399      STS             1956..2182
400                      /gene="RBM39"
401                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
402                      /standard_name="REN58786"
403                      /db_xref="UniSTS:383586"
404      STS             2104..2148
405                      /gene="RBM39"
406                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
407                      /standard_name="D19S1033"
408                      /db_xref="UniSTS:154759"
409      STS             2145..2400
410                      /gene="RBM39"
411                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
412                      /standard_name="REN58785"
413                      /db_xref="UniSTS:383585"
414      STS             2349..2590
415                      /gene="RBM39"
416                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
417                      /standard_name="REN58784"
418                      /db_xref="UniSTS:383584"
419      STS             2450..2669
420                      /gene="RBM39"
421                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
422                      /standard_name="RH69003"
423                      /db_xref="UniSTS:85360"
424      STS             2579..2828
425                      /gene="RBM39"
426                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
427                      /standard_name="REN58783"
428                      /db_xref="UniSTS:383583"
429      STS             2639..2728
430                      /gene="RBM39"
431                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
432                      /standard_name="RH67917"
433                      /db_xref="UniSTS:84037"
434      polyA_signal    2851..2856
435                      /gene="RBM39"
436                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
437      polyA_site      2874
438                      /gene="RBM39"
439                      /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2"
440     ORIGIN      
441         1 atttggagct tggggcagct tctcgcgaga gcccgtgctg agggctctgt gaggccccgt
442        61 gtgtttgtgt gtgtgtatgt gtgctggtga atgtgagtac agggaagcag cggccgccat
443       121 ttcagggagc ttgtcgacgc tgtcgcaggg gtggatcctg agctgccgaa gccgccgtcc
444       181 tgctctcccg cgtgggcttc tctaattcca ttgttttttt tagattctct cgggcctagc
445       241 cgtccttgga acccgatatt cgggctgggc ggttccgcgg cctgggccta ggggcttaac
446       301 agtagcaaca gaagcggcgg cggcggcagc agcagcagca gcagcagcaa tctcttcccg
447       361 aacacgagca ccacaggcgc ccgaaggccg gaacaggcgt ttagagaaaa tggcagacga
448       421 tattgatatt gaagcaatgc ttgaggctcc ttacaagaag gatgagaaca agttgagcag
449       481 tgccaacggc catgaagaac gtagcaaaaa gaggaaaaaa agcaagagca gaagtcgtag
450       541 tcatgaacga aagagaagca aaagtaagga acggaagcga agtagagaca gagaaaggaa
451       601 aaagagcaaa agccgtgaaa gaaagcgaag tagaagcaaa gagaggcgac ggagccgctc
452       661 aagaagtcga gatcgaagat ttagaggccg ctacagaagt ccttactccg gaccaaaatt
453       721 taacagtgcc atccgaggaa agattgggtt gcctcatagc atcaaattaa gcagacgacg
454       781 ttcccgaagc aaaagtccat tcagaaaaga caagagccct gtgagagaac ctattgataa
455       841 tttaactcct gaggaaagag atgcaaggac agtcttctgt atgcagctgg cggcaagaat
456       901 tcgaccaagg gatttggaag agtttttctc tacagtagga aaggttcgag atgtgaggat
457       961 gatttctgac agaaattcaa gacgttccaa aggaattgct tatgtggagt tcgtcgatgt
458      1021 tagctcagtg cctctagcaa taggattaac tggccaacga gttttaggcg tgccaatcat
459      1081 agtacaggca tcacaggcag aaaaaaacag agctgcagca atggcaaaca atttacaaaa
460      1141 gggaagtgct ggacctatga ggctttatgt gggctcatta cacttcaaca taactgaaga
461      1201 tatgcttcgt gggatctttg agccttttgg aagaattgaa agtatccagc tgatgatgga
462      1261 cagtgaaact ggtcgatcca agggatatgg atttattaca ttttctgact cagaatgtgc
463      1321 caaaaaggct ttggaacaac ttaatggatt tgaactagca ggaagaccaa tgaaagttgg
464      1381 tcatgttact gaacgtactg atgcttcgag tgctagttca tttttggaca gtgatgaact
465      1441 ggaaaggact ggaattgatt tgggaacaac tggtcgtctt cagttaatgg caagacttgc
466      1501 agagggtaca ggtttgcaga ttccgccagc agcacagcaa gctctacaga tgagtggctc
467      1561 tttggcattt ggtgctgtgg cagaattctc ttttgttata gatttgcaaa caagactttc
468      1621 ccagcagact gaagcttcag ctttagctgc agctgcctct gttcagccac ttgcaacaca
469      1681 atgtttccaa ctctctaaca tgtttaaccc tcaaacagaa gaagaagttg gatgggatac
470      1741 cgagattaag gatgatgtga ttgaagaatg taataaacat ggaggagtta ttcatattta
471      1801 tgttgacaaa aattcagctc agggcaatgt gtatgtgaag tgcccatcaa ttgctgcagc
472      1861 tattgctgct gtcaatgcat tgcatggcag gtggtttgct ggtaaaatga taacagcagc
473      1921 atatgtacct cttccaactt accacaacct gtttcctgat tctatgacag caacacagct
474      1981 actggttcca agtagacgat gaaggaagat atagtccctt atgtatatag ctttttttct
475      2041 ttcttgagaa ttcatcttga gttatctttt atttagataa aaataaagag gcaaggatct
476      2101 actgtcattt gtatgcaatt tcctgttacc ttgaaaaaat aaaaatgtta acaggaatgc
477      2161 agtgtgctca ttctccctaa atagtaaatc ccactgtata caaaactgtt ctcttgttct
478      2221 gccttttaaa atgttcatgt agaaaattaa tgaactatag gaatagctct aggagaacaa
479      2281 atgtgctttc tgtaaaaagg cagaccaggg atgtaatgtt tttaatgttt cagaagccta
480      2341 actttttaca cagtggttac atttcacatt tcactaatgt tgatatttgg ctgatggttg
481      2401 agcagtttct gaaatacaca tttagtgtat ggaaatacaa gacagctaaa gggctgtttg
482      2461 gttagcatct catcttgcat tctgatcaat tggcaagaaa gggagatttc aaaattatat
483      2521 ttcttgatgg tatcttttca attaatgtat ctgtaaaagt ttctttgtaa atactatgtg
484      2581 ttctggtgtg tcttaaaatt ccaaacaaaa tgatccctgc atttcctgaa gatgtttaaa
485      2641 cgtgagagtc tggtaggcaa agcagtctga gaaagaaata ggaaatgcag aaataggttt
486      2701 tgtctggttg catataatct ttgctctttt taagctctgt gagctctgaa atatattttt
487      2761 gggttacttc agtgtgtttg acaagacagc ttgatatttc tatcaaacaa atgactttca
488      2821 tattgcaaca atctttgtaa gaaccactca aataaaagtc tcttaaaaag gccaaaaaaa
489      2881 a
490     
491     
492     */
493     private EbiDbEntry() {
494     }
495
496     @Override
497     public Object clone() throws CloneNotSupportedException {
498         throw new CloneNotSupportedException();
499     }
500
501     public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List<String> lines ) {
502         final EbiDbEntry e = new EbiDbEntry();
503         final StringBuilder def = new StringBuilder();
504         boolean in_def = false;
505         for( final String line : lines ) {
506             //  System.out.println( "-" + line );
507             if ( line.startsWith( "ACCESSION" ) ) {
508                 e.setPA( SequenceDbWsTools.extractFrom( line, "ACCESSION" ) );
509                 in_def = false;
510             }
511             else if ( line.startsWith( "DEFINITION" ) ) {
512                 if ( line.indexOf( "[" ) > 0 ) {
513                     def.append( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "[" ) );
514                 }
515                 else if ( line.indexOf( "." ) > 0 ) {
516                     def.append( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "." ) );
517                 }
518                 else {
519                     def.append( SequenceDbWsTools.extractFrom( line, "DEFINITION" ) );
520                 }
521                 in_def = true;
522             }
523             else if ( line.startsWith( "SOURCE" ) ) {
524                 if ( line.indexOf( "(" ) > 0 ) {
525                     e.setOs( SequenceDbWsTools.extractFromTo( line, "SOURCE", "(" ) );
526                 }
527                 else {
528                     e.setOs( SequenceDbWsTools.extractFrom( line, "SOURCE" ) );
529                 }
530                 in_def = false;
531             }
532             else if ( line.startsWith( " " ) && in_def ) {
533                 def.append( " " );
534                 if ( line.indexOf( "[" ) > 0 ) {
535                     def.append( SequenceDbWsTools.extractTo( line, "[" ) );
536                 }
537                 else if ( line.indexOf( "." ) > 0 ) {
538                     def.append( SequenceDbWsTools.extractTo( line, "." ) );
539                 }
540                 else {
541                     def.append( line.trim() );
542                 }
543             }
544             else {
545                 in_def = false;
546             }
547         }
548         if ( def.length() > 0 ) {
549             e.setDe( def.toString().trim() );
550         }
551         return e;
552     }
553
554     public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
555         final EbiDbEntry e = new EbiDbEntry();
556         for( final String line : lines ) {
557             if ( line.startsWith( "PA" ) ) {
558                 e.setPA( SequenceDbWsTools.extractFrom( line, "PA" ) );
559             }
560             else if ( line.startsWith( "DE" ) ) {
561                 e.setDe( SequenceDbWsTools.extractFrom( line, "DE" ) );
562             }
563             else if ( line.startsWith( "OS" ) ) {
564                 if ( line.indexOf( "(" ) > 0 ) {
565                     e.setOs( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
566                 }
567                 else {
568                     e.setOs( SequenceDbWsTools.extractFrom( line, "OS" ) );
569                 }
570             }
571             else if ( line.startsWith( "OX" ) ) {
572                 if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
573                     e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
574                 }
575             }
576         }
577         return e;
578     }
579
580     @Override
581     public String getAccession() {
582         return _pa;
583     }
584
585     private void setPA( final String pa ) {
586         if ( _pa == null ) {
587             _pa = pa;
588         }
589     }
590
591     @Override
592     public String getSequenceName() {
593         return _de;
594     }
595
596     private void setDe( final String rec_name ) {
597         if ( _de == null ) {
598             _de = rec_name;
599         }
600     }
601
602     @Override
603     public String getTaxonomyScientificName() {
604         return _os;
605     }
606
607     private void setOs( final String os ) {
608         if ( _os == null ) {
609             _os = os;
610         }
611     }
612
613     @Override
614     public String getTaxonomyIdentifier() {
615         return _tax_id;
616     }
617
618     private void setTaxId( final String tax_id ) {
619         if ( _tax_id == null ) {
620             _tax_id = tax_id;
621         }
622     }
623
624     @Override
625     public String getSequenceSymbol() {
626         return _symbol;
627     }
628
629     @Override
630     public boolean isEmpty() {
631         return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
632                 && ForesterUtil.isEmpty( getTaxonomyScientificName() )
633                 && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
634     }
635
636     @Override
637     public String getProvider() {
638         return _provider;
639     }
640
641     public void setProvider( final String provider ) {
642         _provider = provider;
643     }
644
645     @Override
646     public String getGeneName() {
647         return null;
648     }
649
650     @Override
651     public List<GoTerm> getGoTerms() {
652         return null;
653     }
654
655     @Override
656     public List<Accession> getCrossReferences() {
657         return null;
658     }
659 }