From 482c9a54ff2b83c947c43449c7b0d86dc9c8dafd Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 4 Oct 2013 22:40:39 +0000 Subject: [PATCH] inprogress --- .../archaeopteryx/tools/SequenceDataRetriver.java | 8 +- .../src/org/forester/phylogeny/PhylogenyNode.java | 9 + forester/java/src/org/forester/test/Test.java | 130 ++++-- .../src/org/forester/ws/seqdb/DatabaseTools.java | 20 - .../java/src/org/forester/ws/seqdb/EbiDbEntry.java | 494 +++++++++++++++++++- .../org/forester/ws/seqdb/SequenceDbWsTools.java | 45 +- .../src/org/forester/ws/seqdb/UniProtEntry.java | 16 +- 7 files changed, 625 insertions(+), 97 deletions(-) delete mode 100644 forester/java/src/org/forester/ws/seqdb/DatabaseTools.java diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index cc6a444..fcb71fc 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -38,11 +38,10 @@ import org.forester.ws.seqdb.SequenceDbWsTools; public final class SequenceDataRetriver extends RunnableProcess { - private final static int DEFAULT_LINES_TO_RETURN = 4000; private final Phylogeny _phy; private final MainFrameApplication _mf; private final TreePanel _treepanel; - public final static boolean DEBUG = false; + public final static boolean DEBUG = false; public SequenceDataRetriver( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) { _phy = phy; @@ -59,7 +58,10 @@ public final class SequenceDataRetriver extends RunnableProcess { start( _mf, "sequence data" ); SortedSet not_found = null; try { - not_found = SequenceDbWsTools.obtainSeqInformation( _phy, false, true, DEFAULT_LINES_TO_RETURN ); + not_found = SequenceDbWsTools.obtainSeqInformation( _phy, + false, + true, + SequenceDbWsTools.DEFAULT_LINES_TO_RETURN ); } catch ( final UnknownHostException e ) { JOptionPane.showMessageDialog( _mf, diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index 2b7c2cf..6b180ca 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -74,6 +74,15 @@ public final class PhylogenyNode implements Comparable { setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!) } + public PhylogenyNode( final String node_name ) { + setId( PhylogenyNode.getNodeCount() ); + PhylogenyNode.increaseNodeCount(); + setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!) + if ( node_name != null ) { + getNodeData().setNodeName( node_name ); + } + } + private PhylogenyNode( final String nhx, final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 410dd17..64b7807 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -37,7 +37,6 @@ import java.util.List; import java.util.Locale; import java.util.Set; import java.util.SortedSet; -import java.util.TreeSet; import org.forester.application.support_transfer; import org.forester.archaeopteryx.TreePanelUtil; @@ -128,6 +127,7 @@ import org.forester.ws.wabi.TxSearch.TAX_RANK; @SuppressWarnings( "unused") public final class Test { + private final static boolean PERFORM_DB_TESTS = true; private final static double ZERO_DIFF = 1.0E-9; private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" @@ -255,15 +255,17 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Sequence DB tools 2: " ); - if ( testSequenceDbWsTools2() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - System.exit( -1 ); + if ( PERFORM_DB_TESTS ) { + System.out.print( "Sequence DB tools 2: " ); + if ( testSequenceDbWsTools2() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + System.exit( -1 ); + } } System.exit( 0 ); System.out.print( "Hmmscan output parser: " ); @@ -836,23 +838,27 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Uniprot Entry Retrieval: " ); - if ( Test.testUniprotEntryRetrieval() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.out.print( "Uniprot Taxonomy Search: " ); - if ( Test.testUniprotTaxonomySearch() ) { - System.out.println( "OK." ); - succeeded++; + if ( PERFORM_DB_TESTS ) { + System.out.print( "Uniprot Entry Retrieval: " ); + if ( Test.testUniprotEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } } - else { - System.out.println( "failed." ); - failed++; + if ( PERFORM_DB_TESTS ) { + System.out.print( "Uniprot Taxonomy Search: " ); + if ( Test.testUniprotTaxonomySearch() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } } //---- String path = ""; @@ -10861,104 +10867,105 @@ public final class Test { private static boolean testSequenceDbWsTools1() { try { - PhylogenyNode n = new PhylogenyNode(); + final PhylogenyNode n = new PhylogenyNode(); n.setName( "NP_001025424" ); Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) || !acc.getValue().equals( "NP_001025424" ) ) { return false; } n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) || !acc.getValue().equals( "NP_001025424" ) ) { return false; } n.setName( "NP_001025424.1" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) || !acc.getValue().equals( "NP_001025424" ) ) { return false; } n.setName( "NM_001030253" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) || !acc.getValue().equals( "NM_001030253" ) ) { return false; } n.setName( "BCL2_HUMAN" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) || !acc.getValue().equals( "BCL2_HUMAN" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "P10415" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) || !acc.getValue().equals( "P10415" ) ) { System.out.println( acc.toString() ); return false; } n.setName( " P10415 " ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) || !acc.getValue().equals( "P10415" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "_P10415|" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) || !acc.getValue().equals( "P10415" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "AY695820" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) || !acc.getValue().equals( "AY695820" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "_AY695820_" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) || !acc.getValue().equals( "AY695820" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "AAA59452" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) || !acc.getValue().equals( "AAA59452" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "_AAA59452_" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) || !acc.getValue().equals( "AAA59452" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "AAA59452.1" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) || !acc.getValue().equals( "AAA59452.1" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "_AAA59452.1_" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) || !acc.getValue().equals( "AAA59452.1" ) ) { System.out.println( acc.toString() ); return false; } n.setName( "GI:94894583" ); acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( acc == null || !acc.getSource().equals( Source.GI.toString() ) || !acc.getValue().equals( "94894583" ) ) { + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "94894583" ) ) { System.out.println( acc.toString() ); return false; } @@ -10971,18 +10978,22 @@ public final class Test { private static boolean testSequenceDbWsTools2() { try { - PhylogenyNode n1 = new PhylogenyNode(); - n1.setName( "NP_001025424" ); - SequenceDbWsTools.obtainSeqInformation( false, 4000, new TreeSet(), n1 ); + final PhylogenyNode n1 = new PhylogenyNode( "NP_001025424" ); + SequenceDbWsTools.obtainSeqInformation( n1 ); if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { return false; } if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { return false; } - PhylogenyNode n2 = new PhylogenyNode(); - n2.setName( "NM_001030253" ); - SequenceDbWsTools.obtainSeqInformation( false, 4000, new TreeSet(), n2 ); + if ( !n1.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n1.getNodeData().getSequence().getAccession().getValue().equals( "NP_001025424" ) ) { + return false; + } + final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" ); + SequenceDbWsTools.obtainSeqInformation( n2 ); System.out.println( n2.toString() ); if ( !n2.getNodeData().getSequence().getName() .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) { @@ -10991,6 +11002,28 @@ public final class Test { if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { return false; } + if ( !n2.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n2.getNodeData().getSequence().getAccession().getValue().equals( "NM_001030253" ) ) { + return false; + } + final PhylogenyNode n3 = new PhylogenyNode( "NM_184234.2" ); + SequenceDbWsTools.obtainSeqInformation( n3 ); + System.out.println( "n=" + n3.toString() ); + if ( !n3.getNodeData().getSequence().getName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + return false; + } + if ( !n3.getNodeData().getTaxonomy().getScientificName().equals( "Homo sapiens" ) ) { + return false; + } + if ( !n3.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n3.getNodeData().getSequence().getAccession().getValue().equals( "NM_184234" ) ) { + return false; + } } catch ( final IOException e ) { System.out.println(); @@ -10999,6 +11032,7 @@ public final class Test { return true; } catch ( final Exception e ) { + e.printStackTrace(); return false; } return true; diff --git a/forester/java/src/org/forester/ws/seqdb/DatabaseTools.java b/forester/java/src/org/forester/ws/seqdb/DatabaseTools.java deleted file mode 100644 index c8e1a4b..0000000 --- a/forester/java/src/org/forester/ws/seqdb/DatabaseTools.java +++ /dev/null @@ -1,20 +0,0 @@ - -package org.forester.ws.seqdb; - -public class DatabaseTools { - - static String extract( final String target, final String a, final String b ) { - final int i_a = target.indexOf( a ); - final int i_b = target.indexOf( b ); - if ( ( i_a < 0 ) || ( i_b < i_a ) ) { - throw new IllegalArgumentException( "attempt to extract from [" + target + "] between [" + a + "] and [" - + b + "]" ); - } - return target.substring( i_a + a.length(), i_b ).trim(); - } - - static String extract( final String target, final String a ) { - final int i_a = target.indexOf( a ); - return target.substring( i_a + a.length() ).trim(); - } -} diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index 65ae847..b1545b3 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -33,6 +33,7 @@ import org.forester.util.ForesterUtil; public final class EbiDbEntry implements SequenceDatabaseEntry { + // FIXME actually this is NCBI entry //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/ private String _pa; private String _de; @@ -41,6 +42,454 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { private String _symbol; private String _provider; + // TODO PUBMED 15798186 + //TODO (FEATURES) + // source /db_xref="taxon:9606" + // gene 1..2881 + // /gene="RBM39" + // + // /db_xref="MIM:604739" + // CDS + // /gene="RBM39" + // /db_xref="MIM:604739" + // /db_xref="InterPro:IPR002475" + // /product="Bcl-2" + // /protein_id="NP_909122.1" + // /db_xref="UniProtKB/TrEMBL:Q5J7V1" <- reparse? + // + // Protein + /* + LOCUS NM_184234 2881 bp mRNA linear PRI 16-JUN-2013 + DEFINITION Homo sapiens RNA binding motif protein 39 (RBM39), transcript + variant 1, mRNA. + ACCESSION NM_184234 + VERSION NM_184234.2 GI:336176061 + KEYWORDS RefSeq. + SOURCE Homo sapiens (human) + ORGANISM Homo sapiens + Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; + Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo. + REFERENCE 1 (bases 1 to 2881) + AUTHORS Sillars-Hardebol,A.H., Carvalho,B., Belien,J.A., de Wit,M., + Delis-van Diemen,P.M., Tijssen,M., van de Wiel,M.A., Ponten,F., + Meijer,G.A. and Fijneman,R.J. + TITLE CSE1L, DIDO1 and RBM39 in colorectal adenoma to carcinoma + progression + JOURNAL Cell Oncol (Dordr) 35 (4), 293-300 (2012) + PUBMED 22711543 + REMARK GeneRIF: Data show that CSE1L, DIDO1 and RBM39 mRNA expression + levels correlated with chromosome 20q DNA copy number status. + REFERENCE 2 (bases 1 to 2881) + AUTHORS Huang,G., Zhou,Z., Wang,H. and Kleinerman,E.S. + TITLE CAPER-alpha alternative splicing regulates the expression of + vascular endothelial growth factor(1)(6)(5) in Ewing sarcoma cells + JOURNAL Cancer 118 (8), 2106-2116 (2012) + PUBMED 22009261 + REMARK GeneRIF: Increased VEGF(165) expression is secondary to the + down-regulation of CAPER-alpha by EWS/FLI-1. CAPER-alpha mediates + alternative splicing and controls the shift from VEGF(189) to + VEGF(165) . + REFERENCE 3 (bases 1 to 2881) + AUTHORS Han,B., Stockwin,L.H., Hancock,C., Yu,S.X., Hollingshead,M.G. and + Newton,D.L. + TITLE Proteomic analysis of nuclei isolated from cancer cell lines + treated with indenoisoquinoline NSC 724998, a novel topoisomerase I + inhibitor + JOURNAL J. Proteome Res. 9 (8), 4016-4027 (2010) + PUBMED 20515076 + REMARK Erratum:[J Proteome Res. 2011 Apr 1;10(4):2128] + REFERENCE 4 (bases 1 to 2881) + AUTHORS Zhang,J.Y., Looi,K.S. and Tan,E.M. + TITLE Identification of tumor-associated antigens as diagnostic and + predictive biomarkers in cancer + JOURNAL Methods Mol. Biol. 520, 1-10 (2009) + PUBMED 19381943 + REFERENCE 5 (bases 1 to 2881) + AUTHORS Dutta,J., Fan,G. and Gelinas,C. + TITLE CAPERalpha is a novel Rel-TAD-interacting factor that inhibits + lymphocyte transformation by the potent Rel/NF-kappaB oncoprotein + v-Rel + JOURNAL J. Virol. 82 (21), 10792-10802 (2008) + PUBMED 18753212 + REMARK GeneRIF: this study identifies CAPERalpha (RNA binding motif + protein 39) as a new transcriptional coregulator for v-Rel and + reveals an important role in modulating Rel's oncogenic activity. + REFERENCE 6 (bases 1 to 2881) + AUTHORS Cazalla,D., Newton,K. and Caceres,J.F. + TITLE A novel SR-related protein is required for the second step of + Pre-mRNA splicing + JOURNAL Mol. Cell. Biol. 25 (8), 2969-2980 (2005) + PUBMED 15798186 + REFERENCE 7 (bases 1 to 2881) + AUTHORS Dowhan,D.H., Hong,E.P., Auboeuf,D., Dennis,A.P., Wilson,M.M., + Berget,S.M. and O'Malley,B.W. + TITLE Steroid hormone receptor coactivation and alternative RNA splicing + by U2AF65-related proteins CAPERalpha and CAPERbeta + JOURNAL Mol. Cell 17 (3), 429-439 (2005) + PUBMED 15694343 + REFERENCE 8 (bases 1 to 2881) + AUTHORS Sun,N.N., Fastje,C.D., Wong,S.S., Sheppard,P.R., Macdonald,S.J., + Ridenour,G., Hyde,J.D. and Witten,M.L. + TITLE Dose-dependent transcriptome changes by metal ores on a human acute + lymphoblastic leukemia cell line + JOURNAL Toxicol Ind Health 19 (7-10), 157-163 (2003) + PUBMED 15747776 + REMARK GeneRIF: 10 genes were down-regulated following treatment of the + T-ALL cells with 0.15 and 1.5 microg/mL of metal ores at 72 h + REFERENCE 9 (bases 1 to 2881) + AUTHORS Jung,D.J., Na,S.Y., Na,D.S. and Lee,J.W. + TITLE Molecular cloning and characterization of CAPER, a novel + coactivator of activating protein-1 and estrogen receptors + JOURNAL J. Biol. Chem. 277 (2), 1229-1234 (2002) + PUBMED 11704680 + REMARK GeneRIF: This paper describes the mouse gene. + REFERENCE 10 (bases 1 to 2881) + AUTHORS Imai,H., Chan,E.K., Kiyosawa,K., Fu,X.D. and Tan,E.M. + TITLE Novel nuclear autoantigen with splicing factor motifs identified + with antibody from hepatocellular carcinoma + JOURNAL J. Clin. Invest. 92 (5), 2419-2426 (1993) + PUBMED 8227358 + COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The + reference sequence was derived from DC346351.1, BC141835.1 and + C75555.1. + On Jun 16, 2011 this sequence version replaced gi:35493810. + + Summary: This gene encodes a member of the U2AF65 family of + proteins. The encoded protein is found in the nucleus, where it + co-localizes with core spliceosomal proteins. It has been shown to + play a role in both steroid hormone receptor-mediated transcription + and alternative splicing, and it is also a transcriptional + coregulator of the viral oncoprotein v-Rel. Multiple transcript + variants have been observed for this gene. A related pseudogene has + been identified on chromosome X. [provided by RefSeq, Aug 2011]. + + Transcript Variant: This variant (1) encodes the longest isoform + (a, also called CC1.4). + + Publication Note: This RefSeq record includes a subset of the + publications that are available for this gene. Please see the Gene + record to access additional publications. + + ##Evidence-Data-START## + Transcript exon combination :: BC141835.1, L10911.1 [ECO:0000332] + RNAseq introns :: mixed/partial sample support + ERS025081, ERS025082 [ECO:0000350] + ##Evidence-Data-END## + COMPLETENESS: complete on the 3' end. + PRIMARY REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP + 1-578 DC346351.1 3-580 + 579-2872 BC141835.1 429-2722 + 2873-2881 C75555.1 1-9 c + FEATURES Location/Qualifiers + source 1..2881 + /organism="Homo sapiens" + /mol_type="mRNA" + /db_xref="taxon:9606" + /chromosome="20" + /map="20q11.22" + gene 1..2881 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /note="RNA binding motif protein 39" + /db_xref="GeneID:9584" + /db_xref="HGNC:15923" + /db_xref="HPRD:09201" + /db_xref="MIM:604739" + exon 1..396 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + STS 35..262 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="REN58946" + /db_xref="UniSTS:383746" + misc_feature 221..223 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /note="upstream in-frame stop codon" + STS 299..453 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="G64285" + /db_xref="UniSTS:158667" + exon 397..460 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + CDS 410..2002 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /note="isoform a is encoded by transcript variant 1; + coactivator of activating protein-1 and estrogen + receptors; functional spliceosome-associated protein 59; + RNA-binding region (RNP1, RRM) containing 2; + hepatocellular carcinoma protein 1; splicing factor HCC1" + /codon_start=1 + /product="RNA-binding protein 39 isoform a" + /protein_id="NP_909122.1" + /db_xref="GI:35493811" + /db_xref="CCDS:CCDS13266.1" + /db_xref="GeneID:9584" + /db_xref="HGNC:15923" + /db_xref="HPRD:09201" + /db_xref="MIM:604739" + /translation="MADDIDIEAMLEAPYKKDENKLSSANGHEERSKKRKKSKSRSRS + HERKRSKSKERKRSRDRERKKSKSRERKRSRSKERRRSRSRSRDRRFRGRYRSPYSGP + KFNSAIRGKIGLPHSIKLSRRRSRSKSPFRKDKSPVREPIDNLTPEERDARTVFCMQL + AARIRPRDLEEFFSTVGKVRDVRMISDRNSRRSKGIAYVEFVDVSSVPLAIGLTGQRV + LGVPIIVQASQAEKNRAAAMANNLQKGSAGPMRLYVGSLHFNITEDMLRGIFEPFGRI + ESIQLMMDSETGRSKGYGFITFSDSECAKKALEQLNGFELAGRPMKVGHVTERTDASS + ASSFLDSDELERTGIDLGTTGRLQLMARLAEGTGLQIPPAAQQALQMSGSLAFGAVAE + FSFVIDLQTRLSQQTEASALAAAASVQPLATQCFQLSNMFNPQTEEEVGWDTEIKDDV + IEECNKHGGVIHIYVDKNSAQGNVYVKCPSIAAAIAAVNALHGRWFAGKMITAAYVPL + PTYHNLFPDSMTATQLLVPSRR" + misc_feature 413..415 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="N-acetylalanine; propagated from + UniProtKB/Swiss-Prot (Q14498.2); acetylation site" + misc_feature 692..694 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphotyrosine; propagated from + UniProtKB/Swiss-Prot (Q14498.2); phosphorylation site" + misc_feature 698..700 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot + (Q14498.2); phosphorylation site" + misc_feature 707..709 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot + (Q14498.2); phosphorylation site" + misc_feature 815..817 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot + (Q14498.2); phosphorylation site" + misc_feature 845..847 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphothreonine; propagated from + UniProtKB/Swiss-Prot (Q14498.2); phosphorylation site" + misc_feature 1280..1627 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="non-experimental evidence, no additional + details recorded" + /note="propagated from UniProtKB/Swiss-Prot (Q14498.2); + Region: Interaction with JUN (By similarity)" + misc_feature 1280..1474 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="non-experimental evidence, no additional + details recorded" + /note="propagated from UniProtKB/Swiss-Prot (Q14498.2); + Region: Activating domain (By similarity)" + misc_feature 1409..1411 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot + (Q14498.2); phosphorylation site" + misc_feature 1418..1420 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot + (Q14498.2); phosphorylation site" + misc_feature 1430..1432 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /experiment="experimental evidence, no additional details + recorded" + /note="Phosphoserine; propagated from UniProtKB/Swiss-Prot + (Q14498.2); phosphorylation site" + misc_feature 1472..1627 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="non-experimental evidence, no additional + details recorded" + /note="propagated from UniProtKB/Swiss-Prot (Q14498.2); + Region: Interaction with ESR1 and ESR2 (By similarity)" + misc_feature 1625..1999 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="non-experimental evidence, no additional + details recorded" + /note="propagated from UniProtKB/Swiss-Prot (Q14498.2); + Region: Interaction with NCOA6 (By similarity)" + exon 461..510 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 511..705 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 706..771 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 772..825 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 826..943 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 944..1096 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1097..1234 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1235..1300 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1301..1505 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1506..1583 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1584..1634 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1635..1716 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1717..1822 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1823..1901 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + exon 1902..2874 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /inference="alignment:Splign:1.39.8" + STS 1956..2182 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="REN58786" + /db_xref="UniSTS:383586" + STS 2104..2148 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="D19S1033" + /db_xref="UniSTS:154759" + STS 2145..2400 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="REN58785" + /db_xref="UniSTS:383585" + STS 2349..2590 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="REN58784" + /db_xref="UniSTS:383584" + STS 2450..2669 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="RH69003" + /db_xref="UniSTS:85360" + STS 2579..2828 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="REN58783" + /db_xref="UniSTS:383583" + STS 2639..2728 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + /standard_name="RH67917" + /db_xref="UniSTS:84037" + polyA_signal 2851..2856 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + polyA_site 2874 + /gene="RBM39" + /gene_synonym="CAPER; CAPERalpha; FSAP59; HCC1; RNPC2" + ORIGIN + 1 atttggagct tggggcagct tctcgcgaga gcccgtgctg agggctctgt gaggccccgt + 61 gtgtttgtgt gtgtgtatgt gtgctggtga atgtgagtac agggaagcag cggccgccat + 121 ttcagggagc ttgtcgacgc tgtcgcaggg gtggatcctg agctgccgaa gccgccgtcc + 181 tgctctcccg cgtgggcttc tctaattcca ttgttttttt tagattctct cgggcctagc + 241 cgtccttgga acccgatatt cgggctgggc ggttccgcgg cctgggccta ggggcttaac + 301 agtagcaaca gaagcggcgg cggcggcagc agcagcagca gcagcagcaa tctcttcccg + 361 aacacgagca ccacaggcgc ccgaaggccg gaacaggcgt ttagagaaaa tggcagacga + 421 tattgatatt gaagcaatgc ttgaggctcc ttacaagaag gatgagaaca agttgagcag + 481 tgccaacggc catgaagaac gtagcaaaaa gaggaaaaaa agcaagagca gaagtcgtag + 541 tcatgaacga aagagaagca aaagtaagga acggaagcga agtagagaca gagaaaggaa + 601 aaagagcaaa agccgtgaaa gaaagcgaag tagaagcaaa gagaggcgac ggagccgctc + 661 aagaagtcga gatcgaagat ttagaggccg ctacagaagt ccttactccg gaccaaaatt + 721 taacagtgcc atccgaggaa agattgggtt gcctcatagc atcaaattaa gcagacgacg + 781 ttcccgaagc aaaagtccat tcagaaaaga caagagccct gtgagagaac ctattgataa + 841 tttaactcct gaggaaagag atgcaaggac agtcttctgt atgcagctgg cggcaagaat + 901 tcgaccaagg gatttggaag agtttttctc tacagtagga aaggttcgag atgtgaggat + 961 gatttctgac agaaattcaa gacgttccaa aggaattgct tatgtggagt tcgtcgatgt + 1021 tagctcagtg cctctagcaa taggattaac tggccaacga gttttaggcg tgccaatcat + 1081 agtacaggca tcacaggcag aaaaaaacag agctgcagca atggcaaaca atttacaaaa + 1141 gggaagtgct ggacctatga ggctttatgt gggctcatta cacttcaaca taactgaaga + 1201 tatgcttcgt gggatctttg agccttttgg aagaattgaa agtatccagc tgatgatgga + 1261 cagtgaaact ggtcgatcca agggatatgg atttattaca ttttctgact cagaatgtgc + 1321 caaaaaggct ttggaacaac ttaatggatt tgaactagca ggaagaccaa tgaaagttgg + 1381 tcatgttact gaacgtactg atgcttcgag tgctagttca tttttggaca gtgatgaact + 1441 ggaaaggact ggaattgatt tgggaacaac tggtcgtctt cagttaatgg caagacttgc + 1501 agagggtaca ggtttgcaga ttccgccagc agcacagcaa gctctacaga tgagtggctc + 1561 tttggcattt ggtgctgtgg cagaattctc ttttgttata gatttgcaaa caagactttc + 1621 ccagcagact gaagcttcag ctttagctgc agctgcctct gttcagccac ttgcaacaca + 1681 atgtttccaa ctctctaaca tgtttaaccc tcaaacagaa gaagaagttg gatgggatac + 1741 cgagattaag gatgatgtga ttgaagaatg taataaacat ggaggagtta ttcatattta + 1801 tgttgacaaa aattcagctc agggcaatgt gtatgtgaag tgcccatcaa ttgctgcagc + 1861 tattgctgct gtcaatgcat tgcatggcag gtggtttgct ggtaaaatga taacagcagc + 1921 atatgtacct cttccaactt accacaacct gtttcctgat tctatgacag caacacagct + 1981 actggttcca agtagacgat gaaggaagat atagtccctt atgtatatag ctttttttct + 2041 ttcttgagaa ttcatcttga gttatctttt atttagataa aaataaagag gcaaggatct + 2101 actgtcattt gtatgcaatt tcctgttacc ttgaaaaaat aaaaatgtta acaggaatgc + 2161 agtgtgctca ttctccctaa atagtaaatc ccactgtata caaaactgtt ctcttgttct + 2221 gccttttaaa atgttcatgt agaaaattaa tgaactatag gaatagctct aggagaacaa + 2281 atgtgctttc tgtaaaaagg cagaccaggg atgtaatgtt tttaatgttt cagaagccta + 2341 actttttaca cagtggttac atttcacatt tcactaatgt tgatatttgg ctgatggttg + 2401 agcagtttct gaaatacaca tttagtgtat ggaaatacaa gacagctaaa gggctgtttg + 2461 gttagcatct catcttgcat tctgatcaat tggcaagaaa gggagatttc aaaattatat + 2521 ttcttgatgg tatcttttca attaatgtat ctgtaaaagt ttctttgtaa atactatgtg + 2581 ttctggtgtg tcttaaaatt ccaaacaaaa tgatccctgc atttcctgaa gatgtttaaa + 2641 cgtgagagtc tggtaggcaa agcagtctga gaaagaaata ggaaatgcag aaataggttt + 2701 tgtctggttg catataatct ttgctctttt taagctctgt gagctctgaa atatattttt + 2761 gggttacttc agtgtgtttg acaagacagc ttgatatttc tatcaaacaa atgactttca + 2821 tattgcaaca atctttgtaa gaaccactca aataaaagtc tcttaaaaag gccaaaaaaa + 2881 a + + + */ private EbiDbEntry() { } @@ -51,30 +500,53 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List lines ) { final EbiDbEntry e = new EbiDbEntry(); + final StringBuilder def = new StringBuilder(); + boolean in_def = false; for( final String line : lines ) { // System.out.println( "-" + line ); if ( line.startsWith( "ACCESSION" ) ) { - e.setPA( DatabaseTools.extract( line, "ACCESSION" ) ); + e.setPA( SequenceDbWsTools.extractFrom( line, "ACCESSION" ) ); + in_def = false; } else if ( line.startsWith( "DEFINITION" ) ) { if ( line.indexOf( "[" ) > 0 ) { - e.setDe( DatabaseTools.extract( line, "DEFINITION", "[" ) ); + def.append( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "[" ) ); } else if ( line.indexOf( "." ) > 0 ) { - e.setDe( DatabaseTools.extract( line, "DEFINITION", "." ) ); + def.append( SequenceDbWsTools.extractFromTo( line, "DEFINITION", "." ) ); } else { - e.setDe( DatabaseTools.extract( line, "DEFINITION" ) ); + def.append( SequenceDbWsTools.extractFrom( line, "DEFINITION" ) ); } + in_def = true; } else if ( line.startsWith( "SOURCE" ) ) { if ( line.indexOf( "(" ) > 0 ) { - e.setOs( DatabaseTools.extract( line, "SOURCE", "(" ) ); + e.setOs( SequenceDbWsTools.extractFromTo( line, "SOURCE", "(" ) ); } else { - e.setOs( DatabaseTools.extract( line, "SOURCE" ) ); + e.setOs( SequenceDbWsTools.extractFrom( line, "SOURCE" ) ); } + in_def = false; } + else if ( line.startsWith( " " ) && in_def ) { + def.append( " " ); + if ( line.indexOf( "[" ) > 0 ) { + def.append( SequenceDbWsTools.extractTo( line, "[" ) ); + } + else if ( line.indexOf( "." ) > 0 ) { + def.append( SequenceDbWsTools.extractTo( line, "." ) ); + } + else { + def.append( line.trim() ); + } + } + else { + in_def = false; + } + } + if ( def.length() > 0 ) { + e.setDe( def.toString().trim() ); } return e; } @@ -83,22 +555,22 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { final EbiDbEntry e = new EbiDbEntry(); for( final String line : lines ) { if ( line.startsWith( "PA" ) ) { - e.setPA( DatabaseTools.extract( line, "PA" ) ); + e.setPA( SequenceDbWsTools.extractFrom( line, "PA" ) ); } else if ( line.startsWith( "DE" ) ) { - e.setDe( DatabaseTools.extract( line, "DE" ) ); + e.setDe( SequenceDbWsTools.extractFrom( line, "DE" ) ); } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { - e.setOs( DatabaseTools.extract( line, "OS", "(" ) ); + e.setOs( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) ); } else { - e.setOs( DatabaseTools.extract( line, "OS" ) ); + e.setOs( SequenceDbWsTools.extractFrom( line, "OS" ) ); } } else if ( line.startsWith( "OX" ) ) { if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) { - e.setTaxId( DatabaseTools.extract( line, "NCBI_TaxID=", ";" ) ); + e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) ); } } } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index 55eead3..cecef9f 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -54,13 +54,34 @@ import org.forester.util.SequenceAccessionTools; public final class SequenceDbWsTools { - public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; - public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; - public final static String EMBL_DBS_EMBL = "embl"; - public final static String EMBL_DBS_REFSEQ_N = "refseqn"; - public final static String EMBL_DBS_REFSEQ_P = "refseqp"; - private final static boolean DEBUG = true; - private final static String URL_ENC = "UTF-8"; + public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; + public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; + public final static String EMBL_DBS_EMBL = "embl"; + public final static String EMBL_DBS_REFSEQ_N = "refseqn"; + public final static String EMBL_DBS_REFSEQ_P = "refseqp"; + private final static boolean DEBUG = true; + private final static String URL_ENC = "UTF-8"; + public final static int DEFAULT_LINES_TO_RETURN = 4000; + + final static String extractFrom( final String target, final String a ) { + final int i_a = target.indexOf( a ); + return target.substring( i_a + a.length() ).trim(); + } + + final static String extractFromTo( final String target, final String a, final String b ) { + final int i_a = target.indexOf( a ); + final int i_b = target.indexOf( b ); + if ( ( i_a < 0 ) || ( i_b < i_a ) ) { + throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a + + "\" and \"" + b + "\"" ); + } + return target.substring( i_a + a.length(), i_b ).trim(); + } + + final static String extractTo( final String target, final String b ) { + final int i_b = target.indexOf( b ); + return target.substring( 0, i_b ).trim(); + } public static List getTaxonomiesFromCommonNameStrict( final String cn, final int max_taxonomies_return ) @@ -155,6 +176,16 @@ public final class SequenceDbWsTools { } } + public final static void obtainSeqInformation( final boolean allow_to_set_taxonomic_data, + final SortedSet not_found, + final PhylogenyNode node ) throws IOException { + obtainSeqInformation( allow_to_set_taxonomic_data, DEFAULT_LINES_TO_RETURN, not_found, node ); + } + + public final static void obtainSeqInformation( final PhylogenyNode node ) throws IOException { + obtainSeqInformation( true, DEFAULT_LINES_TO_RETURN, new TreeSet(), node ); + } + public final static SortedSet obtainSeqInformation( final Phylogeny phy, final boolean ext_nodes_only, final boolean allow_to_set_taxonomic_data, diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 4a8d158..4ba10de 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -174,24 +174,24 @@ public final class UniProtEntry implements SequenceDatabaseEntry { for( final String line : lines ) { //System.out.println( line ); if ( line.startsWith( "AC" ) ) { - e.setAc( DatabaseTools.extract( line, "AC", ";" ) ); + e.setAc( SequenceDbWsTools.extractFromTo( line, "AC", ";" ) ); } else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceName() ) ) { if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { - e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) ); + e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) ); } else if ( ( line.indexOf( "SubName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { - e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) ); + e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) ); } } else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) { if ( line.indexOf( "Short=" ) > 0 ) { - e.setSequenceSymbol( DatabaseTools.extract( line, "Short=", ";" ) ); + e.setSequenceSymbol( SequenceDbWsTools.extractFromTo( line, "Short=", ";" ) ); } } else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) { if ( line.indexOf( "Name=" ) > 0 ) { - e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) ); + e.setGeneName( SequenceDbWsTools.extractFromTo( line, "Name=", ";" ) ); } } else if ( line.startsWith( "DR" ) ) { @@ -274,15 +274,15 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { - e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) ); + e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) ); } else { - e.setOsScientificName( DatabaseTools.extract( line, "OS", "." ) ); + e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "." ) ); } } else if ( line.startsWith( "OX" ) ) { if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) { - e.setTaxId( DatabaseTools.extract( line, "NCBI_TaxID=", ";" ) ); + e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) ); } } } -- 1.7.10.2