From b80a84de8b4d07847496bc04f51b45bacd146ff3 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 4 Oct 2013 03:25:54 +0000 Subject: [PATCH 1/1] inprogress --- .../archaeopteryx/tools/SequenceDataRetriver.java | 2 +- .../src/org/forester/phylogeny/data/Accession.java | 44 +++- forester/java/src/org/forester/test/Test.java | 211 +++++++++++++++++--- .../org/forester/util/SequenceAccessionTools.java | 40 ++-- .../java/src/org/forester/ws/seqdb/EbiDbEntry.java | 3 + .../org/forester/ws/seqdb/SequenceDbWsTools.java | 51 +++-- 6 files changed, 277 insertions(+), 74 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index a805a43..cc6a444 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -94,7 +94,7 @@ public final class SequenceDataRetriver extends RunnableProcess { sb.append( "For the following node no data was found:\n" ); } else { - sb.append( "For the following nodes no data was found: (total: " + not_found.size() + "):\n" ); + sb.append( "For the following nodes no data was found (total: " + not_found.size() + "):\n" ); } int i = 0; for( final String string : not_found ) { diff --git a/forester/java/src/org/forester/phylogeny/data/Accession.java b/forester/java/src/org/forester/phylogeny/data/Accession.java index 8d9739a..b3d99a0 100644 --- a/forester/java/src/org/forester/phylogeny/data/Accession.java +++ b/forester/java/src/org/forester/phylogeny/data/Accession.java @@ -34,15 +34,34 @@ import org.forester.util.ForesterUtil; public final class Accession implements PhylogenyData, Comparable { - final private String _comment; - final private String _source; - final private String _source_value; - final private String _value; - final public static String NCBI = "ncbi"; - final public static String REFSEQ 
= "refseq"; - final public static String UNIPROT = "uniprot"; - final public static String GI = "gi"; - public static final String EMBL = "embl"; + final private String _comment; + final private String _source; + final private String _source_value; + final private String _value; + + public enum Source { + NCBI, REFSEQ, UNIPROT, GI, EMBL, UNKNOWN; + + @Override + public String toString() { + switch ( this ) { + case NCBI: + return "ncbi"; + case REFSEQ: + return "refseq"; + case UNIPROT: + return "uniprot"; + case GI: + return "gi"; + case EMBL: + return "embl"; + case UNKNOWN: + return "unknown"; + default: + throw new IllegalArgumentException(); + } + } + } public Accession( final String value ) { _value = value; @@ -63,6 +82,13 @@ public final class Accession implements PhylogenyData, Comparable { } } + public Accession( final String value, final Source source ) { + _value = value; + _source = source.toString(); + _comment = ""; + _source_value = source + value; + } + public Accession( final String value, final String source, final String comment ) { _value = value; _source = source; diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 61b3d3d..ca8df7e 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.Locale; import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; import org.forester.application.support_transfer; import org.forester.archaeopteryx.TreePanelUtil; @@ -71,6 +72,7 @@ import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchWidth; import 
org.forester.phylogeny.data.Confidence; @@ -172,6 +174,15 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); + System.out.print( "Basic node methods: " ); + if ( Test.testBasicNodeMethods() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Protein id: " ); if ( !testProteinId() ) { System.out.println( "failed." ); @@ -226,8 +237,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Hmmscan output parser: " ); - if ( testHmmscanOutputParser() ) { + System.out.print( "UniProtKB id extraction: " ); + if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { System.out.println( "OK." ); succeeded++; } @@ -235,8 +246,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Basic node methods: " ); - if ( Test.testBasicNodeMethods() ) { + System.out.print( "Sequence DB tools 1: " ); + if ( testSequenceDbWsTools1() ) { System.out.println( "OK." ); succeeded++; } @@ -244,17 +255,19 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Taxonomy code extraction: " ); - if ( Test.testExtractTaxonomyCodeFromNodeName() ) { + System.out.print( "Sequence DB tools 2: " ); + if ( testSequenceDbWsTools2() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; + System.exit( -1 ); } - System.out.print( "SN extraction: " ); - if ( Test.testExtractSNFromNodeName() ) { + /* No unconditional System.exit( 0 ) here: it ended the run with status 0 before any of the tests below could execute. */ + System.out.print( "Hmmscan output parser: " ); + if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); succeeded++; } @@ -262,8 +275,8 @@ public final class Test { System.out.println( "failed." 
); failed++; } - System.out.print( "Taxonomy extraction (general): " ); - if ( Test.testTaxonomyExtraction() ) { + System.out.print( "Taxonomy code extraction: " ); + if ( Test.testExtractTaxonomyCodeFromNodeName() ) { System.out.println( "OK." ); succeeded++; } @@ -271,8 +284,17 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "UniProtKB id extraction: " ); - if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { + System.out.print( "SN extraction: " ); + if ( Test.testExtractSNFromNodeName() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Taxonomy extraction (general): " ); + if ( Test.testTaxonomyExtraction() ) { System.out.println( "OK." ); succeeded++; } @@ -3830,7 +3852,7 @@ public final class Test { return false; } n = new PhylogenyNode(); - n.setName( "_ACP19736_" ); + n.setName( "|ACP19736|" ); if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } @@ -9729,24 +9751,12 @@ public final class Test { } return false; } - // - // id = SequenceAccessionTools.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" ); - // if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - // || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "sp" ) ) { - // if ( id != null ) { - // System.out.println( "value =" + id.getValue() ); - // System.out.println( "provider=" + id.getSource() ); - // } - // return false; - // } - // id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" ); if ( id != null ) { System.out.println( "value =" + id.getValue() ); System.out.println( "provider=" + id.getSource() ); return false; } - // lcl_91970_unknown_ } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -10849,6 +10859,157 @@ public final class Test { return true; } + private static boolean 
testSequenceDbWsTools1() { + try { + PhylogenyNode n = new PhylogenyNode(); + n.setName( "NP_001025424" ); + Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "NP_001025424.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "NM_001030253" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NM_001030253" ) ) { + return false; + } + n.setName( "BCL2_HUMAN" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "BCL2_HUMAN" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "P10415" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( " P10415 " ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_P10415|" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + 
System.out.println( acc.toString() ); + return false; + } + n.setName( "AY695820" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AY695820_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AAA59452" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AAA59452_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AAA59452.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AAA59452.1_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "GI:94894583" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.GI.toString() ) || !acc.getValue().equals( "94894583" ) ) { + System.out.println( acc.toString() ); + return false; + } + } + // catch ( final IOException e ) { + // System.out.println(); + // System.out.println( "the following might be due to absence 
internet connection:" ); + // e.printStackTrace( System.out ); + // return true; + // } + catch ( final Exception e ) { + return false; + } + return true; + } + + private static boolean testSequenceDbWsTools2() { + try { + PhylogenyNode n1 = new PhylogenyNode(); + n1.setName( "NP_001025424" ); + SequenceDbWsTools.obtainSeqInformation( false, 4000, new TreeSet(), n1 ); + if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { + return false; + } + if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + return false; + } + PhylogenyNode n2 = new PhylogenyNode(); + n2.setName( "NM_001030253" ); + SequenceDbWsTools.obtainSeqInformation( false, 4000, new TreeSet(), n2 ); + System.out.println( n2.toString() ); + if ( !n2.getNodeData().getSequence().getName() + .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) { + return false; + } + if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + return false; + } + } + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); + e.printStackTrace( System.out ); + return true; + } + catch ( final Exception e ) { + return false; + } + return true; + } + private static boolean testUniprotEntryRetrieval() { try { final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 200 ); diff --git a/forester/java/src/org/forester/util/SequenceAccessionTools.java b/forester/java/src/org/forester/util/SequenceAccessionTools.java index c65fbc5..cce1b2e 100644 --- a/forester/java/src/org/forester/util/SequenceAccessionTools.java +++ b/forester/java/src/org/forester/util/SequenceAccessionTools.java @@ -33,6 +33,7 @@ import java.util.regex.Pattern; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.Sequence; public final 
class SequenceAccessionTools { @@ -57,7 +58,8 @@ public final class SequenceAccessionTools { public final static Pattern GENBANK_PROT_PATTERN = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)" ); public final static Pattern GI_PATTERN = Pattern.compile( "(?:\\b|_)(?:GI|gi)[|_=:](\\d+)(?:\\b|_)" ); - public final static Pattern UNIPROT_KB_PATTERN_0 = Pattern.compile( "\\b([A-Z][0-9][A-Z0-9]{3}[0-9])\\b" ); + public final static Pattern UNIPROT_KB_PATTERN_0 = Pattern + .compile( "(?:\\b|_)([A-Z][0-9][A-Z0-9]{3}[0-9])(?:\\b|_)" ); public final static Pattern UNIPROT_KB_PATTERN_1 = Pattern .compile( "(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]([A-Z][0-9][A-Z0-9]{3}[0-9])(?:\\b|_)" ); public final static Pattern UNIPROT_KB_PATTERN_2 = Pattern @@ -87,19 +89,19 @@ public final class SequenceAccessionTools { public final static Accession obtainAccessorFromDataFields( final PhylogenyNode n ) { String a = obtainUniProtAccessorFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { - return new Accession( a, Accession.UNIPROT ); + return new Accession( a, Source.UNIPROT ); } a = obtainGenbankAccessorFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { - return new Accession( a, Accession.NCBI ); + return new Accession( a, Source.NCBI ); } a = obtainRefSeqAccessorFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { - return new Accession( a, Accession.REFSEQ ); + return new Accession( a, Source.REFSEQ ); } a = obtainGiNumberFromDataFields( n ); if ( !ForesterUtil.isEmpty( a ) ) { - return new Accession( a, Accession.GI ); + return new Accession( a, Source.GI ); } return null; } @@ -112,19 +114,19 @@ public final class SequenceAccessionTools { final String value = n.getNodeData().getSequence().getAccession().getValue(); if ( ( source.startsWith( "uniprot" ) || source.equals( "swissprot" ) || source.equals( "trembl" ) || source .equals( "sp" ) ) ) { - return new Accession( value, Accession.UNIPROT ); + return new Accession( value, 
Source.UNIPROT ); } else if ( source.equals( "embl" ) || source.equals( "ebi" ) ) { - return new Accession( value, Accession.EMBL ); + return new Accession( value, Source.EMBL ); } else if ( source.equals( "ncbi" ) || source.equals( "genbank" ) ) { - return new Accession( value, Accession.NCBI ); + return new Accession( value, Source.NCBI ); } else if ( source.equals( "refseq" ) ) { - return new Accession( value, Accession.REFSEQ ); + return new Accession( value, Source.REFSEQ ); } else if ( source.equals( "gi" ) ) { - return new Accession( value, Accession.GI ); + return new Accession( value, Source.GI ); } } return null; @@ -227,19 +229,19 @@ public final class SequenceAccessionTools { if ( !ForesterUtil.isEmpty( s ) ) { String v = parseUniProtAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { - return new Accession( v, Accession.UNIPROT ); + return new Accession( v, Source.UNIPROT ); } v = parseGenbankAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { - return new Accession( v, Accession.NCBI ); + return new Accession( v, Source.NCBI ); } v = parseRefSeqAccessorFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { - return new Accession( v, Accession.REFSEQ ); + return new Accession( v, Source.REFSEQ ); } v = parseGInumberFromString( s ); if ( !ForesterUtil.isEmpty( v ) ) { - return new Accession( v, Accession.GI ); + return new Accession( v, Source.GI ); } } return null; @@ -294,11 +296,7 @@ public final class SequenceAccessionTools { } public final static String parseUniProtAccessorFromString( final String s ) { - Matcher m = UNIPROT_KB_PATTERN_0.matcher( s ); - if ( m.find() ) { - return m.group( 1 ); - } - m = UNIPROT_KB_PATTERN_1.matcher( s ); + Matcher m = UNIPROT_KB_PATTERN_1.matcher( s ); if ( m.find() ) { return m.group( 1 ); } @@ -306,6 +304,10 @@ public final class SequenceAccessionTools { if ( m.find() ) { return m.group(); } + m = UNIPROT_KB_PATTERN_0.matcher( s ); + if ( m.find() ) { + return m.group( 1 ); + } return null; } } 
diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index 8172d3d..65ae847 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -60,6 +60,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { if ( line.indexOf( "[" ) > 0 ) { e.setDe( DatabaseTools.extract( line, "DEFINITION", "[" ) ); } + else if ( line.indexOf( "." ) > 0 ) { + e.setDe( DatabaseTools.extract( line, "DEFINITION", "." ) ); + } else { e.setDe( DatabaseTools.extract( line, "DEFINITION" ) ); } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index 05efea9..55eead3 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -43,6 +43,7 @@ import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Sequence; @@ -53,7 +54,7 @@ import org.forester.util.SequenceAccessionTools; public final class SequenceDbWsTools { - public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/"; + public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; public final static String EMBL_DBS_EMBL = "embl"; public final static String EMBL_DBS_REFSEQ_N = "refseqn"; @@ -125,7 +126,7 @@ public final class SequenceDbWsTools { return EbiDbEntry.createInstanceFromPlainText( lines ); } - public final static Accession 
obtainFromSeqAccession( final PhylogenyNode node ) { + public final static Accession obtainSeqAccession( final PhylogenyNode node ) { Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node ); if ( !isAccessionAcceptable( acc ) ) { acc = SequenceAccessionTools.obtainAccessorFromDataFields( node ); @@ -135,7 +136,7 @@ public final class SequenceDbWsTools { public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession id, final int max_lines_to_return ) throws IOException { - final List lines = queryEmblDb( id, max_lines_to_return ); + final List lines = queryEmblDbForRefSeqEntry( id, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines ); } @@ -143,7 +144,7 @@ public final class SequenceDbWsTools { final int lines_to_return, final SortedSet not_found, final PhylogenyNode node ) throws IOException { - final Accession acc = obtainFromSeqAccession( node ); + final Accession acc = obtainSeqAccession( node ); if ( !isAccessionAcceptable( acc ) ) { if ( node.isExternal() || !node.isEmpty() ) { not_found.add( node.toString() ); @@ -210,22 +211,30 @@ public final class SequenceDbWsTools { return result; } + public static List queryEmblDbForRefSeqEntry( final Accession id, final int max_lines_to_return ) + throws IOException { + final StringBuilder url_sb = new StringBuilder(); + url_sb.append( EMBL_REFSEQ ); + return queryDb( id.getValue(), max_lines_to_return, url_sb.toString() ); + } + public static List queryEmblDb( final Accession id, final int max_lines_to_return ) throws IOException { final StringBuilder url_sb = new StringBuilder(); - url_sb.append( BASE_EMBL_DB_URL ); - if ( ForesterUtil.isEmpty( id.getSource() ) || ( id.getSource() == Accession.NCBI ) ) { - url_sb.append( SequenceDbWsTools.EMBL_DBS_EMBL ); + /* RefSeq appends the complete EMBL_REFSEQ URL further down; every other source still needs the dbfetch base, otherwise only "embl/" (or nothing) is handed to queryDb -- NOTE(review): assumes queryDb expects a full URL prefix, confirm. */ if ( !Source.REFSEQ.toString().equals( id.getSource() ) ) { url_sb.append( "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/" ); } + if ( ForesterUtil.isEmpty( id.getSource() ) || ( id.getSource().equals( Source.NCBI.toString() ) ) ) { + url_sb.append( EMBL_DBS_EMBL ); url_sb.append( '/' ); } 
- else if ( id.getSource() == Accession.REFSEQ ) { - if ( id.getValue().toUpperCase().indexOf( 'P' ) == 1 ) { - url_sb.append( SequenceDbWsTools.EMBL_DBS_REFSEQ_P ); - url_sb.append( '/' ); - } - else { - url_sb.append( SequenceDbWsTools.EMBL_DBS_REFSEQ_N ); - url_sb.append( '/' ); - } + else if ( id.getSource().equals( Source.REFSEQ.toString() ) ) { + url_sb.append( EMBL_REFSEQ ); + // if ( id.getValue().toUpperCase().indexOf( 'P' ) == 1 ) { + // url_sb.append( SequenceDbWsTools.EMBL_DBS_REFSEQ_P ); + // url_sb.append( '/' ); + // } + // else { + // url_sb.append( SequenceDbWsTools.EMBL_DBS_REFSEQ_N ); + // url_sb.append( '/' ); + // } } return queryDb( id.getValue(), max_lines_to_return, url_sb.toString() ); } @@ -241,7 +250,7 @@ public final class SequenceDbWsTools { final Accession acc ) throws IOException { SequenceDatabaseEntry db_entry = null; final String query = acc.getValue(); - if ( acc.getSource() == Accession.UNIPROT ) { + if ( acc.getSource().equals( Source.UNIPROT.toString() ) ) { if ( DEBUG ) { System.out.println( "uniprot: " + query ); } @@ -252,7 +261,7 @@ public final class SequenceDbWsTools { // Eat this, and move to next. } } - else if ( acc.getSource() == Accession.EMBL ) { + else if ( acc.getSource().equals( Source.EMBL.toString() ) ) { if ( DEBUG ) { System.out.println( "embl: " + query ); } @@ -263,7 +272,7 @@ public final class SequenceDbWsTools { // Eat this, and move to next. 
} } - else if ( acc.getSource() == Accession.REFSEQ ) { + else if ( acc.getSource().equals( Source.REFSEQ.toString() ) ) { if ( DEBUG ) { System.out.println( "refseq: " + query ); } @@ -372,7 +381,9 @@ public final class SequenceDbWsTools { private final static boolean isAccessionAcceptable( final Accession acc ) { return ( !( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) || ( ( acc - .getSource() != Accession.UNIPROT ) && ( acc.getSource() != Accession.EMBL ) && ( acc.getSource() != Accession.REFSEQ ) ) ) ); + .getSource().equals( Source.UNIPROT.toString() ) ) == false /* rejected only when the source is none of UniProt, EMBL, RefSeq; "== false" because the opening parentheses sit on the unchanged context line */ + && !acc.getSource().equals( Source.EMBL.toString() ) && !acc.getSource() + .equals( Source.REFSEQ.toString() ) ) ) ); } private static List parseUniProtTaxonomy( final List result ) throws IOException { -- 1.7.10.2