X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Fseqdb%2FSequenceDbWsTools.java;h=9106c38aae62aa1472ad384338ce31384a409a6c;hb=10297bd8b8a4b4ab198a17a42fc6ff24ae2ed49b;hp=da16246c7ff3ae9c357c50afb68a70c23e8641ab;hpb=c9d6692f563c182dc32758f8cd5237afe107a44c;p=jalview.git diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index da16246..9106c38 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -55,20 +55,21 @@ import org.forester.util.SequenceAccessionTools; public final class SequenceDbWsTools { - public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; - public final static int DEFAULT_LINES_TO_RETURN = 4000; - public final static String EMBL_DBS_REFSEQ_N = "refseqn"; - public final static String EMBL_DBS_REFSEQ_P = "refseqp"; - public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id="; - public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; - public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id="; - private final static boolean DEBUG = true; - private final static String URL_ENC = "UTF-8"; - private final static int SLEEP = 200; + public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; + public final static int DEFAULT_LINES_TO_RETURN = 4000; + public final static String EMBL_DBS_REFSEQ_N = "refseqn"; + public final static String EMBL_DBS_REFSEQ_P = "refseqp"; + public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id="; + public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; + public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id="; + private final static boolean DEBUG = true; + private final static String URL_ENC = "UTF-8"; + private final static int SLEEP = 200; + private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false; public static List getTaxonomiesFromCommonNameStrict( final String cn, final int max_taxonomies_return ) - throws IOException { + throws IOException { final List taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return ); if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) { final List filtered_taxonomies = new ArrayList(); @@ -95,11 +96,11 @@ public final class SequenceDbWsTools { * Does not return "sub-types". * For example, for "Mus musculus" only returns "Mus musculus" * and not "Mus musculus", "Mus musculus bactrianus", ... - * + * */ public static List getTaxonomiesFromScientificNameStrict( final String sn, final int max_taxonomies_return ) - throws IOException { + throws IOException { final List taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return ); if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) { final List filtered_taxonomies = new ArrayList(); @@ -115,7 +116,7 @@ public final class SequenceDbWsTools { public static List getTaxonomiesFromTaxonomyCode( final String code, final int max_taxonomies_return ) - throws IOException { + throws IOException { final String my_code = new String( code ); final List result = getTaxonomyStringFromTaxonomyCode( my_code, max_taxonomies_return ); if ( result.size() > 0 ) { @@ -141,7 +142,7 @@ public final class SequenceDbWsTools { final Accession acc = SequenceAccessionTools.parseAccessorFromString( acc_str ); if ( acc == null ) { throw new IllegalArgumentException( "could not extract acceptable sequence db accessor from \"" + acc_str - + "\"" ); + + "\"" ); } if ( acc.getSource().equals( Source.REFSEQ.toString() ) || acc.getSource().equals( Source.EMBL.toString() ) || acc.getSource().equals( Source.NCBI.toString() ) ) { @@ -152,7 +153,7 @@ public final class SequenceDbWsTools { } else { throw new IllegalArgumentException( "don't know how to handle request for source \"" + acc.getSource() - + "\"" ); + + "\"" ); } } @@ -250,7 +251,7 @@ public final class SequenceDbWsTools { } in.close(); try { - // To prevent accessing online dbs in too quick succession. + // To prevent accessing online dbs in too quick succession. Thread.sleep( SLEEP ); } catch ( final InterruptedException e ) { @@ -300,7 +301,7 @@ public final class SequenceDbWsTools { final int i_b = target.indexOf( b ); if ( ( i_a < 0 ) || ( i_b < i_a ) ) { throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a - + "\" and \"" + b + "\"" ); + + "\" and \"" + b + "\"" ); } return target.substring( i_a + a.length(), i_b ).trim(); } @@ -383,7 +384,7 @@ public final class SequenceDbWsTools { } if ( ( db_entry.getMolecularSequence() != null ) && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() ) - && seq.getMolecularSequence().isEmpty() ) { + && ( ALLOW_TO_OVERWRITE_MOL_SEQ || seq.getMolecularSequence().isEmpty() ) ) { seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() ); seq.setMolecularSequenceAligned( false ); if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) { @@ -454,7 +455,7 @@ public final class SequenceDbWsTools { private static List getTaxonomiesFromScientificName( final String sn, final int max_taxonomies_return ) - throws IOException { + throws IOException { final List result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return ); if ( result.size() > 0 ) { return parseUniProtTaxonomy( result ); @@ -486,7 +487,7 @@ public final class SequenceDbWsTools { return ( !( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) || ( ( acc .getSource().equals( Source.UNIPROT.toString() ) ) && ( acc.getSource().toString().equals( Source.EMBL.toString() ) ) && ( acc.getSource().toString() - .equals( Source.REFSEQ.toString() ) ) ) ) ); + .equals( Source.REFSEQ.toString() ) ) ) ) ); } private static List parseUniProtTaxonomy( final List result ) throws IOException {