public final class SequenceDbWsTools {
- public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/";
- public final static int DEFAULT_LINES_TO_RETURN = 4000;
- public final static String EMBL_DBS_REFSEQ_N = "refseqn";
- public final static String EMBL_DBS_REFSEQ_P = "refseqp";
- public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
- public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
- public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
- private final static boolean DEBUG = true;
- private final static String URL_ENC = "UTF-8";
- private final static int SLEEP = 200;
+ public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/";
+ public final static int DEFAULT_LINES_TO_RETURN = 4000;
+ public final static String EMBL_DBS_REFSEQ_N = "refseqn";
+ public final static String EMBL_DBS_REFSEQ_P = "refseqp";
+ public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
+ public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
+ public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
+ private final static boolean DEBUG = true;
+ private final static String URL_ENC = "UTF-8";
+ private final static int SLEEP = 200;
+ private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false;
public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict( final String cn,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return );
if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) {
final List<UniProtTaxonomy> filtered_taxonomies = new ArrayList<UniProtTaxonomy>();
* Does not return "sub-types".
* For example, for "Mus musculus" it returns only "Mus musculus"
* and not the whole list "Mus musculus", "Mus musculus bactrianus", ...
- *
+ *
*/
public static List<UniProtTaxonomy> getTaxonomiesFromScientificNameStrict( final String sn,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return );
if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) {
final List<UniProtTaxonomy> filtered_taxonomies = new ArrayList<UniProtTaxonomy>();
public static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String code,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final String my_code = new String( code );
final List<String> result = getTaxonomyStringFromTaxonomyCode( my_code, max_taxonomies_return );
if ( result.size() > 0 ) {
final Accession acc = SequenceAccessionTools.parseAccessorFromString( acc_str );
if ( acc == null ) {
throw new IllegalArgumentException( "could not extract acceptable sequence db accessor from \"" + acc_str
- + "\"" );
+ + "\"" );
}
if ( acc.getSource().equals( Source.REFSEQ.toString() ) || acc.getSource().equals( Source.EMBL.toString() )
|| acc.getSource().equals( Source.NCBI.toString() ) ) {
}
else {
throw new IllegalArgumentException( "don't know how to handle request for source \"" + acc.getSource()
- + "\"" );
+ + "\"" );
}
}
}
in.close();
try {
- // To prevent accessing online dbs in too quick succession.
+ // To prevent accessing online dbs in too quick succession.
Thread.sleep( SLEEP );
}
catch ( final InterruptedException e ) {
final int i_b = target.indexOf( b );
if ( ( i_a < 0 ) || ( i_b < i_a ) ) {
throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a
- + "\" and \"" + b + "\"" );
+ + "\" and \"" + b + "\"" );
}
return target.substring( i_a + a.length(), i_b ).trim();
}
}
}
if ( ( db_entry.getMolecularSequence() != null )
- && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() ) ) {
+ && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() )
+ && ( ALLOW_TO_OVERWRITE_MOL_SEQ || seq.getMolecularSequence().isEmpty() ) ) {
seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() );
+ seq.setMolecularSequenceAligned( false );
if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) {
seq.setType( "protein" );
}
private static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String sn,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final List<String> result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return );
if ( result.size() > 0 ) {
return parseUniProtTaxonomy( result );
return ( !( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) || ( ( acc
.getSource().equals( Source.UNIPROT.toString() ) )
&& ( acc.getSource().toString().equals( Source.EMBL.toString() ) ) && ( acc.getSource().toString()
- .equals( Source.REFSEQ.toString() ) ) ) ) );
+ .equals( Source.REFSEQ.toString() ) ) ) ) );
}
private static List<UniProtTaxonomy> parseUniProtTaxonomy( final List<String> result ) throws IOException {