import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sequence.MolecularSequence.TYPE;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;
public final class SequenceDbWsTools {
- public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/";
- public final static int DEFAULT_LINES_TO_RETURN = 4000;
- //public final static String EMBL_DBS_EMBL = "embl";
- public final static String EMBL_DBS_REFSEQ_N = "refseqn";
- public final static String EMBL_DBS_REFSEQ_P = "refseqp";
- public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
- public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
- public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
- private final static boolean DEBUG = true;
- private final static String URL_ENC = "UTF-8";
+ public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/";
+ public final static int DEFAULT_LINES_TO_RETURN = 4000;
+ public final static String EMBL_DBS_REFSEQ_N = "refseqn";
+ public final static String EMBL_DBS_REFSEQ_P = "refseqp";
+ public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
+ public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
+ public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
+ private final static boolean DEBUG = true;
+ private final static String URL_ENC = "UTF-8";
+ private final static int SLEEP = 200; // Value passed to Thread.sleep() (milliseconds) to avoid accessing online dbs in too quick succession.
+ private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false; // When false, a db entry's molecular sequence is copied to a sequence only if that sequence's own molecular sequence is empty.
public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict( final String cn,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromCommonName( cn, max_taxonomies_return );
if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) {
final List<UniProtTaxonomy> filtered_taxonomies = new ArrayList<UniProtTaxonomy>();
* Does not return "sub-types".
* For example, for "Mus musculus" only "Mus musculus" itself is returned,
* rather than the list "Mus musculus", "Mus musculus bactrianus", ...
- *
+ *
*/
public static List<UniProtTaxonomy> getTaxonomiesFromScientificNameStrict( final String sn,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final List<UniProtTaxonomy> taxonomies = getTaxonomiesFromScientificName( sn, max_taxonomies_return );
if ( ( taxonomies != null ) && ( taxonomies.size() > 0 ) ) {
final List<UniProtTaxonomy> filtered_taxonomies = new ArrayList<UniProtTaxonomy>();
public static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String code,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final String my_code = new String( code );
final List<String> result = getTaxonomyStringFromTaxonomyCode( my_code, max_taxonomies_return );
if ( result.size() > 0 ) {
final Accession acc = SequenceAccessionTools.parseAccessorFromString( acc_str );
if ( acc == null ) {
throw new IllegalArgumentException( "could not extract acceptable sequence db accessor from \"" + acc_str
- + "\"" );
+ + "\"" );
}
if ( acc.getSource().equals( Source.REFSEQ.toString() ) || acc.getSource().equals( Source.EMBL.toString() )
|| acc.getSource().equals( Source.NCBI.toString() ) ) {
}
else {
throw new IllegalArgumentException( "don't know how to handle request for source \"" + acc.getSource()
- + "\"" );
+ + "\"" );
}
}
}
in.close();
try {
- // To prevent accessing online dbs in too quick succession.
- Thread.sleep( 20 );
+ // To prevent accessing online dbs in too quick succession.
+ Thread.sleep( SLEEP );
}
catch ( final InterruptedException e ) {
e.printStackTrace();
final int i_b = target.indexOf( b );
if ( ( i_a < 0 ) || ( i_b < i_a ) ) {
throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a
- + "\" and \"" + b + "\"" );
+ + "\" and \"" + b + "\"" );
}
return target.substring( i_a + a.length(), i_b ).trim();
}
// Eat this exception.
}
}
+ if ( ( db_entry.getMolecularSequence() != null )
+ && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() )
+ && ( ALLOW_TO_OVERWRITE_MOL_SEQ || seq.getMolecularSequence().isEmpty() ) ) {
+ seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() );
+ seq.setMolecularSequenceAligned( false );
+ if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) {
+ seq.setType( "protein" );
+ }
+ else if ( db_entry.getMolecularSequence().getType() == TYPE.DNA ) {
+ seq.setType( "dna" );
+ }
+ else if ( db_entry.getMolecularSequence().getType() == TYPE.RNA ) {
+ seq.setType( "rna" );
+ }
+ }
if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
for( final GoTerm go : db_entry.getGoTerms() ) {
final Annotation ann = new Annotation( go.getGoId().getId() );
}
}
try {
- Thread.sleep( 10 );// Sleep for 10 ms
+ Thread.sleep( SLEEP );
}
catch ( final InterruptedException ie ) {
}
private static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String sn,
final int max_taxonomies_return )
- throws IOException {
+ throws IOException {
final List<String> result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return );
if ( result.size() > 0 ) {
return parseUniProtTaxonomy( result );
return ( !( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) || ( ( acc
.getSource().equals( Source.UNIPROT.toString() ) )
&& ( acc.getSource().toString().equals( Source.EMBL.toString() ) ) && ( acc.getSource().toString()
- .equals( Source.REFSEQ.toString() ) ) ) ) );
+ .equals( Source.REFSEQ.toString() ) ) ) ) );
}
private static List<UniProtTaxonomy> parseUniProtTaxonomy( final List<String> result ) throws IOException {