import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
import org.forester.ws.uniprot.UniProtTaxonomy;
-import org.forester.ws.uniprot.UniProtWsTools;
+import org.forester.ws.uniprot.SequenceDbWsTools;
public final class TaxonomyDataManager extends RunnableProcess {
}
/**
 * Looks up taxonomies for a common-name query.
 * Pure delegation to getTaxonomiesFromCommonNameStrict (hosted on
 * SequenceDbWsTools after this change, UniProtWsTools before it), with
 * MAX_TAXONOMIES_TO_RETURN passed as the second argument.
 *
 * @param query the search string handed unchanged to the delegate
 * @return whatever list the delegate returns
 * @throws IOException declared by this method; presumably raised by the
 *         underlying web-service call (TODO confirm)
 */
private final static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String query ) throws IOException {
- return UniProtWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
+ return SequenceDbWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
}
/**
 * Looks up taxonomies for a taxonomy-id query.
 * Pure delegation to getTaxonomiesFromId (hosted on SequenceDbWsTools after
 * this change, UniProtWsTools before it), with MAX_TAXONOMIES_TO_RETURN
 * passed as the second argument.
 *
 * @param query the id, as a string, handed unchanged to the delegate
 * @return whatever list the delegate returns
 * @throws IOException declared by this method; presumably raised by the
 *         underlying web-service call (TODO confirm)
 */
private final static List<UniProtTaxonomy> getTaxonomiesFromId( final String query ) throws IOException {
- return UniProtWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN );
+ return SequenceDbWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN );
}
/**
 * Looks up taxonomies for a scientific-name query.
 * Pure delegation to getTaxonomiesFromScientificNameStrict (hosted on
 * SequenceDbWsTools after this change, UniProtWsTools before it), with
 * MAX_TAXONOMIES_TO_RETURN passed as the second argument.
 *
 * @param query the search string handed unchanged to the delegate
 * @return whatever list the delegate returns
 * @throws IOException declared by this method; presumably raised by the
 *         underlying web-service call (TODO confirm)
 */
private final static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String query ) throws IOException {
- return UniProtWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
+ return SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
}
/**
 * Looks up taxonomies for a taxonomy-code query.
 * Pure delegation to getTaxonomiesFromTaxonomyCode (hosted on
 * SequenceDbWsTools after this change, UniProtWsTools before it), with
 * MAX_TAXONOMIES_TO_RETURN passed as the second argument.
 *
 * @param query the code handed unchanged to the delegate
 * @return whatever list the delegate returns
 * @throws IOException declared by this method; presumably raised by the
 *         underlying web-service call (TODO confirm)
 */
private final static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String query ) throws IOException {
- return UniProtWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN );
+ return SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN );
}
static final boolean isHasAppropriateId( final Taxonomy tax ) {
import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.Phylogeny;
-import org.forester.ws.uniprot.UniProtWsTools;
+import org.forester.ws.uniprot.SequenceDbWsTools;
public class AncestralTaxonomyInferrer extends RunnableProcess {
}
/**
 * Returns the BASE_UNIPROT_URL constant, read from SequenceDbWsTools after
 * this change (from UniProtWsTools before it).
 *
 * @return the value of BASE_UNIPROT_URL
 */
public static String getBaseUrl() {
- return UniProtWsTools.BASE_UNIPROT_URL;
+ return SequenceDbWsTools.BASE_UNIPROT_URL;
}
private void inferTaxonomies() {
package org.forester.archaeopteryx.tools;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.SortedSet;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceIdParser;
import org.forester.ws.uniprot.SequenceDatabaseEntry;
-import org.forester.ws.uniprot.UniProtWsTools;
+import org.forester.ws.uniprot.SequenceDbWsTools;
public final class SequenceDataRetriver extends RunnableProcess {
private final static boolean DEBUG = true;
/**
 * Database kind that the retrieval code dispatches on when fetching an entry.
 * This change appends REFSEQ. It is appended after NONE rather than placed
 * next to NCBI, so the ordinals of the existing constants are unchanged.
 */
private enum Db {
- UNIPROT, EMBL, NCBI, NONE;
+ UNIPROT, EMBL, NCBI, NONE, REFSEQ;
}
public SequenceDataRetriver( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
db = Db.EMBL;
}
else if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- if ( ( query = UniProtWsTools.parseUniProtAccessor( node.getName() ) ) != null ) {
+ if ( ( query = SequenceDbWsTools.parseUniProtAccessor( node.getName() ) ) != null ) {
db = Db.UNIPROT;
}
else if ( ( id = SequenceIdParser.parse( node.getName() ) ) != null ) {
- db = Db.NCBI;
+
+ if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
+ db = Db.NCBI;
+ }
+ else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
+ db = Db.REFSEQ;
+ }
+
}
}
SequenceDatabaseEntry db_entry = null;
if ( DEBUG ) {
System.out.println( "uniprot: " + query );
}
-
- db_entry = UniProtWsTools.obtainUniProtEntry( query, 200 );
-
+ db_entry = SequenceDbWsTools.obtainUniProtEntry( query, 200 );
}
if ( ( db == Db.EMBL ) || ( ( db == Db.UNIPROT ) && ( db_entry == null ) ) ) {
if ( DEBUG ) {
System.out.println( "embl: " + query );
}
-
- db_entry = UniProtWsTools.obtainEmblEntry( query, 200 );
-
+ db_entry = SequenceDbWsTools.obtainEmblEntry( new Identifier( query ), 200 );
if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) {
db = Db.EMBL;
}
}
}
+ else if ( ( db == Db.REFSEQ ) && ( id != null ) ) {
+ db_entry = SequenceDbWsTools.obtainRefSeqEntryFromEmbl( id, 200 );
+ }
else if ( ( db == Db.NCBI ) && ( id != null ) ) {
- System.out.println( "db == Db.NCBI && id != null" );
- db_entry = UniProtWsTools.obtainrefSeqentryFromEmbl( id, 200 );
+ db_entry = SequenceDbWsTools.obtainEmblEntry( id, 200 );
}
if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
else if ( db == Db.UNIPROT ) {
type = "uniprot";
}
+ else if ( db == Db.NCBI ) {
+ type = "ncbi";
+ }
+ else if ( db == Db.REFSEQ ) {
+ type = "refseq";
+ }
seq.setAccession( new Accession( db_entry.getAccession(), type ) );
}
if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
public final class Identifier implements PhylogenyData {
- final public static String NCBI = "ncbi";
+ final public static String NCBI = "ncbi";
final public static String REFSEQ = "refseq";
final private String _value;
import org.forester.ws.uniprot.DatabaseTools;
import org.forester.ws.uniprot.SequenceDatabaseEntry;
import org.forester.ws.uniprot.UniProtTaxonomy;
-import org.forester.ws.uniprot.UniProtWsTools;
+import org.forester.ws.uniprot.SequenceDbWsTools;
import org.forester.ws.wabi.TxSearch;
import org.forester.ws.wabi.TxSearch.RANKS;
import org.forester.ws.wabi.TxSearch.TAX_NAME_CLASS;
private static boolean testUniprotTaxonomySearch() {
try {
- List<UniProtTaxonomy> results = UniProtWsTools
+ List<UniProtTaxonomy> results = SequenceDbWsTools
.getTaxonomiesFromCommonNameStrict( "starlet sea anemone", 10 );
if ( results.size() != 1 ) {
return false;
return false;
}
results = null;
- results = UniProtWsTools.getTaxonomiesFromScientificNameStrict( "Nematostella vectensis", 10 );
+ results = SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( "Nematostella vectensis", 10 );
if ( results.size() != 1 ) {
return false;
}
return false;
}
results = null;
- results = UniProtWsTools.getTaxonomiesFromId( "45351", 10 );
+ results = SequenceDbWsTools.getTaxonomiesFromId( "45351", 10 );
if ( results.size() != 1 ) {
return false;
}
return false;
}
results = null;
- results = UniProtWsTools.getTaxonomiesFromTaxonomyCode( "NEMVE", 10 );
+ results = SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( "NEMVE", 10 );
if ( results.size() != 1 ) {
return false;
}
}
private static boolean testUniprotEntryRetrieval() {
- if ( !UniProtWsTools.parseUniProtAccessor( "P12345" ).equals( "P12345" ) ) {
+ if ( !SequenceDbWsTools.parseUniProtAccessor( "P12345" ).equals( "P12345" ) ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "EP12345" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "EP12345" ) != null ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "3 4P12345" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "3 4P12345" ) != null ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "P12345E" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "P12345E" ) != null ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "P123455" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "P123455" ) != null ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "EP12345E" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "EP12345E" ) != null ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "AY423861" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "AY423861" ) != null ) {
return false;
}
- if ( !UniProtWsTools.parseUniProtAccessor( "P1DDD5" ).equals( "P1DDD5" ) ) {
+ if ( !SequenceDbWsTools.parseUniProtAccessor( "P1DDD5" ).equals( "P1DDD5" ) ) {
return false;
}
- if ( UniProtWsTools.parseUniProtAccessor( "P1DDDD" ) != null ) {
+ if ( SequenceDbWsTools.parseUniProtAccessor( "P1DDDD" ) != null ) {
return false;
}
- if ( !UniProtWsTools.parseUniProtAccessor( "P1234X/P12345/12-42" ).equals( "P12345" ) ) {
+ if ( !SequenceDbWsTools.parseUniProtAccessor( "P1234X/P12345/12-42" ).equals( "P12345" ) ) {
return false;
}
- if ( !UniProtWsTools.parseUniProtAccessor( "P1234X P12345 12-42" ).equals( "P12345" ) ) {
+ if ( !SequenceDbWsTools.parseUniProtAccessor( "P1234X P12345 12-42" ).equals( "P12345" ) ) {
return false;
}
- if ( !UniProtWsTools.parseUniProtAccessor( "P12345/12-42" ).equals( "P12345" ) ) {
+ if ( !SequenceDbWsTools.parseUniProtAccessor( "P12345/12-42" ).equals( "P12345" ) ) {
return false;
}
- if ( !UniProtWsTools.parseUniProtAccessor( "P1234X/P12345" ).equals( "P12345" ) ) {
+ if ( !SequenceDbWsTools.parseUniProtAccessor( "P1234X/P12345" ).equals( "P12345" ) ) {
return false;
}
try {
- final SequenceDatabaseEntry entry = UniProtWsTools.obtainUniProtEntry( "P12345", 200 );
+ final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 200 );
if ( !entry.getAccession().equals( "P12345" ) ) {
return false;
}
import java.util.regex.Pattern;\r
\r
import org.forester.phylogeny.data.Identifier;\r
-import org.forester.ws.uniprot.DatabaseTools;\r
\r
public final class SequenceIdParser {\r
\r
private final static Pattern REFSEQ_PATTERN = Pattern\r
.compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}_\\d{6,})(?:[^a-zA-Z0-9]|\\Z)" );\r
\r
- \r
- private final static boolean DEBUG = true;\r
- \r
- \r
-\r
+ \r
\r
/**\r
* Returns null if no match.\r
public final static Identifier parse( final String s ) {\r
String v = parseGenbankAccessor( s );\r
if ( !ForesterUtil.isEmpty( v ) ) {\r
+\r
return new Identifier( v, Identifier.NCBI );\r
}\r
v = parseRefSeqAccessor( s );\r
private String _os;
private String _tax_id;
private String _symbol;
+ private String _provider;
// Private no-op constructor: the static factory methods visible in this
// class obtain instances with "new EbiDbEntry()" and then fill in fields.
private EbiDbEntry() {
}
}
- public static SequenceDatabaseEntry createInstanceForRefSeq( final List<String> lines ) {
+ public static SequenceDatabaseEntry createInstanceFromPlainTextForRefSeq( final List<String> lines ) {
final EbiDbEntry e = new EbiDbEntry();
for( final String line : lines ) {
- System.out.println( "-" + line );
+ // System.out.println( "-" + line );
if ( line.startsWith( "ACCESSION" ) ) {
e.setPA( DatabaseTools.extract( line, "ACCESSION" ) );
}
public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
final EbiDbEntry e = new EbiDbEntry();
for( final String line : lines ) {
- System.out.println( "->" + line );
+
if ( line.startsWith( "PA" ) ) {
e.setPA( DatabaseTools.extract( line, "PA" ) );
}
&& ForesterUtil.isEmpty( getTaxonomyScientificName() )
&& ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
}
+
/**
 * Returns the provider string stored in the _provider field.
 * NOTE(review): _provider is declared without an initializer, so this
 * returns null unless setProvider has been called first - confirm that
 * callers tolerate null.
 */
+ @Override
+ public String getProvider() {
+ return _provider;
+ }
+
/**
 * Stores the given provider string in the _provider field; no validation
 * or normalization is applied.
 *
 * @param provider the value subsequently returned by getProvider()
 */
+ public void setProvider( final String provider ) {
+ _provider = provider;
+ }
}
public boolean isEmpty();
public String getAccession();
+
+ public String getProvider();
public String getSequenceName();
import org.forester.phylogeny.data.Identifier;
import org.forester.util.ForesterUtil;
-public final class UniProtWsTools {
+public final class SequenceDbWsTools {
private static final boolean ALLOW_TAXONOMY_CODE_HACKS = true; //TODO turn off for final release!
url_sb.append( BASE_EMBL_DB_URL );
if ( ForesterUtil.isEmpty( id.getProvider() ) || id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
- url_sb.append( '/');
- url_sb.append( UniProtWsTools.EMBL_DBS_EMBL );
+
+ url_sb.append( SequenceDbWsTools.EMBL_DBS_EMBL );
url_sb.append( '/');
}
else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
if ( id.getValue().toUpperCase().indexOf( 'P' ) == 1 ) {
- url_sb.append( '/');
- url_sb.append( UniProtWsTools.EMBL_DBS_REFSEQ_P );
+
+ url_sb.append( SequenceDbWsTools.EMBL_DBS_REFSEQ_P );
url_sb.append( '/');
}
else {
- url_sb.append( '/');
- url_sb.append( UniProtWsTools.EMBL_DBS_REFSEQ_N );
+
+ url_sb.append( SequenceDbWsTools.EMBL_DBS_REFSEQ_N );
url_sb.append( '/');
}
}
return UniProtEntry.createInstanceFromPlainText( lines );
}
/**
 * Fetches the lines for the given identifier via queryEmblDb and builds a
 * SequenceDatabaseEntry from them with the RefSeq-specific EbiDbEntry
 * factory. This change fixes the capitalization of the method name
 * (obtainrefSeqentryFromEmbl -> obtainRefSeqEntryFromEmbl) and follows the
 * rename of the factory to createInstanceFromPlainTextForRefSeq.
 *
 * @param id identifier passed unchanged to queryEmblDb
 * @param max_lines_to_return passed unchanged to queryEmblDb; presumably a
 *        cap on the number of response lines read (TODO confirm)
 * @return the entry produced by the EbiDbEntry factory
 * @throws IOException declared by this method; presumably raised by
 *         queryEmblDb (TODO confirm)
 */
- public static SequenceDatabaseEntry obtainrefSeqentryFromEmbl( final Identifier id, final int max_lines_to_return )
+ public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Identifier id, final int max_lines_to_return )
throws IOException {
final List<String> lines = queryEmblDb( id, max_lines_to_return );
- return EbiDbEntry.createInstanceForRefSeq( lines );
+ return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines );
}
/**
 * Fetches the lines for the given identifier via queryEmblDb and builds a
 * SequenceDatabaseEntry from them with EbiDbEntry.createInstanceFromPlainText.
 * This change replaces the String parameter with an Identifier: the method
 * previously wrapped the raw query in "new Identifier( query )" itself, and
 * that wrapping is now the caller's responsibility.
 *
 * @param id identifier passed unchanged to queryEmblDb
 * @param max_lines_to_return passed unchanged to queryEmblDb; presumably a
 *        cap on the number of response lines read (TODO confirm)
 * @return the entry produced by the EbiDbEntry factory
 * @throws IOException declared by this method; presumably raised by
 *         queryEmblDb (TODO confirm)
 */
- public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return )
+ public static SequenceDatabaseEntry obtainEmblEntry( final Identifier id, final int max_lines_to_return )
throws IOException {
- final List<String> lines = queryEmblDb( new Identifier( query ), max_lines_to_return );
+ final List<String> lines = queryEmblDb( id , max_lines_to_return );
return EbiDbEntry.createInstanceFromPlainText( lines );
}
}
&& ForesterUtil.isEmpty( getTaxonomyScientificName() )
&& ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
}
+
/**
 * Returns the provider of this entry, which for this class is always the
 * fixed literal "uniprot".
 */
+ @Override
+ public String getProvider() {
+ return "uniprot";
+ }
}