- if ( code.equals( "CAP" ) ) {
- return getTaxonomiesFromId( "283909", max_taxonomies_return );
- }
- else if ( code.equals( "FUGRU" ) ) {
- return getTaxonomiesFromId( "31033", max_taxonomies_return );
- }
- else if ( code.equals( "GIALA" ) ) {
- return getTaxonomiesFromId( "5741", max_taxonomies_return );
- }
- else if ( code.equals( "TRIVE" ) ) {
- return getTaxonomiesFromId( "413071", max_taxonomies_return );
- }
- else if ( code.equals( "CAPOWC" ) ) {
- return getTaxonomiesFromId( "192875", max_taxonomies_return );
- }
- else if ( code.equals( "SPHARC" ) ) {
- return getTaxonomiesFromId( "667725", max_taxonomies_return );
- }
- else if ( code.equals( "THETRA" ) ) {
- return getTaxonomiesFromId( "529818", max_taxonomies_return );
- }
- else if ( code.equals( "CHLVUL" ) ) {
- return getTaxonomiesFromId( "574566", max_taxonomies_return );
- }
- else if ( code.equals( "CITCLE" ) ) {
- return getTaxonomiesFromId( "85681", max_taxonomies_return );
- }
- else if ( code.equals( "MYCPOP" ) ) {
- return getTaxonomiesFromId( "85929", max_taxonomies_return );
- }
- else if ( code.equals( "AGABB" ) ) {
- return getTaxonomiesFromId( "597362", max_taxonomies_return );
- }
- else if ( code.equals( "BAUCOM" ) ) {
- return getTaxonomiesFromId( "430998", max_taxonomies_return );
- }
- else if ( code.equals( "DICSQU" ) ) {
- return getTaxonomiesFromId( "114155", max_taxonomies_return );
- }
- else if ( code.equals( "FOMPIN" ) ) {
- return getTaxonomiesFromId( "40483", max_taxonomies_return );
- }
- else if ( code.equals( "HYDMA" ) ) {
- return getTaxonomiesFromId( "6085", max_taxonomies_return );
- }
- else if ( code.equals( "MYCFI" ) ) {
- return getTaxonomiesFromId( "83344", max_taxonomies_return );
- }
- else if ( code.equals( "OIDMAI" ) ) {
- return getTaxonomiesFromId( "78148", max_taxonomies_return );
- }
- else if ( code.equals( "OSTRC" ) ) {
- return getTaxonomiesFromId( "385169", max_taxonomies_return );
- }
- else if ( code.equals( "POSPL" ) ) {
- return getTaxonomiesFromId( "104341", max_taxonomies_return );
- }
- else if ( code.equals( "SAICOM" ) ) {
- return getTaxonomiesFromId( "5606", max_taxonomies_return );
- }
- else if ( code.equals( "SERLA" ) ) {
- return getTaxonomiesFromId( "85982", max_taxonomies_return );
- }
- else if ( code.equals( "SPORO" ) ) {
- return getTaxonomiesFromId( "40563", max_taxonomies_return );
- }
- else if ( code.equals( "ACRALC" ) ) {
- return getTaxonomiesFromId( "398408", max_taxonomies_return );
- }
- else if ( code.equals( "THITER" ) ) {
- return getTaxonomiesFromId( "35720", max_taxonomies_return );
+ final List<String> lines = queryEmblDb( id, max_lines_to_return );
+ return EbiDbEntry.createInstanceFromPlainText( lines );
+ }
+
+ /**
+  * Looks up {@code id} via {@code queryEmblDb} and turns the returned text lines
+  * into a database entry using the RefSeq-specific factory
+  * {@code EbiDbEntry.createInstanceFromPlainTextForRefSeq}.
+  *
+  * @param id the identifier handed to {@code queryEmblDb}
+  * @param max_lines_to_return line limit passed through unchanged to {@code queryEmblDb}
+  * @return the entry produced by {@code EbiDbEntry.createInstanceFromPlainTextForRefSeq}
+  * @throws IOException declared by this method; presumably raised by the underlying query (confirm against {@code queryEmblDb})
+  */
+ public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Identifier id, final int max_lines_to_return )
+ throws IOException {
+ // Fetch and parse in one step; the intermediate list is not needed elsewhere.
+ return EbiDbEntry.createInstanceFromPlainTextForRefSeq( queryEmblDb( id, max_lines_to_return ) );
+ }
+
+ public static SortedSet<String> obtainSeqInformation( final Phylogeny phy,
+ final boolean ext_nodes_only,
+ final boolean allow_to_set_taxonomic_data,
+ final int lines_to_return ) throws IOException {
+ final SortedSet<String> not_found = new TreeSet<String>();
+ for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
+ final PhylogenyNode node = iter.next();
+ if ( ext_nodes_only && node.isInternal() ) {
+ continue;
+ }
+ String query = null;
+ Identifier id = null;
+ Db db = Db.NONE;
+ if ( node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAccession() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
+ && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase()
+ .startsWith( "uniprot" )
+ || node.getNodeData().getSequence().getAccession().getValue().toLowerCase()
+ .startsWith( "swissprot" )
+ || node.getNodeData().getSequence().getAccession().getValue().toLowerCase()
+ .startsWith( "trembl" )
+ || node.getNodeData().getSequence().getAccession().getValue().toLowerCase()
+ .startsWith( "sp" ) || node.getNodeData().getSequence().getAccession().getValue()
+ .toLowerCase().startsWith( "uniprotkb" ) ) ) {
+ query = node.getNodeData().getSequence().getAccession().getValue();
+ db = Db.UNIPROT;
+ }
+ else if ( node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAccession() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
+ && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) || node
+ .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" ) ) ) {
+ query = node.getNodeData().getSequence().getAccession().getValue();
+ db = Db.EMBL;
+ }
+ else if ( node.getNodeData().isHasSequence()
+ && ( node.getNodeData().getSequence().getAccession() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
+ && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ncbi" ) || node
+ .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "genbank" ) ) ) {
+ query = node.getNodeData().getSequence().getAccession().getValue();
+ // db = Db.NCBI;
+ }
+ else if ( node.getNodeData().isHasSequence() && ( node.getNodeData().getSequence().getAccession() != null )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() )
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() )
+ && node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "refseq" ) ) {
+ query = node.getNodeData().getSequence().getAccession().getValue();
+ db = Db.REFSEQ;
+ }
+ else {
+ if ( ( query = ForesterUtil.extractUniProtKbProteinSeqIdentifier( node ) ) != null ) {
+ db = Db.UNIPROT;
+ }
+ else if ( node.getNodeData().isHasSequence() ) {
+ if ( ( id = SequenceIdParser.parse( node.getName() ) ) != null ) {
+ if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
+ // db = Db.NCBI;
+ }
+ else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
+ db = Db.REFSEQ;
+ }
+ }
+ else if ( ( id = SequenceIdParser.parse( node.getNodeData().getSequence().getName() ) ) != null ) {
+ if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
+ // = Db.NCBI;
+ }
+ else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
+ db = Db.REFSEQ;
+ }
+ }
+ else if ( ( id = SequenceIdParser.parse( node.getNodeData().getSequence().getGeneName() ) ) != null ) {
+ if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
+ // db = Db.NCBI;
+ }
+ else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
+ db = Db.REFSEQ;
+ }
+ }
+ else if ( ( id = SequenceIdParser.parse( node.getNodeData().getSequence().getSymbol() ) ) != null ) {
+ if ( id.getProvider().equalsIgnoreCase( Identifier.NCBI ) ) {
+ // db = Db.NCBI;
+ }
+ else if ( id.getProvider().equalsIgnoreCase( Identifier.REFSEQ ) ) {
+ db = Db.REFSEQ;
+ }
+ }
+ }
+ }
+ if ( db == Db.NONE ) {
+ not_found.add( node.toString() );
+ }
+ SequenceDatabaseEntry db_entry = null;
+ if ( !ForesterUtil.isEmpty( query ) ) {
+ if ( db == Db.UNIPROT ) {
+ if ( DEBUG ) {
+ System.out.println( "uniprot: " + query );
+ }
+ db_entry = obtainUniProtEntry( query, lines_to_return );
+ }
+ else if ( db == Db.EMBL ) {
+ if ( DEBUG ) {
+ System.out.println( "embl: " + query );
+ }
+ db_entry = obtainEmblEntry( new Identifier( query ), lines_to_return );
+ }
+ else if ( db == Db.REFSEQ ) {
+ if ( DEBUG ) {
+ System.out.println( "refseq: " + query );
+ }
+ db_entry = obtainRefSeqEntryFromEmbl( new Identifier( query ), lines_to_return );
+ }
+ // else if ( db == Db.NCBI ) {
+ // if ( DEBUG ) {
+ // System.out.println( "ncbi: " + query );
+ // }
+ // db_entry = obtainNcbiEntry( new Identifier( query ), lines_to_return );
+ // }
+ }
+ else if ( ( db == Db.REFSEQ ) && ( id != null ) ) {
+ db_entry = obtainRefSeqEntryFromEmbl( id, lines_to_return );
+ }
+ //else if ( ( db == Db.NCBI ) && ( id != null ) ) {
+ // db_entry = obtainNcbiEntry( id, lines_to_return );
+ //}
+ if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
+ final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence()
+ : new Sequence();
+ if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
+ String type = null;
+ if ( db == Db.EMBL ) {
+ type = "embl";
+ }
+ else if ( db == Db.UNIPROT ) {
+ type = "uniprot";
+ }
+ // else if ( db == Db.NCBI ) {
+ // type = "ncbi";
+ // }
+ else if ( db == Db.REFSEQ ) {
+ type = "refseq";
+ }
+ seq.setAccession( new Accession( db_entry.getAccession(), type ) );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
+ seq.setName( db_entry.getSequenceName() );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
+ seq.setGeneName( db_entry.getGeneName() );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
+ try {
+ seq.setSymbol( db_entry.getSequenceSymbol() );
+ }
+ catch ( final PhyloXmlDataFormatException e ) {
+ // Eat this exception.
+ }
+ }
+ if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
+ for( final GoTerm go : db_entry.getGoTerms() ) {
+ final Annotation ann = new Annotation( go.getGoId().getId() );
+ ann.setDesc( go.getName() );
+ seq.addAnnotation( ann );
+ }
+ }
+ if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) {
+ for( final Accession x : db_entry.getCrossReferences() ) {
+ seq.addCrossReference( x );
+ }
+ }
+ final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy()
+ : new Taxonomy();
+ if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
+ tax.setScientificName( db_entry.getTaxonomyScientificName() );
+ }
+ if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) {
+ tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) );
+ }
+ node.getNodeData().setTaxonomy( tax );
+ node.getNodeData().setSequence( seq );
+ }
+ else if ( db != Db.NONE ) {
+ not_found.add( node.getName() );
+ }
+ try {
+ Thread.sleep( 10 );// Sleep for 10 ms
+ }
+ catch ( final InterruptedException ie ) {
+ }