+ final static String extractFrom( final String target, final String a ) {
+ final int i_a = target.indexOf( a );
+ return target.substring( i_a + a.length() ).trim();
+ }
+
+ final static String extractFromTo( final String target, final String a, final String b ) {
+ final int i_a = target.indexOf( a );
+ final int i_b = target.indexOf( b );
+ if ( ( i_a < 0 ) || ( i_b < i_a ) ) {
+ throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a
+ + "\" and \"" + b + "\"" );
+ }
+ return target.substring( i_a + a.length(), i_b ).trim();
+ }
+
+ final static String extractTo( final String target, final String b ) {
+ final int i_b = target.indexOf( b );
+ return target.substring( 0, i_b ).trim();
+ }
+
+ private static void addDataFromDbToNode( final boolean allow_to_set_taxonomic_data,
+ final int lines_to_return,
+ final SortedSet<String> not_found,
+ final PhylogenyNode node,
+ final Accession acc ) throws IOException {
+ SequenceDatabaseEntry db_entry = null;
+ final String query = acc.getValue();
+ if ( acc.getSource().equals( Source.UNIPROT.toString() ) ) {
+ if ( DEBUG ) {
+ System.out.println( "uniprot: " + query );
+ }
+ try {
+ db_entry = obtainUniProtEntry( query, lines_to_return );
+ }
+ catch ( final FileNotFoundException e ) {
+ // Eat this, and move to next.
+ }
+ }
+ else if ( acc.getSource().equals( Source.REFSEQ.toString() ) ) {
+ if ( DEBUG ) {
+ System.out.println( "refseq: " + query );
+ }
+ try {
+ db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return );
+ }
+ catch ( final FileNotFoundException e ) {
+ // Eat this, and move to next.
+ }
+ }
+ else if ( acc.getSource().equals( Source.EMBL.toString() ) || acc.getSource().equals( Source.NCBI.toString() )
+ || acc.getSource().equals( Source.EMBL.toString() ) ) {
+ if ( DEBUG ) {
+ System.out.println( acc.toString() );
+ }
+ try {
+ db_entry = obtainEmblEntry( acc, lines_to_return );
+ }
+ catch ( final FileNotFoundException e ) {
+ // Eat this, and move to next.
+ }
+ }
+ else if ( acc.getSource().equals( Source.GI.toString() ) ) {
+ if ( DEBUG ) {
+ System.out.println( "gi: " + query );
+ }
+ try {
+ db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return );
+ }
+ catch ( final FileNotFoundException e ) {
+ // Eat this, and move to next.
+ }
+ }
+ if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
+ final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence();
+ if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
+ seq.setAccession( new Accession( db_entry.getAccession(), acc.getSource() ) );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
+ seq.setName( db_entry.getSequenceName() );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
+ seq.setGeneName( db_entry.getGeneName() );
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
+ try {
+ seq.setSymbol( db_entry.getSequenceSymbol() );
+ }
+ catch ( final PhyloXmlDataFormatException e ) {
+ // Eat this exception.
+ }
+ }
+ if ( ( db_entry.getMolecularSequence() != null )
+ && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() )
+ && ( ALLOW_TO_OVERWRITE_MOL_SEQ || seq.getMolecularSequence().isEmpty() ) ) {
+ seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() );
+ seq.setMolecularSequenceAligned( false );
+ if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) {
+ seq.setType( "protein" );
+ }
+ else if ( db_entry.getMolecularSequence().getType() == TYPE.DNA ) {
+ seq.setType( "dna" );
+ }
+ else if ( db_entry.getMolecularSequence().getType() == TYPE.RNA ) {
+ seq.setType( "rna" );
+ }
+ }
+ if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
+ for( final GoTerm go : db_entry.getGoTerms() ) {
+ final Annotation ann = new Annotation( go.getGoId().getId() );
+ ann.setDesc( go.getName() );
+ seq.addAnnotation( ann );
+ }
+ }
+ if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) {
+ for( final Accession x : db_entry.getCrossReferences() ) {
+ seq.addCrossReference( x );
+ }
+ }
+ if ( !ForesterUtil.isEmpty( db_entry.getChromosome() ) && !ForesterUtil.isEmpty( db_entry.getMap() ) ) {
+ seq.setLocation( "chr " + db_entry.getChromosome() + ", " + db_entry.getMap() );
+ }
+ else if ( !ForesterUtil.isEmpty( db_entry.getChromosome() ) ) {
+ seq.setLocation( "chr " + db_entry.getChromosome() );
+ }
+ else if ( !ForesterUtil.isEmpty( db_entry.getMap() ) ) {
+ seq.setLocation( db_entry.getMap() );
+ }
+ final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy();
+ if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
+ tax.setScientificName( db_entry.getTaxonomyScientificName() );
+ }
+ if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) {
+ tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) );
+ }
+ node.getNodeData().setTaxonomy( tax );
+ node.getNodeData().setSequence( seq );
+ }
+ else {
+ if ( node.isExternal() || !node.isEmpty() ) {
+ not_found.add( node.toString() );
+ }
+ }
+ try {
+ Thread.sleep( SLEEP );
+ }
+ catch ( final InterruptedException ie ) {
+ }
+ }
+