X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Farchaeopteryx%2Ftools%2FSequenceDataRetriver.java;h=4db817576b51f5ed852e1ccacab037474b7ea550;hb=d772adf9d23c4cdbf84b2af9d23e2e7ebedfcf3a;hp=7f0477ed05faccdb49269544639d216d5fbea1d1;hpb=389376dc037d39d3c7983e2866cbfb47bebf416a;p=jalview.git diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index 7f0477e..4db8175 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.net.UnknownHostException; import java.util.SortedSet; import java.util.TreeSet; -import java.util.regex.Pattern; import javax.swing.JOptionPane; @@ -44,21 +43,19 @@ import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; +import org.forester.ws.uniprot.DatabaseTools; import org.forester.ws.uniprot.SequenceDatabaseEntry; import org.forester.ws.uniprot.UniProtWsTools; -public final class SequenceDataRetriver implements Runnable { +public final class SequenceDataRetriver extends RunnableProcess { - // uniprot/expasy accession number format (6 chars): - // letter digit letter-or-digit letter-or-digit letter-or-digit digit - private final static Pattern UNIPROT_AC_PATTERN = Pattern.compile( "[A-NR-ZOPQ]\\d[A-Z0-9]{3}\\d" ); private final Phylogeny _phy; private final MainFrameApplication _mf; private final TreePanel _treepanel; - private final static boolean DEBUG = true; + private final static boolean DEBUG = false; private enum Db { - UNKNOWN, UNIPROT; + UNKNOWN, UNIPROT, EMBL; } public SequenceDataRetriver( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) { @@ -72,13 +69,12 @@ public final class SequenceDataRetriver implements Runnable { } private void execute() { - _mf.getMainPanel().getCurrentTreePanel().setWaitCursor(); + start( _mf, "sequence data" ); SortedSet not_found = null; try { not_found = obtainSeqInformation( _phy ); } catch ( final UnknownHostException e ) { - _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); JOptionPane.showMessageDialog( _mf, "Could not connect to \"" + getBaseUrl() + "\"", "Network error during taxonomic information gathering", @@ -86,7 +82,6 @@ public final class SequenceDataRetriver implements Runnable { return; } catch ( final IOException e ) { - _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); e.printStackTrace(); JOptionPane.showMessageDialog( _mf, e.toString(), @@ -95,7 +90,7 @@ public final class SequenceDataRetriver implements Runnable { return; } finally { - _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); + end( _mf ); } _treepanel.setTree( _phy ); _mf.showWhole(); @@ -108,12 +103,12 @@ public final class SequenceDataRetriver implements Runnable { max = 20; } final StringBuffer sb = new StringBuffer(); - sb.append( "Not all identifiers could be resolved.\n" ); if ( not_found.size() == 1 ) { - sb.append( "The following identifier was not found:\n" ); + sb.append( "Data for the following sequence identifier was not found:\n" ); } else { - sb.append( "The following identifiers were not found (total: " + not_found.size() + "):\n" ); + sb.append( "Data for the following sequence identifiers was not found (total: " + not_found.size() + + "):\n" ); } int i = 0; for( final String string : not_found ) { @@ -130,7 +125,7 @@ public final class SequenceDataRetriver implements Runnable { try { JOptionPane.showMessageDialog( _mf, sb.toString(), - "UniProt Sequence Tool Completed", + "Sequence Tool Completed", JOptionPane.WARNING_MESSAGE ); } catch ( final Exception e ) { @@ -150,7 +145,7 @@ public final class SequenceDataRetriver implements Runnable { } } - synchronized public static SortedSet obtainSeqInformation( final Phylogeny phy ) throws IOException { + public static SortedSet obtainSeqInformation( final Phylogeny phy ) throws IOException { final SortedSet not_found = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); @@ -177,21 +172,26 @@ public final class SequenceDataRetriver implements Runnable { query = node.getNodeData().getSequence().getAccession().getValue(); db = Db.UNIPROT; } - else if ( !ForesterUtil.isEmpty( node.getName() ) ) { - query = node.getName(); + else if ( node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) + && ( node.getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "embl" ) || node + .getNodeData().getSequence().getAccession().getValue().toLowerCase().startsWith( "ebi" ) ) ) { + query = node.getNodeData().getSequence().getAccession().getValue(); + db = Db.EMBL; } - if ( !ForesterUtil.isEmpty( query ) ) { - if ( query.indexOf( '/' ) > 0 ) { - query = query.substring( 0, query.indexOf( '/' ) ); - } - if ( query.indexOf( '.' ) > 0 ) { - query = query.substring( 0, query.indexOf( '.' ) ); + else if ( !ForesterUtil.isEmpty( node.getName() ) ) { + if ( ( query = UniProtWsTools.parseUniProtAccessor( node.getName() ) ) != null ) { + db = Db.UNIPROT; } - if ( query.indexOf( '_' ) > 0 ) { - query = query.substring( 0, query.indexOf( '_' ) ); + else if ( ( query = DatabaseTools.parseGenbankAccessor( node.getName() ) ) != null ) { + db = Db.EMBL; } + } + if ( !ForesterUtil.isEmpty( query ) ) { SequenceDatabaseEntry db_entry = null; - if ( ( db == Db.UNIPROT ) || UNIPROT_AC_PATTERN.matcher( query ).matches() ) { + if ( db == Db.UNIPROT ) { if ( DEBUG ) { System.out.println( "uniprot: " + query ); } @@ -202,9 +202,30 @@ public final class SequenceDataRetriver implements Runnable { // Ignore. } } - if ( db_entry != null ) { + if ( ( db == Db.EMBL ) || ( ( db == Db.UNIPROT ) && ( db_entry == null ) ) ) { + if ( DEBUG ) { + System.out.println( "embl: " + query ); + } + try { + db_entry = UniProtWsTools.obtainEmblEntry( query, 200 ); + } + catch ( final FileNotFoundException e ) { + // Ignore. + } + if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) { + db = Db.EMBL; + } + } + if ( ( db_entry != null ) && !db_entry.isEmpty() ) { if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) { - seq.setAccession( new Accession( db_entry.getAccession(), "uniprot" ) ); + String type = null; + if ( db == Db.EMBL ) { + type = "embl"; + } + else if ( db == Db.UNIPROT ) { + type = "uniprot"; + } + seq.setAccession( new Accession( db_entry.getAccession(), type ) ); } if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) { seq.setName( db_entry.getSequenceName() );