From 7c8aef9df85f80605041ba4d065c798a3cd78d5a Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 29 Apr 2011 01:19:50 +0000 Subject: [PATCH] in progress --- .../archaeopteryx/tools/SequenceDataRetriver.java | 26 ++++++++++++++------ .../src/org/forester/ws/uniprot/EbiDbEntry.java | 11 ++++++++- .../forester/ws/uniprot/SequenceDatabaseEntry.java | 2 ++ .../src/org/forester/ws/uniprot/UniProtEntry.java | 9 +++++++ .../org/forester/ws/uniprot/UniProtWsTools.java | 5 ++-- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index 5b454e3..7aed148 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -52,7 +52,7 @@ public final class SequenceDataRetriver implements Runnable { private final Phylogeny _phy; private final MainFrameApplication _mf; private final TreePanel _treepanel; - private final static boolean DEBUG = true; + private final static boolean DEBUG = false; private enum Db { UNKNOWN, UNIPROT, EMBL; @@ -105,12 +105,12 @@ public final class SequenceDataRetriver implements Runnable { max = 20; } final StringBuffer sb = new StringBuffer(); - sb.append( "Not all identifiers could be resolved.\n" ); if ( not_found.size() == 1 ) { - sb.append( "The following identifier was not found:\n" ); + sb.append( "Data for the following sequence identifier was not found:\n" ); } else { - sb.append( "The following identifiers were not found (total: " + not_found.size() + "):\n" ); + sb.append( "Data for the following sequence identifiers was not found (total: " + not_found.size() + + "):\n" ); } int i = 0; for( final String string : not_found ) { @@ -127,7 +127,7 @@ public final class SequenceDataRetriver implements Runnable { try { JOptionPane.showMessageDialog( _mf, sb.toString(), - "UniProt Sequence Tool Completed", + "Sequence Tool Completed", JOptionPane.WARNING_MESSAGE ); } catch ( final Exception e ) { @@ -204,7 +204,7 @@ public final class SequenceDataRetriver implements Runnable { // Ignore. } } - else if ( db == Db.EMBL ) { + if ( ( db == Db.EMBL ) || ( ( db == Db.UNIPROT ) && ( db_entry == null ) ) ) { if ( DEBUG ) { System.out.println( "embl: " + query ); } @@ -214,10 +214,20 @@ public final class SequenceDataRetriver implements Runnable { catch ( final FileNotFoundException e ) { // Ignore. } + if ( ( db == Db.UNIPROT ) && ( db_entry != null ) ) { + db = Db.EMBL; + } } - if ( db_entry != null ) { + if ( ( db_entry != null ) && !db_entry.isEmpty() ) { if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) { - seq.setAccession( new Accession( db_entry.getAccession(), "uniprot" ) ); + String type = null; + if ( db == Db.EMBL ) { + type = "embl"; + } + else if ( db == Db.UNIPROT ) { + type = "uniprot"; + } + seq.setAccession( new Accession( db_entry.getAccession(), type ) ); } if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) { seq.setName( db_entry.getSequenceName() ); diff --git a/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java b/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java index 98fe38d..5f0d599 100644 --- a/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/uniprot/EbiDbEntry.java @@ -27,6 +27,8 @@ package org.forester.ws.uniprot; import java.util.List; +import org.forester.util.ForesterUtil; + public final class EbiDbEntry implements SequenceDatabaseEntry { //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/ @@ -48,7 +50,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { final EbiDbEntry e = new EbiDbEntry(); for( final String line : lines ) { if ( line.startsWith( "PA" ) ) { - e.setPA( DatabaseTools.extract( line, "PA", ";" ) ); + e.setPA( DatabaseTools.extract( line, "PA" ) ); } else if ( line.startsWith( "DE" ) ) { // if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) { @@ -131,4 +133,11 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { _symbol = symbol; } } + + @Override + public boolean isEmpty() { + return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) + && ForesterUtil.isEmpty( getTaxonomyScientificName() ) + && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) ); + } } diff --git a/forester/java/src/org/forester/ws/uniprot/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/uniprot/SequenceDatabaseEntry.java index e62ee5f..9089737 100644 --- a/forester/java/src/org/forester/ws/uniprot/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/uniprot/SequenceDatabaseEntry.java @@ -27,6 +27,8 @@ package org.forester.ws.uniprot; public interface SequenceDatabaseEntry { + public boolean isEmpty(); + public String getAccession(); public String getSequenceName(); diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java b/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java index 5a5568a..cd5d541 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtEntry.java @@ -27,6 +27,8 @@ package org.forester.ws.uniprot; import java.util.List; +import org.forester.util.ForesterUtil; + public final class UniProtEntry implements SequenceDatabaseEntry { private String _ac; @@ -130,4 +132,11 @@ public final class UniProtEntry implements SequenceDatabaseEntry { _symbol = symbol; } } + + @Override + public boolean isEmpty() { + return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) + && ForesterUtil.isEmpty( getTaxonomyScientificName() ) + && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) ); + } } diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java index 8bef111..d24b171 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtWsTools.java @@ -54,7 +54,7 @@ public final class UniProtWsTools { // \Z => end of String private final static Pattern UNIPROT_AC_PATTERN = Pattern .compile( "(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d[A-Z0-9]{3}\\d)(?:[^a-zA-Z0-9]|\\Z)" ); - private final static boolean DEBUG = false; + private final static boolean DEBUG = true; private static String encode( final String str ) throws UnsupportedEncodingException { return URLEncoder.encode( str.trim(), URL_ENC ); @@ -237,6 +237,7 @@ public final class UniProtWsTools { String line; final List result = new ArrayList(); while ( ( line = in.readLine() ) != null ) { + System.out.println( line ); result.add( line ); if ( result.size() > max_lines_to_return ) { break; @@ -254,7 +255,7 @@ public final class UniProtWsTools { public static SequenceDatabaseEntry obtainEmblEntry( final String query, final int max_lines_to_return ) throws IOException { - final List lines = queryEmblDb( "query", max_lines_to_return ); + final List lines = queryEmblDb( query, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainText( lines ); } } -- 1.7.10.2