JAL-1705 DbSourceProxy properties converted to methods, tidy/format code
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 8 Jan 2016 10:03:40 +0000 (10:03 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 8 Jan 2016 10:03:40 +0000 (10:03 +0000)
32 files changed:
src/jalview/datamodel/DBRefSource.java
src/jalview/ext/ensembl/EnsemblCdna.java
src/jalview/ext/ensembl/EnsemblCds.java
src/jalview/ext/ensembl/EnsemblGenome.java
src/jalview/ext/ensembl/EnsemblProtein.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblTranscript.java
src/jalview/ext/ensembl/SeqFetcher.java
src/jalview/gui/SequenceFetcher.java
src/jalview/util/DBRefUtils.java
src/jalview/ws/DBRefFetcher.java
src/jalview/ws/SequenceFetcher.java
src/jalview/ws/dbsources/EmblCdsSource.java [moved from src/jalview/ws/dbsources/EmblCdsSouce.java with 89% similarity]
src/jalview/ws/dbsources/EmblSource.java
src/jalview/ws/dbsources/EmblXmlSource.java
src/jalview/ws/dbsources/GeneDbSource.java
src/jalview/ws/dbsources/Pdb.java
src/jalview/ws/dbsources/Pfam.java
src/jalview/ws/dbsources/PfamFull.java
src/jalview/ws/dbsources/PfamSeed.java
src/jalview/ws/dbsources/Rfam.java
src/jalview/ws/dbsources/RfamFull.java
src/jalview/ws/dbsources/RfamSeed.java
src/jalview/ws/dbsources/Uniprot.java
src/jalview/ws/dbsources/UniprotName.java [moved from src/jalview/ws/dbsources/UnprotName.java with 88% similarity]
src/jalview/ws/dbsources/Xfam.java
src/jalview/ws/seqfetcher/ASequenceFetcher.java
src/jalview/ws/seqfetcher/DbSourceProxy.java
src/jalview/ws/seqfetcher/DbSourceProxyImpl.java
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
test/jalview/ext/ensembl/SeqFetcherTest.java [new file with mode: 0644]
test/jalview/ws/SequenceFetcherTest.java [new file with mode: 0644]

index 6a676cf..99c68ba 100755 (executable)
@@ -94,47 +94,4 @@ public class DBRefSource
   public static final String[] PROTEINSTR = { PDB };
 
   public static final String[] DOMAINDBS = { PFAM, RFAM };
-
-  /**
-   * set of unique DBRefSource property constants. These could be used to
-   * reconstruct the above groupings
-   */
-  public static final Object SEQDB = "SQ";
-
-  /**
-   * database of nucleic acid sequences
-   */
-  public static final Object DNASEQDB = "NASQ";
-
-  /**
-   * database of amino acid sequences
-   */
-  public static final Object PROTSEQDB = "PROTSQ";
-
-  /**
-   * database of cDNA sequences
-   */
-  public static final Object CODINGSEQDB = "CODING";
-
-  /**
-   * database of na sequences with exon annotation
-   */
-  public static final Object DNACODINGSEQDB = "XONCODING";
-
-  /**
-   * DB returns several sequences associated with a protein/nucleotide domain
-   */
-  public static final Object DOMAINDB = "DOMAIN";
-
-  /**
-   * DB query can take multiple accession codes concatenated by a separator.
-   * Value of property indicates maximum number of accession codes to send at a
-   * time.
-   */
-  public static final Object MULTIACC = "MULTIACC";
-
-  /**
-   * DB query returns an alignment for each accession provided.
-   */
-  public static final Object ALIGNMENTDB = "ALIGNMENTS";
 }
index 757b3c8..9c88b7c 100644 (file)
@@ -7,7 +7,7 @@ import com.stevesoft.pat.Regex;
 public class EnsemblCdna extends EnsemblSeqProxy
 {
 
-  public EnsemblCdna() throws Exception
+  public EnsemblCdna()
   {
     super();
   }
index 1f63e05..dc92348 100644 (file)
@@ -5,7 +5,7 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
 public class EnsemblCds extends EnsemblSeqProxy
 {
 
-  public EnsemblCds() throws Exception
+  public EnsemblCds()
   {
     super();
   }
index 37e8e2b..39dfac0 100644 (file)
@@ -5,7 +5,7 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
 public class EnsemblGenome extends EnsemblSeqProxy
 {
 
-  public EnsemblGenome() throws Exception
+  public EnsemblGenome()
   {
     super();
   }
index db8d9d5..4cc43ab 100644 (file)
@@ -5,7 +5,7 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
 public class EnsemblProtein extends EnsemblSeqProxy
 {
 
-  public EnsemblProtein() throws Exception
+  public EnsemblProtein()
   {
     super();
   }
@@ -22,4 +22,19 @@ public class EnsemblProtein extends EnsemblSeqProxy
     return EnsemblSeqType.PROTEIN;
   }
 
+  @Override
+  public boolean isDnaCoding()
+  {
+    return false;
+  }
+
+  /**
+   * Test query is to the protein translation of transcript ENST00000288602
+   */
+  @Override
+  public String getTestQuery()
+  {
+    return "ENSP00000288602";
+  }
+
 }
index 137c9b0..4f85bd0 100644 (file)
@@ -1,14 +1,13 @@
 package jalview.ext.ensembl;
 
+import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
-import jalview.datamodel.DBRefSource;
 import jalview.datamodel.SequenceI;
 import jalview.exceptions.JalviewException;
 import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
 import jalview.io.FastaFile;
 import jalview.io.FileParse;
 import jalview.util.DBRefUtils;
-import jalview.ws.seqfetcher.DbSourceProxy;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
 import java.util.ArrayList;
@@ -17,20 +16,13 @@ import java.util.List;
 
 import com.stevesoft.pat.Regex;
 
-public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
-        DbSourceProxy
+public abstract class EnsemblSeqProxy extends DbSourceProxyImpl
 {
   SeqFetcher sf;
 
-  public EnsemblSeqProxy() throws Exception
+  public EnsemblSeqProxy()
   {
     sf = new SeqFetcher();
-    addDbSourceProperty(DBRefSource.MULTIACC);
-    addDbSourceProperty(DBRefSource.SEQDB);
-    // decide whether these need to be filtered according to return type
-    addDbSourceProperty(DBRefSource.PROTSEQDB);
-    addDbSourceProperty(DBRefSource.DNACODINGSEQDB);
-    addDbSourceProperty(DBRefSource.DNASEQDB);
   }
 
   @Override
@@ -58,10 +50,13 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
     return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
   }
 
+  /**
+   * Default test query is a transcript
+   */
   @Override
   public String getTestQuery()
   {
-    return "ENSP00000288602";
+    return "ENST00000288602";
   }
 
   @Override
@@ -79,14 +74,15 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
     List<String> tids, ids = new ArrayList<String>();
     tids = Arrays.asList(queries.split(" +"));
     AlignmentI rtn = null;
-    for (int v = 0, vSize = tids.size(); v < vSize; v += 50)
+
+    /*
+     * execute queries, if necessary in batches of the
+     * maximum allowed number of ids
+     */
+    int maxQueryCount = getMaximumQueryCount();
+    for (int v = 0, vSize = tids.size(); v < vSize; v += maxQueryCount)
     {
-      int p = v + 50;
-      if (p > vSize)
-      {
-        p = vSize;
-      }
-      ;
+      int p = Math.min(vSize, v + maxQueryCount);
       ids = tids.subList(v, p);
       try
       {
@@ -111,7 +107,7 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
         }
         if (fr.getSeqs().size() > 0)
         {
-          AlignmentI seqal = new jalview.datamodel.Alignment(
+          AlignmentI seqal = new Alignment(
                   fr.getSeqsAsArray());
           for (SequenceI sq:seqal.getSequences())
           {
@@ -175,4 +171,21 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
   {
     return 0;
   }
+
+  /**
+   * A sequence/id POST request currently allows up to 50 queries
+   * 
+   * @see http://rest.ensembl.org/documentation/info/sequence_id_post
+   */
+  @Override
+  public int getMaximumQueryCount()
+  {
+    return 50;
+  }
+
+  @Override
+  public boolean isDnaCoding()
+  {
+    return true;
+  }
 }
index 68ed310..c2d0b6e 100644 (file)
@@ -5,7 +5,8 @@ import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
 public class EnsemblTranscript extends EnsemblSeqProxy
 {
 
-  public EnsemblTranscript() throws Exception
+  // TODO is this class needed? it seems to duplicate EnsemblProtein
+  public EnsemblTranscript()
   {
     super();
   }
index 7c913bf..57f000f 100644 (file)
@@ -10,7 +10,7 @@ import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.net.URLConnection;
-import java.util.Collections;
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.http.NameValuePair;
@@ -20,81 +20,60 @@ public class SeqFetcher
 {
   private final static String ENSEMBL_REST = "rest.ensembl.org";
 
-  private static boolean ensemblRestavailable = false;
+  private static final String SEQUENCE_ID_URL = "http://" + ENSEMBL_REST + "/sequence/id";
 
-  private static long lastCheck = -1;
+  private static final String PING_URL = "http://" + ENSEMBL_REST + "/info/ping";
 
-  public boolean isEnsemblAvailable()
-  {
-    if (isTesting || !ensemblRestavailable
-            || System.currentTimeMillis() - lastCheck > 10000)
-    {
-      checkEnsembl();
-      lastCheck = System.currentTimeMillis();
-    }
-    return ensemblRestavailable;
-  }
-
-  private boolean isTesting, testEnsemblStatus;
+  private final static long RETEST_INTERVAL = 10000L; // 10 seconds
 
-  /**
-   * @return the isTesting
-   */
-  public boolean isTesting()
-  {
-    return isTesting;
-  }
+  private static boolean ensemblRestAvailable = false;
 
-  /**
-   * @param isTesting
-   *          the isTesting to set
-   */
-  public void setTesting(boolean isTesting)
-  {
-    this.isTesting = isTesting;
-  }
+  private static long lastCheck = -1;
 
   /**
-   * @return the testEnsemblStatus
+   * Rechecks if Ensembl is responding, unless the last check was successful and
+   * the retest interval has not yet elapsed. Returns true if Ensembl is up,
+   * else false.
+   * 
+   * @return true if Ensembl is available, else false
    */
-  public boolean isTestEnsemblStatus()
+  public boolean isEnsemblAvailable()
   {
-    return testEnsemblStatus;
+    long now = System.currentTimeMillis();
+    boolean retest = now - lastCheck > RETEST_INTERVAL;
+    if (ensemblRestAvailable && !retest)
+    {
+      return true;
+    }
+    ensemblRestAvailable = checkEnsembl();
+    lastCheck = now;
+    return ensemblRestAvailable;
   }
 
   /**
-   * @param testEnsemblStatus
-   *          the testEnsemblStatus to set
+   * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
+   * successful, else false
+   * 
+   * @return true if the ping request gets a 2xx response, else false
    */
-  public void setTestEnsemblStatus(boolean testEnsemblStatus)
+  private boolean checkEnsembl()
   {
-    this.testEnsemblStatus = testEnsemblStatus;
-  }
-
-  private void checkEnsembl()
-  {
-    if (isTesting)
-    {
-      ensemblRestavailable = testEnsemblStatus;
-      return;
-    }
     try
     {
-      URL ping = new URL("http://" + ENSEMBL_REST + "/info/ping");
-      HttpURLConnection conn = (HttpURLConnection) (ping.openConnection());
-      if (conn.getResponseCode() >= 200 && conn.getResponseCode() < 300)
+      URL ping = new URL(PING_URL);
+      HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
+      int rc = conn.getResponseCode();
+      conn.disconnect();
+      if (rc >= 200 && rc < 300)
       {
-        ensemblRestavailable = true;
-        return;
+        return true;
       }
-    } catch (Error err)
-    {
-      err.printStackTrace();
-    } catch (Exception exx)
+    } catch (Throwable t)
     {
-      exx.printStackTrace();
+      System.err.println("Error connecting to " + PING_URL + ": "
+              + t.getMessage());
     }
-    ensemblRestavailable = false;
+    return false;
   }
 
   public SeqFetcher()
@@ -120,14 +99,17 @@ public class SeqFetcher
   }
 
   /**
-   * reolve request type as an argument for sequence and features queries
+   * Returns a list of additional URL query parameters to specify the desired
+   * sequence type (genomic/cds/protein etc), and data format Fasta
    * 
    * @param type
    */
-  public List<NameValuePair> getObjectTypeArg(EnsemblSeqType type)
+  public List<NameValuePair> getAdditionalParameters(EnsemblSeqType type)
   {
-    NameValuePair nameValue = new BasicNameValuePair("type", type.getType());
-    return Collections.singletonList(nameValue);
+    List<NameValuePair> params = new ArrayList<NameValuePair>();
+    params.add(new BasicNameValuePair("type", type.getType()));
+    params.add(new BasicNameValuePair("content-type", "text/x-fasta"));
+    return params;
   }
 
   /**
@@ -141,27 +123,19 @@ public class SeqFetcher
   public FileParse getSequenceReader(EnsemblSeqType returnType,
           List<String> ids) throws IOException
   {
+    // see http://rest.ensembl.org/documentation/info/sequence_id
 
-    // adapted From the rest.ensembl.org documentation for sequence_id
-
-    String urls = "http://" + ENSEMBL_REST + "/sequence/id";
-    List<NameValuePair> vals = getObjectTypeArg(returnType);
-    boolean f = true;
+    String urlstring = SEQUENCE_ID_URL;
+    List<NameValuePair> vals = getAdditionalParameters(returnType);
+    boolean first = true;
     for (NameValuePair nvp : vals)
     {
-      if (f)
-      {
-        f = false;
-        urls += "?";
-      }
-      else
-      {
-        urls += "&";
-      }
-      urls += nvp.getName() + "=" + nvp.getValue();
+      urlstring += first ? "?" : "&";
+      first = false;
+      urlstring += nvp.getName() + "=" + nvp.getValue();
     }
 
-    URL url = new URL(urls);
+    URL url = new URL(urlstring);
 
     URLConnection connection = url.openConnection();
     HttpURLConnection httpConnection = (HttpURLConnection) connection;
@@ -173,17 +147,14 @@ public class SeqFetcher
     {
       StringBuilder postBody = new StringBuilder();
       postBody.append("{\"ids\":[");
-      boolean first = true;
+      first = true;
       for (String id : ids)
       {
-        if (first)
-        {
-          first = false;
-        }
-        else
+        if (!first)
         {
           postBody.append(",");
         }
+        first = false;
         postBody.append("\"");
         postBody.append(id.trim());
         postBody.append("\"");
index 09d33c8..ad3fcc9 100755 (executable)
@@ -22,7 +22,6 @@ package jalview.gui;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.io.FormatAdapter;
@@ -551,7 +550,7 @@ public class SequenceFetcher extends JPanel implements Runnable
                             Integer.valueOf(nqueries).toString(),
                             proxy.getDbName() }), Thread.currentThread()
                         .hashCode());
-        isAliSource = proxy.isA(DBRefSource.ALIGNMENTDB);
+        isAliSource = proxy.isAlignmentSource();
         if (proxy.getAccessionSeparator() == null)
         {
           while (en.hasNext())
@@ -638,8 +637,7 @@ public class SequenceFetcher extends JPanel implements Runnable
                 DBRefEntry dbr = new DBRefEntry(), found[] = null;
                 dbr.setSource(proxy.getDbSource());
                 dbr.setVersion(null);
-                String accId = DBRefUtils.processQueryToAccessionFor(proxy,
-                        q);
+                String accId = proxy.getAccessionIdFromQuery(q);
                 dbr.setAccessionId(accId);
                 boolean rfound = false;
                 for (int r = 0; r < rs.length; r++)
index b8f1dd5..cdf2325 100755 (executable)
@@ -473,36 +473,4 @@ public class DBRefUtils
     return ref;
   }
 
-  /**
-   * Extract valid accession strings from a query string. Used by the
-   * SequenceFetcher and DBRefFetcher to create valid accession strings from an
-   * ID string for database sources with a Regex validation field.
-   * 
-   * @param proxy
-   * @param q
-   * @return q if proxy.getAccessionValidator()==null, otherwise the matched
-   *         region or the first subgroup match from the matched region
-   */
-  public static String processQueryToAccessionFor(DbSourceProxy proxy,
-          String q)
-  {
-    if (proxy.getAccessionValidator() != null)
-    {
-      Regex vgr = proxy.getAccessionValidator();
-      vgr.search(q);
-      if (vgr.numSubs() > 0)
-      {
-        return (vgr.stringMatched(1));
-      }
-      else
-      {
-        return (vgr.stringMatched());
-      }
-    }
-    else
-    {
-      return (q);
-    }
-  }
-
 }
index d531fea..9d91e31 100644 (file)
@@ -283,6 +283,7 @@ public class DBRefFetcher implements Runnable
   /**
    * DOCUMENT ME!
    */
+  @Override
   public void run()
   {
     if (dbSources == null)
@@ -335,16 +336,8 @@ public class DBRefFetcher implements Runnable
         // TODO: introduce multithread multisource queries and logic to remove a
         // query from other sources if any source for a database returns a
         // record
-        if (dbsource.getDbSourceProperties().containsKey(
-                DBRefSource.MULTIACC))
-        {
-          maxqlen = ((Integer) dbsource.getDbSourceProperties().get(
-                  DBRefSource.MULTIACC)).intValue();
-        }
-        else
-        {
-          maxqlen = 1;
-        }
+        maxqlen = dbsource.getMaximumQueryCount();
+
         while (queries.size() > 0 || seqIndex < currSeqs.length)
         {
           if (queries.size() > 0)
index fcc4457..74bf56f 100644 (file)
  */
 package jalview.ws;
 
-import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.DBRefSource;
-import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblCdna;
+import jalview.ext.ensembl.EnsemblCds;
+import jalview.ext.ensembl.EnsemblGenome;
+import jalview.ext.ensembl.EnsemblProtein;
+import jalview.ws.dbsources.EmblCdsSource;
+import jalview.ws.dbsources.EmblSource;
+import jalview.ws.dbsources.Pdb;
+import jalview.ws.dbsources.PfamFull;
+import jalview.ws.dbsources.PfamSeed;
+import jalview.ws.dbsources.RfamFull;
+import jalview.ws.dbsources.RfamSeed;
+import jalview.ws.dbsources.Uniprot;
+import jalview.ws.dbsources.UniprotName;
 import jalview.ws.dbsources.das.api.jalviewSourceI;
 import jalview.ws.seqfetcher.ASequenceFetcher;
 import jalview.ws.seqfetcher.DbSourceProxy;
 
 import java.util.ArrayList;
-import java.util.Enumeration;
 import java.util.List;
-import java.util.Vector;
 
 /**
  * This is the the concrete implementation of the sequence retrieval interface
  * and abstract class in jalview.ws.seqfetcher. This implements the run-time
- * discovery of sequence database clients, and provides a hardwired main for
- * testing all registered handlers.
+ * discovery of sequence database clients.
  * 
  */
 public class SequenceFetcher extends ASequenceFetcher
@@ -55,22 +61,24 @@ public class SequenceFetcher extends ASequenceFetcher
 
   public SequenceFetcher(boolean addDas)
   {
-    addDBRefSourceImpl(jalview.ext.ensembl.EnsemblProtein.class);
-    addDBRefSourceImpl(jalview.ext.ensembl.EnsemblTranscript.class);
-    addDBRefSourceImpl(jalview.ext.ensembl.EnsemblCds.class);
-    addDBRefSourceImpl(jalview.ext.ensembl.EnsemblGenome.class);
-    addDBRefSourceImpl(jalview.ext.ensembl.EnsemblCdna.class);
-
-    addDBRefSourceImpl(jalview.ws.dbsources.EmblSource.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.EmblCdsSouce.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.Uniprot.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.UnprotName.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.Pdb.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.PfamFull.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.PfamSeed.class);
+    addDBRefSourceImpl(EnsemblProtein.class);
+    // EnsemblTranscript would just replace EnsemblProtein as the proxy for
+    // { DbSource="ENSEMBL", DbName="ENSEMBL (Protein)" }
+    // addDBRefSourceImpl(EnsemblTranscript.class);
+    addDBRefSourceImpl(EnsemblCds.class);
+    addDBRefSourceImpl(EnsemblGenome.class);
+    addDBRefSourceImpl(EnsemblCdna.class);
+
+    addDBRefSourceImpl(EmblSource.class);
+    addDBRefSourceImpl(EmblCdsSource.class);
+    addDBRefSourceImpl(Uniprot.class);
+    addDBRefSourceImpl(UniprotName.class);
+    addDBRefSourceImpl(Pdb.class);
+    addDBRefSourceImpl(PfamFull.class);
+    addDBRefSourceImpl(PfamSeed.class);
     // ensures Seed alignment is 'default' for PFAM
-    addDBRefSourceImpl(jalview.ws.dbsources.RfamFull.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.RfamSeed.class);
+    addDBRefSourceImpl(RfamFull.class);
+    addDBRefSourceImpl(RfamSeed.class);
     if (addDas)
     {
       registerDasSequenceSources();
@@ -93,7 +101,7 @@ public class SequenceFetcher extends ASequenceFetcher
       {
         // Skip the alignment databases for the moment - they're not useful for
         // verifying a single sequence against its reference source
-        if (dbs.isA(DBRefSource.ALIGNMENTDB))
+        if (dbs.isAlignmentSource())
         {
           skip = true;
         }
@@ -156,283 +164,6 @@ public class SequenceFetcher extends ASequenceFetcher
   }
 
   /**
-   * return plaintext databse list suitable for using in a GUI element
-   */
-  public String[] _getOrderedSupportedSources()
-  {
-    String[] srcs = this.getSupportedDb();
-    ArrayList dassrc = new ArrayList(), nondas = new ArrayList();
-    for (int i = 0; i < srcs.length; i++)
-    {
-      for (DbSourceProxy dbs : getSourceProxy(srcs[i]))
-      {
-        String nm = dbs.getDbName();
-        if (getSourceProxy(srcs[i]) instanceof jalview.ws.dbsources.das.datamodel.DasSequenceSource)
-        {
-          if (nm.startsWith("das:"))
-          {
-            nm = nm.substring(4);
-          }
-          dassrc.add(new String[] { srcs[i], nm.toUpperCase() });
-        }
-        else
-        {
-          nondas.add(new String[] { srcs[i], nm.toUpperCase() });
-        }
-      }
-    }
-    Object[] sorted = nondas.toArray();
-    String[] tosort = new String[sorted.length];
-    nondas.clear();
-    for (int j = 0; j < sorted.length; j++)
-    {
-      tosort[j] = ((String[]) sorted[j])[1];
-    }
-    jalview.util.QuickSort.sort(tosort, sorted);
-    int i = 0;
-    // construct array with all sources listed
-    srcs = new String[sorted.length + dassrc.size()];
-    for (int j = sorted.length - 1; j >= 0; j--, i++)
-    {
-      srcs[i] = ((String[]) sorted[j])[0];
-      sorted[j] = null;
-    }
-
-    sorted = dassrc.toArray();
-    tosort = new String[sorted.length];
-    dassrc.clear();
-    for (int j = 0; j < sorted.length; j++)
-    {
-      tosort[j] = ((String[]) sorted[j])[1];
-    }
-    jalview.util.QuickSort.sort(tosort, sorted);
-    for (int j = sorted.length - 1; j >= 0; j--, i++)
-    {
-      srcs[i] = ((String[]) sorted[j])[0];
-      sorted[j] = null;
-    }
-    return srcs;
-  }
-
-  /**
-   * simple run method to test dbsources.
-   * 
-   * @param argv
-   */
-  public static void main(String[] argv)
-  {
-    AlignmentI ds = null;
-    Vector noProds = new Vector();
-    String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
-            + "With no arguments, all DbSources will be queried with their test Accession number.\n"
-            + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
-            + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to <DBNAME> and retrieve <ACCNO> from it.\n"
-            + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use.";
-    boolean withDas = true;
-    if (argv != null && argv.length > 0
-            && argv[0].toLowerCase().startsWith("-nodas"))
-    {
-      withDas = false;
-      String targs[] = new String[argv.length - 1];
-      System.arraycopy(argv, 1, targs, 0, targs.length);
-      argv = targs;
-    }
-    if (argv != null && argv.length > 0)
-    {
-      List<DbSourceProxy> sps = new SequenceFetcher(withDas)
-              .getSourceProxy(argv[0]);
-
-      if (sps != null)
-      {
-        for (DbSourceProxy sp : sps)
-        {
-          AlignmentI al = null;
-          try
-          {
-            al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
-                    .getTestQuery());
-          } catch (Exception e)
-          {
-            e.printStackTrace();
-            System.err.println("Error when retrieving "
-                    + (argv.length > 1 ? argv[1] : sp.getTestQuery())
-                    + " from " + argv[0] + "\nUsage: " + usage);
-          }
-          SequenceI[] prod = al.getSequencesArray();
-          if (al != null)
-          {
-            for (int p = 0; p < prod.length; p++)
-            {
-              System.out.println("Prod " + p + ": "
-                      + prod[p].getDisplayId(true) + " : "
-                      + prod[p].getDescription());
-            }
-          }
-        }
-        return;
-      }
-      else
-      {
-        System.err.println("Can't resolve " + argv[0]
-                + " as a database name. Allowed values are :\n"
-                + new SequenceFetcher().getSupportedDb());
-      }
-      System.out.println(usage);
-      return;
-    }
-    ASequenceFetcher sfetcher = new SequenceFetcher(withDas);
-    String[] dbSources = sfetcher.getSupportedDb();
-    for (int dbsource = 0; dbsource < dbSources.length; dbsource++)
-    {
-      String db = dbSources[dbsource];
-      // skip me
-      if (db.equals(DBRefSource.PDB))
-      {
-        continue;
-      }
-      for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
-      {
-        System.out.println("Source: " + sp.getDbName() + " (" + db
-                + "): retrieving test:" + sp.getTestQuery());
-        AlignmentI al = null;
-        try
-        {
-          al = sp.getSequenceRecords(sp.getTestQuery());
-          if (al != null && al.getHeight() > 0
-                  && sp.getDbSourceProperties() != null)
-          {
-            boolean dna = sp.getDbSourceProperties().containsKey(
-                    DBRefSource.DNACODINGSEQDB)
-                    || sp.getDbSourceProperties().containsKey(
-                            DBRefSource.DNASEQDB)
-                    || sp.getDbSourceProperties().containsKey(
-                            DBRefSource.CODINGSEQDB);
-            // try and find products
-            String types[] = jalview.analysis.CrossRef
-                    .findSequenceXrefTypes(dna, al.getSequencesArray());
-            if (types != null)
-            {
-              System.out.println("Xref Types for: "
-                      + (dna ? "dna" : "prot"));
-              for (int t = 0; t < types.length; t++)
-              {
-                System.out.println("Type: " + types[t]);
-                SequenceI[] prod = jalview.analysis.CrossRef
-                        .findXrefSequences(al.getSequencesArray(), dna,
-                                types[t]).getSequencesArray();
-                System.out.println("Found "
-                        + ((prod == null) ? "no" : "" + prod.length)
-                        + " products");
-                if (prod != null)
-                {
-                  for (int p = 0; p < prod.length; p++)
-                  {
-                    System.out.println("Prod " + p + ": "
-                            + prod[p].getDisplayId(true));
-                  }
-                }
-              }
-            }
-            else
-            {
-              noProds.addElement((dna ? new Object[] { al, al }
-                      : new Object[] { al }));
-            }
-
-          }
-        } catch (Exception ex)
-        {
-          System.out.println("ERROR:Failed to retrieve test query.");
-          ex.printStackTrace(System.out);
-        }
-
-        if (al == null)
-        {
-          System.out.println("ERROR:No alignment retrieved.");
-          StringBuffer raw = sp.getRawRecords();
-          if (raw != null)
-          {
-            System.out.println(raw.toString());
-          }
-          else
-          {
-            System.out.println("ERROR:No Raw results.");
-          }
-        }
-        else
-        {
-          System.out.println("Retrieved " + al.getHeight() + " sequences.");
-          for (int s = 0; s < al.getHeight(); s++)
-          {
-            SequenceI sq = al.getSequenceAt(s);
-            while (sq.getDatasetSequence() != null)
-            {
-              sq = sq.getDatasetSequence();
-
-            }
-            if (ds == null)
-            {
-              ds = new Alignment(new SequenceI[] { sq });
-
-            }
-            else
-            {
-              ds.addSequence(sq);
-            }
-          }
-        }
-        System.out.flush();
-        System.err.flush();
-
-      }
-      if (noProds.size() > 0)
-      {
-        Enumeration ts = noProds.elements();
-        while (ts.hasMoreElements())
-
-        {
-          Object[] typeSq = (Object[]) ts.nextElement();
-          boolean dna = (typeSq.length > 1);
-          AlignmentI al = (AlignmentI) typeSq[0];
-          System.out.println("Trying getProducts for "
-                  + al.getSequenceAt(0).getDisplayId(true));
-          System.out.println("Search DS Xref for: "
-                  + (dna ? "dna" : "prot"));
-          // have a bash at finding the products amongst all the retrieved
-          // sequences.
-          SequenceI[] seqs = al.getSequencesArray();
-          Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
-                  seqs, dna, null, ds);
-          System.out.println("Found "
-                  + ((prodal == null) ? "no" : "" + prodal.getHeight())
-                  + " products");
-          if (prodal != null)
-          {
-            SequenceI[] prod = prodal.getSequencesArray(); // note
-            // should
-            // test
-            // rather
-            // than
-            // throw
-            // away
-            // codon
-            // mapping
-            // (if
-            // present)
-            for (int p = 0; p < prod.length; p++)
-            {
-              System.out.println("Prod " + p + ": "
-                      + prod[p].getDisplayId(true));
-            }
-          }
-        }
-
-      }
-
-    }
-  }
-
-  /**
    * query the currently defined DAS source registry for sequence sources and
    * add a DasSequenceSource instance for each source to the SequenceFetcher
    * source list.
similarity index 89%
rename from src/jalview/ws/dbsources/EmblCdsSouce.java
rename to src/jalview/ws/dbsources/EmblCdsSource.java
index e5fbd6c..a73af61 100644 (file)
@@ -22,40 +22,43 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import com.stevesoft.pat.Regex;
 
-public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
+public class EmblCdsSource extends EmblXmlSource
 {
 
-  public EmblCdsSouce()
+  public EmblCdsSource()
   {
     super();
-    addDbSourceProperty(DBRefSource.CODINGSEQDB);
   }
 
+  @Override
   public String getAccessionSeparator()
   {
     return null;
   }
 
+  @Override
   public Regex getAccessionValidator()
   {
-    return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+");
+    return new Regex("^[A-Z]+[0-9]+");
   }
 
+  @Override
   public String getDbSource()
   {
     return DBRefSource.EMBLCDS;
   }
 
+  @Override
   public String getDbVersion()
   {
     return "0"; // TODO : this is dynamically set for a returned record - not
     // tied to proxy
   }
 
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     if (queries.indexOf(".") > -1)
@@ -65,6 +68,7 @@ public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
     return getEmblSequenceRecords(DBRefSource.EMBLCDS, queries);
   }
 
+  @Override
   public boolean isValidReference(String accession)
   {
     // most embl CDS refs look like ..
@@ -76,11 +80,13 @@ public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
   /**
    * cDNA for LDHA_CHICK swissprot sequence
    */
+  @Override
   public String getTestQuery()
   {
     return "CAA37824";
   }
 
+  @Override
   public String getDbName()
   {
     return "EMBL (CDS)";
index 893ab41..6bbe2e1 100644 (file)
@@ -22,7 +22,6 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import com.stevesoft.pat.Regex;
 
@@ -30,13 +29,12 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class EmblSource extends EmblXmlSource implements DbSourceProxy
+public class EmblSource extends EmblXmlSource
 {
 
   public EmblSource()
   {
-    addDbSourceProperty(DBRefSource.DNASEQDB);
-    addDbSourceProperty(DBRefSource.CODINGSEQDB);
+    super();
   }
 
   /*
@@ -44,6 +42,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -55,9 +54,10 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
-    return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+");
+    return new Regex("^[A-Z]+[0-9]+");
   }
 
   /*
@@ -65,6 +65,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.EMBL;
@@ -75,6 +76,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     // TODO Auto-generated method stub
@@ -86,6 +88,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     return getEmblSequenceRecords(DBRefSource.EMBL, queries);
@@ -96,6 +99,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     // most embl refs look like ..
@@ -108,11 +112,13 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
   /**
    * return LHD_CHICK coding gene
    */
+  @Override
   public String getTestQuery()
   {
     return "X53828";
   }
 
+  @Override
   public String getDbName()
   {
     return "EMBL"; // getDbSource();
index 20da45c..66ebe1b 100644 (file)
@@ -155,4 +155,10 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     return al;
   }
 
+  @Override
+  public boolean isDnaCoding()
+  {
+    return true;
+  }
+
 }
index 116962b..ce21ad0 100644 (file)
@@ -22,7 +22,6 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import com.stevesoft.pat.Regex;
 
@@ -32,13 +31,12 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
+public class GeneDbSource extends EmblXmlSource
 {
 
   public GeneDbSource()
   {
-    addDbSourceProperty(DBRefSource.DNASEQDB);
-    addDbSourceProperty(DBRefSource.CODINGSEQDB);
+    super();
   }
 
   /*
@@ -46,6 +44,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -57,6 +56,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     // TODO Auto-generated method stub
@@ -68,6 +68,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.GENEDB;
@@ -78,6 +79,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     // TODO Auto-generated method stub
@@ -89,6 +91,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     // query of form
@@ -102,6 +105,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     // TODO Auto-generated method stub
@@ -111,11 +115,13 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
   /**
    * return T.Brucei Mannosyl-Transferase TbPIG-M
    */
+  @Override
   public String getTestQuery()
   {
     return "Tb927.6.3300";
   }
 
+  @Override
   public String getDbName()
   {
     return "GeneDB"; // getDbSource();
index 4a50196..b9fb8f3 100644 (file)
@@ -29,7 +29,6 @@ import jalview.datamodel.SequenceI;
 import jalview.io.FormatAdapter;
 import jalview.util.MessageManager;
 import jalview.ws.ebi.EBIFetchClient;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -41,12 +40,11 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
+public class Pdb extends EbiFileRetrievedProxy
 {
   public Pdb()
   {
     super();
-    addDbSourceProperty(DBRefSource.PROTSEQDB);
   }
 
   /*
@@ -54,6 +52,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -65,6 +64,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     return new Regex("([1-9][0-9A-Za-z]{3}):?([ _A-Za-z0-9]?)");
@@ -75,6 +75,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.PDB;
@@ -85,6 +86,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     return "0";
@@ -95,6 +97,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     AlignmentI pdbfile = null;
@@ -219,6 +222,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     Regex r = getAccessionValidator();
@@ -228,11 +232,13 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
   /**
    * obtain human glyoxalase chain A sequence
    */
+  @Override
   public String getTestQuery()
   {
     return "1QIPA";
   }
 
+  @Override
   public String getDbName()
   {
     return "PDB"; // getDbSource();
index 0211bb1..4f081ee 100644 (file)
@@ -22,7 +22,8 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
-import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.datamodel.DBRefSource;
+import jalview.io.FormatAdapter;
 
 import com.stevesoft.pat.Regex;
 
@@ -34,15 +35,12 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-abstract public class Pfam extends Xfam implements DbSourceProxy
+abstract public class Pfam extends Xfam
 {
 
   public Pfam()
   {
     super();
-    // all extensions of this PFAM source base class are DOMAINDB sources
-    addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB);
-    addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB);
   }
 
   /*
@@ -50,6 +48,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -61,6 +60,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     // TODO Auto-generated method stub
@@ -111,22 +111,23 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     // TODO: this is not a perfect implementation. We need to be able to add
     // individual references to each sequence in each family alignment that's
     // retrieved.
     startQuery();
-    AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL()
+    AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
             + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL,
             "STH");
     for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
     {
       rcds.getSequenceAt(s).addDBRef(
-              new DBRefEntry(jalview.datamodel.DBRefSource.PFAM,
+new DBRefEntry(DBRefSource.PFAM,
               // getDbSource(),
                       getDbVersion(), queries.trim().toUpperCase()));
-      if (!getDbSource().equals(jalview.datamodel.DBRefSource.PFAM))
+      if (!getDbSource().equals(DBRefSource.PFAM))
       { // add the specific ref too
         rcds.getSequenceAt(s).addDBRef(
                 new DBRefEntry(getDbSource(), getDbVersion(), queries
@@ -142,6 +143,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     return accession.indexOf("PF") == 0;
@@ -151,9 +153,10 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * public String getDbName() { return "PFAM"; // getDbSource(); }
    */
 
+  @Override
   public String getXfamSource()
   {
-    return jalview.datamodel.DBRefSource.PFAM;
+    return DBRefSource.PFAM;
   }
 
 }
index 0490291..4f5b8f5 100644 (file)
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * flyweight class specifying retrieval of Full family alignments from PFAM
  * 
  */
-public class PfamFull extends Pfam implements DbSourceProxy
+public class PfamFull extends Pfam
 {
   public PfamFull()
   {
@@ -38,6 +37,7 @@ public class PfamFull extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Pfam#getPFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=full&format=stockholm&order=t&case=l&gaps=default&entry=";
@@ -48,21 +48,25 @@ public class PfamFull extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "PFAM (Full)";
   }
 
+  @Override
   public String getDbSource()
   {
     return getDbName(); // so we have unique DbSource string.
   }
 
+  @Override
   public String getTestQuery()
   {
     return "PF03760";
   }
 
+  @Override
   public String getDbVersion()
   {
     return null;
index 2ea75af..be8f044 100644 (file)
@@ -20,7 +20,6 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * flyweight class specifying retrieval of Seed alignments from PFAM
@@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy;
  * @author JimP
  * 
  */
-public class PfamSeed extends Pfam implements DbSourceProxy
+public class PfamSeed extends Pfam
 {
   public PfamSeed()
   {
@@ -40,6 +39,7 @@ public class PfamSeed extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Pfam#getPFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&format=stockholm&order=t&case=l&gaps=default&entry=";
@@ -50,16 +50,19 @@ public class PfamSeed extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "PFAM (Seed)";
   }
 
+  @Override
   public String getDbSource()
   {
     return jalview.datamodel.DBRefSource.PFAM; // archetype source
   }
 
+  @Override
   public String getTestQuery()
   {
     return "PF03760";
index b07b8ea..97f73d0 100644 (file)
@@ -20,7 +20,7 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.datamodel.DBRefSource;
 
 import com.stevesoft.pat.Regex;
 
@@ -29,15 +29,12 @@ import com.stevesoft.pat.Regex;
  * 
  * @author Lauren Michelle Lui
  */
-abstract public class Rfam extends Xfam implements DbSourceProxy
+abstract public class Rfam extends Xfam
 {
 
   public Rfam()
   {
     super();
-    // all extensions of this RFAM source base class are DOMAINDB sources
-    addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB);
-    addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB);
   }
 
   /*
@@ -46,6 +43,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator() Left here for
    * consistency with Pfam class
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -57,6 +55,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator() * Left here for
    */
+  @Override
   public Regex getAccessionValidator()
   {
     // TODO Auto-generated method stub
@@ -100,6 +99,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     return accession.indexOf("RF") == 0;
@@ -110,9 +110,10 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Xfam#getXfamSource()
    */
+  @Override
   public String getXfamSource()
   {
-    return jalview.datamodel.DBRefSource.RFAM;
+    return DBRefSource.RFAM;
   }
 
 }
index 74f4ec6..e1e9e9a 100644 (file)
@@ -20,7 +20,6 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * Flyweight class specifying retrieval of Full family alignments from RFAM
@@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy;
  * @author Lauren Michelle Lui
  * 
  */
-public class RfamFull extends Rfam implements DbSourceProxy
+public class RfamFull extends Rfam
 {
   public RfamFull()
   {
@@ -40,6 +39,7 @@ public class RfamFull extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Rfam#getXFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=full&nseLabels=0&format=stockholm&acc=";
@@ -50,16 +50,19 @@ public class RfamFull extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "RFAM (Full)";
   }
 
+  @Override
   public String getDbSource()
   {
     return getDbName(); // so we have unique DbSource string.
   }
 
+  @Override
   public String getTestQuery()
   {
     // Can be retrieved from http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014
@@ -68,6 +71,7 @@ public class RfamFull extends Rfam implements DbSourceProxy
     return "RF00014";
   }
 
+  @Override
   public String getDbVersion()
   {
     return null;
index dd2b12f..2850fd5 100644 (file)
@@ -20,7 +20,6 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * Flyweight class specifying retrieval of Seed family alignments from RFAM
@@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy;
  * @author Lauren Michelle Lui
  * 
  */
-public class RfamSeed extends Rfam implements DbSourceProxy
+public class RfamSeed extends Rfam
 {
   public RfamSeed()
   {
@@ -40,6 +39,7 @@ public class RfamSeed extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Rfam#getRFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&nseLabels=0&format=stockholm&acc=";
@@ -52,21 +52,25 @@ public class RfamSeed extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "RFAM (Seed)";
   }
 
+  @Override
   public String getDbSource()
   {
     return getDbName(); // so we have unique DbSource string.
   }
 
+  @Override
   public String getTestQuery()
   {
     return "RF00014";
   } // http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014
 
+  @Override
   public String getDbVersion()
   {
     return null;
index 1e8eadb..0a252b1 100644 (file)
@@ -29,14 +29,15 @@ import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 import jalview.datamodel.UniprotFile;
 import jalview.ws.ebi.EBIFetchClient;
-import jalview.ws.seqfetcher.DbSourceProxy;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
 import java.io.File;
 import java.io.FileReader;
 import java.io.Reader;
+import java.net.URL;
 import java.util.Vector;
 
+import org.exolab.castor.mapping.Mapping;
 import org.exolab.castor.xml.Unmarshaller;
 
 import com.stevesoft.pat.Regex;
@@ -45,14 +46,14 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
+public class Uniprot extends DbSourceProxyImpl
 {
 
   private static final String BAR_DELIMITER = "|";
 
   private static final String NEWLINE = "\n";
 
-  private static org.exolab.castor.mapping.Mapping map;
+  private static Mapping map;
 
   /**
    * Constructor
@@ -60,9 +61,6 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
   public Uniprot()
   {
     super();
-    addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
-    addDbSourceProperty(DBRefSource.PROTSEQDB);
-    // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
   }
 
   /*
@@ -70,6 +68,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     return null; // ";";
@@ -80,6 +79,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
@@ -90,6 +90,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.UNIPROT;
@@ -100,6 +101,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     return "0"; // we really don't know what version we're on.
@@ -121,9 +123,8 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
       if (map == null)
       {
         // 1. Load the mapping information from the file
-        map = new org.exolab.castor.mapping.Mapping(uni.getClass()
-                .getClassLoader());
-        java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
+        map = new Mapping(uni.getClass().getClassLoader());
+        URL url = getClass().getResource("/uniprot_mapping.xml");
         map.loadMapping(url);
       }
 
@@ -148,6 +149,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     startQuery();
@@ -302,6 +304,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     // TODO: make the following a standard validator
@@ -312,11 +315,13 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
   /**
    * return LDHA_CHICK uniprot entry
    */
+  @Override
   public String getTestQuery()
   {
     return "P00340";
   }
 
+  @Override
   public String getDbName()
   {
     return "Uniprot"; // getDbSource();
@@ -327,4 +332,13 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
   {
     return 0;
   }
+
+  @Override
+  public int getMaximumQueryCount()
+  {
+    // relocated this commented out code...
+    // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
+    // return 50;
+    return super.getMaximumQueryCount();
+  }
 }
similarity index 88%
rename from src/jalview/ws/dbsources/UnprotName.java
rename to src/jalview/ws/dbsources/UniprotName.java
index 5dbc960..fa693c0 100644 (file)
@@ -20,6 +20,8 @@
  */
 package jalview.ws.dbsources;
 
+import jalview.datamodel.DBRefSource;
+
 /**
  * Canonical Uniprot fetcher instance specifically retrieving UP_NAME
  * references.
@@ -27,8 +29,7 @@ package jalview.ws.dbsources;
  * @author JimP
  * 
  */
-public class UnprotName extends Uniprot implements
-        jalview.ws.seqfetcher.DbSourceProxy
+public class UniprotName extends Uniprot
 {
 
   /*
@@ -36,9 +37,10 @@ public class UnprotName extends Uniprot implements
    * 
    * @see jalview.ws.dbsources.Uniprot#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
-    return jalview.datamodel.DBRefSource.UP_NAME;
+    return DBRefSource.UP_NAME;
   }
 
 }
index c392ce6..26c9997 100644 (file)
@@ -22,6 +22,7 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
+import jalview.io.FormatAdapter;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
 /**
@@ -40,10 +41,12 @@ public abstract class Xfam extends DbSourceProxyImpl
 
   protected abstract String getXFAMURL();
 
+  @Override
   public abstract String getDbVersion();
 
   abstract String getXfamSource();
 
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     // TODO: this is not a perfect implementation. We need to be able to add
@@ -51,9 +54,8 @@ public abstract class Xfam extends DbSourceProxyImpl
     // retrieved.
     startQuery();
     // TODO: trap HTTP 404 exceptions and return null
-    AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL()
-            + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL,
-            "STH");
+    AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
+            + queries.trim().toUpperCase(), FormatAdapter.URL, "STH");
     for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
     {
       rcds.getSequenceAt(s).addDBRef(new DBRefEntry(getXfamSource(),
@@ -70,4 +72,13 @@ public abstract class Xfam extends DbSourceProxyImpl
     return rcds;
   }
 
+  /**
+   * Pfam and Rfam provide alignments
+   */
+  @Override
+  public boolean isAlignmentSource()
+  {
+    return true;
+  }
+
 }
index 8656c25..1e3ae7a 100644 (file)
  */
 package jalview.ws.seqfetcher;
 
+import jalview.bin.Cache;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.SequenceI;
 import jalview.util.DBRefUtils;
 import jalview.util.MessageManager;
+import jalview.util.QuickSort;
 
 import java.util.ArrayList;
 import java.util.Enumeration;
@@ -62,11 +64,11 @@ public class ASequenceFetcher
       return null;
     }
     String[] sf = new String[FETCHABLEDBS.size()];
-    Enumeration e = FETCHABLEDBS.keys();
+    Enumeration<String> e = FETCHABLEDBS.keys();
     int i = 0;
     while (e.hasMoreElements())
     {
-      sf[i++] = (String) e.nextElement();
+      sf[i++] = e.nextElement();
     }
     ;
     return sf;
@@ -74,25 +76,25 @@ public class ASequenceFetcher
 
   public boolean isFetchable(String source)
   {
-    Enumeration e = FETCHABLEDBS.keys();
+    Enumeration<String> e = FETCHABLEDBS.keys();
     while (e.hasMoreElements())
     {
-      String db = (String) e.nextElement();
+      String db = e.nextElement();
       if (source.compareToIgnoreCase(db) == 0)
       {
         return true;
       }
     }
-    jalview.bin.Cache.log.warn("isFetchable doesn't know about '" + source
+    Cache.log.warn("isFetchable doesn't know about '" + source
             + "'");
     return false;
   }
 
-  public SequenceI[] getSequences(jalview.datamodel.DBRefEntry[] refs)
+  public SequenceI[] getSequences(DBRefEntry[] refs)
   {
     SequenceI[] ret = null;
-    Vector<SequenceI> rseqs = new Vector();
-    Hashtable<String, List<String>> queries = new Hashtable();
+    Vector<SequenceI> rseqs = new Vector<SequenceI>();
+    Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
     for (int r = 0; r < refs.length; r++)
     {
       if (!queries.containsKey(refs[r].getSource()))
@@ -125,15 +127,12 @@ public class ASequenceFetcher
       while (fetchers.hasNext())
       {
         List<String> queriesMade = new ArrayList<String>();
-        HashSet queriesFound = new HashSet<String>();
+        HashSet<String> queriesFound = new HashSet<String>();
         try
         {
           DbSourceProxy fetcher = fetchers.next();
-          boolean doMultiple = fetcher.getAccessionSeparator() != null; // No
-          // separator
-          // - no
-          // Multiple
-          // Queries
+          boolean doMultiple = fetcher.getAccessionSeparator() != null;
+          // No separator - no Multiple Queries
           while (!queriesLeft.isEmpty())
           {
             StringBuffer qsb = new StringBuffer();
@@ -228,11 +227,11 @@ public class ASequenceFetcher
     if (rseqs.size() > 0)
     {
       ret = new SequenceI[rseqs.size()];
-      Enumeration sqs = rseqs.elements();
+      Enumeration<SequenceI> sqs = rseqs.elements();
       int si = 0;
       while (sqs.hasMoreElements())
       {
-        SequenceI s = (SequenceI) sqs.nextElement();
+        SequenceI s = sqs.nextElement();
         ret[si++] = s;
         s.updatePDBIds();
       }
@@ -288,7 +287,7 @@ public class ASequenceFetcher
       {
         nm[i++] = "" + s.getTier() + s.getDbName().toLowerCase();
       }
-      jalview.util.QuickSort.sort(nm, l);
+      QuickSort.sort(nm, l);
       dbs = new ArrayList<DbSourceProxy>();
       for (i = l.length - 1; i >= 0; i--)
       {
@@ -303,8 +302,7 @@ public class ASequenceFetcher
   }
 
   /**
-   * constructs and instance of the proxy and registers it as a valid
-   * dbrefsource
+   * constructs an instance of the proxy and registers it as a valid dbrefsource
    * 
    * @param dbSourceProxy
    *          reference for class implementing
@@ -312,7 +310,7 @@ public class ASequenceFetcher
    */
   protected void addDBRefSourceImpl(
           Class<? extends DbSourceProxy> dbSourceProxy)
-          throws java.lang.IllegalArgumentException
+          throws IllegalArgumentException
   {
     DbSourceProxy proxy = null;
     try
@@ -359,34 +357,6 @@ public class ASequenceFetcher
   }
 
   /**
-   * test if the database handler for dbName contains the given dbProperty when
-   * a dbName resolves to a set of proxies - this method will return the result
-   * of the test for the first instance. TODO implement additional method to
-   * query all sources for a db to find one with a particular property
-   * 
-   * @param dbName
-   * @param dbProperty
-   * @return true if proxy has the given property
-   */
-  public boolean hasDbSourceProperty(String dbName, String dbProperty)
-  {
-    // TODO: decide if invalidDbName exception is thrown here.
-
-    List<DbSourceProxy> proxies = getSourceProxy(dbName);
-    if (proxies != null)
-    {
-      for (DbSourceProxy proxy : proxies)
-      {
-        if (proxy.getDbSourceProperties() != null)
-        {
-          return proxy.getDbSourceProperties().containsKey(dbProperty);
-        }
-      }
-    }
-    return false;
-  }
-
-  /**
    * select sources which are implemented by instances of the given class
    * 
    * @param class that implements DbSourceProxy
@@ -394,7 +364,7 @@ public class ASequenceFetcher
    */
   public String[] getDbInstances(Class class1)
   {
-    if (!jalview.ws.seqfetcher.DbSourceProxy.class.isAssignableFrom(class1))
+    if (!DbSourceProxy.class.isAssignableFrom(class1))
     {
       throw new Error(
               MessageManager
@@ -407,11 +377,11 @@ public class ASequenceFetcher
       return null;
     }
     String[] sources = null;
-    Vector src = new Vector();
-    Enumeration dbs = FETCHABLEDBS.keys();
+    Vector<String> src = new Vector<String>();
+    Enumeration<String> dbs = FETCHABLEDBS.keys();
     while (dbs.hasMoreElements())
     {
-      String dbn = (String) dbs.nextElement();
+      String dbn = dbs.nextElement();
       for (DbSourceProxy dbp : FETCHABLEDBS.get(dbn).values())
       {
         if (class1.isAssignableFrom(dbp.getClass()))
@@ -429,7 +399,7 @@ public class ASequenceFetcher
 
   public DbSourceProxy[] getDbSourceProxyInstances(Class class1)
   {
-    ArrayList<DbSourceProxy> prlist = new ArrayList<DbSourceProxy>();
+    List<DbSourceProxy> prlist = new ArrayList<DbSourceProxy>();
     for (String fetchable : getSupportedDb())
     {
       for (DbSourceProxy pr : getSourceProxy(fetchable))
index 33f62b6..1e62d13 100644 (file)
@@ -78,15 +78,6 @@ public interface DbSourceProxy
   public Regex getAccessionValidator();
 
   /**
-   * DbSource properties hash - define the capabilities of this source Property
-   * hash methods defined in DbSourceProxyImpl. See constants in
-   * jalview.datamodel.DBRefSource for definition of properties.
-   * 
-   * @return
-   */
-  public Hashtable getDbSourceProperties();
-
-  /**
    * 
    * @return a test/example query that can be used to validate retrieval and
    *         parsing mechanisms
@@ -133,19 +124,43 @@ public interface DbSourceProxy
   public StringBuffer getRawRecords();
 
   /**
-   * Find out more info about the source.
+   * Tier for this data source
    * 
-   * @param dbsourceproperty
-   *          - one of the database reference source properties in
-   *          jalview.datamodel.DBRefSource
-   * @return true if the source has this property
+   * @return 0 - primary datasource, 1 - das primary source, 2 - secondary
    */
-  public boolean isA(Object dbsourceproperty);
+  public int getTier();
 
   /**
-   * Tier for this data source
+   * Extracts valid accession strings from a query string. If there is an
+   * accession id validator, returns the matched region or the first
+   * subgroup match from the matched region; else just returns the whole query.
    * 
-   * @return 0 - primary datasource, 1 - das primary source, 2 - secondary
+   * @param query
+   * @return
    */
-  public int getTier();
-}
+  String getAccessionIdFromQuery(String query);
+
+  /**
+   * Returns the maximum number of accession ids that can be queried in one
+   * request.
+   * 
+   * @return
+   */
+  public int getMaximumQueryCount();
+
+  /**
+   * Returns true if the source may provide coding DNA i.e. sequences with
+   * implicit peptide products
+   * 
+   * @return
+   */
+  public boolean isDnaCoding();
+
+  /**
+   * Answers true if the database is a source of alignments (for example, domain
+   * families)
+   * 
+   * @return
+   */
+  public boolean isAlignmentSource();
+}
\ No newline at end of file
index 85a729d..1e1468f 100644 (file)
@@ -24,7 +24,7 @@ import jalview.datamodel.AlignmentI;
 import jalview.io.FormatAdapter;
 import jalview.io.IdentifyFile;
 
-import java.util.Hashtable;
+import com.stevesoft.pat.Regex;
 
 /**
  * common methods for implementations of the DbSourceProxy interface.
@@ -34,50 +34,21 @@ import java.util.Hashtable;
  */
 public abstract class DbSourceProxyImpl implements DbSourceProxy
 {
-  public DbSourceProxyImpl()
-  {
-    // default constructor - do nothing probably.
-  }
 
-  private Hashtable props = null;
-
-  /*
-   * (non-Javadoc)
-   * 
-   * @see jalview.ws.DbSourceProxy#getDbSourceProperties()
-   */
-  public Hashtable getDbSourceProperties()
-  {
-    if (props == null)
-    {
-      props = new Hashtable();
-    }
-    return props;
-  }
+  boolean queryInProgress = false;
 
-  protected void addDbSourceProperty(Object propname)
-  {
-    addDbSourceProperty(propname, propname);
-  }
+  protected StringBuffer results = null;
 
-  protected void addDbSourceProperty(Object propname, Object propvalue)
+  public DbSourceProxyImpl()
   {
-    if (props == null)
-    {
-      props = new Hashtable();
-    }
-    props.put(propname, propvalue);
   }
 
-  boolean queryInProgress = false;
-
-  protected StringBuffer results = null;
-
   /*
    * (non-Javadoc)
    * 
    * @see jalview.ws.DbSourceProxy#getRawRecords()
    */
+  @Override
   public StringBuffer getRawRecords()
   {
     return results;
@@ -88,6 +59,7 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#queryInProgress()
    */
+  @Override
   public boolean queryInProgress()
   {
     return queryInProgress;
@@ -131,10 +103,48 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy
   }
 
   @Override
-  public boolean isA(Object dbsourceproperty)
+  public String getAccessionIdFromQuery(String query)
+  {
+    Regex vgr = getAccessionValidator();
+    if (vgr == null)
+    {
+      return query;
+    }
+    vgr.search(query);
+    if (vgr.numSubs() > 0)
+    {
+      return (vgr.stringMatched(1));
+    }
+    else
+    {
+      return (vgr.stringMatched());
+    }
+  }
+
+  /**
+   * Default is only one accession id per query - override if more are allowed.
+   */
+  @Override
+  public int getMaximumQueryCount()
+  {
+    return 1;
+  }
+
+  /**
+   * Returns false - override to return true for DNA coding data sources
+   */
+  @Override
+  public boolean isDnaCoding()
   {
-    assert (dbsourceproperty != null);
-    return (props == null) ? false : props.containsKey(dbsourceproperty);
+    return false;
   }
 
+  /**
+   * Answers false - override as required in subclasses
+   */
+  @Override
+  public boolean isAlignmentSource()
+  {
+    return false;
+  }
 }
index 978316b..f3526bc 100644 (file)
 package jalview.ext.ensembl;
 
-import jalview.datamodel.Alignment;
-import jalview.datamodel.SequenceI;
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
-import jalview.io.AppletFormatAdapter;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
-import jalview.util.DBRefUtils;
-
 import java.lang.reflect.Method;
-import java.util.Arrays;
 
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
+
 public class EnsemblSeqProxyTest
 {
-  @Test
-  public void testCheckEnsembl()
-  {
-    SeqFetcher sf = new SeqFetcher();
-    sf.setTestEnsemblStatus(true);
-    sf.setTesting(true);
-    Assert.assertTrue(sf.isEnsemblAvailable());
-    sf.setTestEnsemblStatus(false);
-    Assert.assertFalse(sf.isEnsemblAvailable());
-  }
-
-  @Test(suiteName = "live")
-  public void testLiveCheckEnsembl()
-  {
-    SeqFetcher sf = new SeqFetcher();
-    boolean isAvailable = sf.isEnsemblAvailable();
-    System.out.println("Ensembl is "
-            + (isAvailable ? "UP!" : "DOWN ******************* BAD!"));
-  }
-
-  @DataProvider(name = "ens_seqs")
+  @DataProvider(name = "queries")
   public Object[][] createData(Method m)
   {
-    System.out.println(m.getName());
-    return allSeqs;
+    return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
   }
 
-  public static Object[][] allSeqs = new Object[][]
-  {
-      {
-          EnsemblSeqType.PROTEIN,
-          "CCDS5863.1",
-          ">CCDS5863.1\n"
-                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
-                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
-                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
-                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
-                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
-                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
-                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
-                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
-                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
-                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
-                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
-                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
-                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
-      {
-          EnsemblSeqType.TRANSCRIPT,
-          "CCDS5863.1",
-          ">CCDS5863.1\n"
-                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
-                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
-                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
-                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
-                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
-                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
-                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
-                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
-                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
-                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
-                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
-                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
-                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
-                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
-                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
-                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
-                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
-                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
-                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
-                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
-                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
-                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
-                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
-                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
-                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
-                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
-                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
-                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
-                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
-                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
-                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
-                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
-                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
-                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
-                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
-                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
-                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
-                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
-                  + "GGTGCGTTTCCTGTCCACTGA\n" },
-      {
-          EnsemblSeqType.PROTEIN,
-          "ENSP00000288602",
-          ">ENSP00000288602\n"
-                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
-                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
-                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
-                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
-                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
-                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
-                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
-                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
-                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
-                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
-                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
-                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
-                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
-
-  @Test(dataProvider = "ens_seqs", suiteName = "live")
-  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
-          throws Exception
-  {
-    SeqFetcher sf = new SeqFetcher();
-    FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
-    { sq }));
-    SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
-    FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
-    SequenceI[] trueSqs = trueRes.getSeqsAsArray();
-    Assert.assertEquals(sqs.length, trueSqs.length,
-            "Different number of sequences retrieved for query " + sq);
-    Alignment ral = new Alignment(sqs);
-    for (SequenceI tr : trueSqs)
-    {
-      SequenceI[] rseq;
-      Assert.assertNotNull(
-              rseq = ral.findSequenceMatch(tr.getName()),
-              "Couldn't find sequences matching expected sequence "
-                      + tr.getName());
-      Assert.assertEquals(rseq.length, 1,
-              "Expected only one sequence for sequence ID " + tr.getName());
-      Assert.assertEquals(
-              rseq[0].getSequenceAsString(),
-              tr.getSequenceAsString(),
-              "Sequences differ for " + tr.getName() + "\n" + "Exp:"
-                      + tr.getSequenceAsString() + "\n" + "Got:"
-                      + rseq[0].getSequenceAsString());
-
-    }
-  }
-
-  @Test(dataProvider = "ens_seqs")
-  public void testRegexForProxy(EnsemblSeqType type, String sq,
-          String fastasq) throws Exception
+  @Test(dataProvider = "queries")
+  public void testIsValidReference(String query) throws Exception
   {
     EnsemblSeqProxy esq = new EnsemblProtein();
-    Assert.assertTrue(esq.isValidReference(sq),
-            "Expected reference string " + sq + " to be valid for regex "
+    Assert.assertTrue(esq.isValidReference(query),
+            "Expected reference string " + query
+                    + " to be valid for regex "
                     + esq.getAccessionValidator().toString());
-    
-    Assert.assertEquals(sq, DBRefUtils.processQueryToAccessionFor(esq, sq),
-            "Regex for " + esq.getClass().toString() + " not correct.");
   }
-  // TODO:
-  // sequence query with ENSG and anything other than a genomic type will yield
-  // sequences with different IDs which will
-  // break the post-processing stage where DBRefs are assigned to sequences.
-  // -> multiple_sequences = true is needed additional parameter
-  // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true
-  // result with four transcripts, cds, cdna, and protein products.
-  // *
-  // features for ENG -
-  // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3
-  // transcript: gives locus, all transcript products with ENSG parents
-  // gene: give all ENSG on locus
-  // exon: all exon boundaries. CDS same info.
-
-  // @Test(dataProvider = "ens_seqs", suiteName = "live")
-  // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
-  // throws Exception
-  // {
-  //
-  // {
-  // Assert.assertTrue(rseq[0].getDBRef() != null
-  // && rseq[0].getDBRef().length > 0,
-  // "No database references added to sequence by fetcher.");
-  // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
-  // new DBRefEntry("ENSEMBL", null, sq)),
-  // "Could't find database references added to sequence by fetcher.");
-  //
-  // }
 }
\ No newline at end of file
diff --git a/test/jalview/ext/ensembl/SeqFetcherTest.java b/test/jalview/ext/ensembl/SeqFetcherTest.java
new file mode 100644 (file)
index 0000000..8762698
--- /dev/null
@@ -0,0 +1,175 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SeqFetcherTest
+{
+  private static final Object[][] allSeqs = new Object[][] {
+      {
+          EnsemblSeqType.PROTEIN,
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
+      {
+          EnsemblSeqType.TRANSCRIPT,
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+                  + "GGTGCGTTTCCTGTCCACTGA\n" },
+      {
+          EnsemblSeqType.PROTEIN,
+          "ENSP00000288602",
+          ">ENSP00000288602\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
+
+  /**
+   * Data provider "ens_seqs": hands the rows held in {@code allSeqs} to the
+   * requesting test, after echoing that test method's name to stdout.
+   * 
+   * @param m
+   *          the test method asking for data
+   * @return the shared {@code allSeqs} table
+   */
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    final String requestingTest = m.getName();
+    System.out.println(requestingTest);
+    return allSeqs;
+  }
+
+  /**
+   * Fetches one accession through {@code SeqFetcher} and checks the result
+   * against the expected FASTA text: same number of sequences, exactly one
+   * match per expected sequence name, and identical sequence strings.
+   * 
+   * @param type
+   *          sequence type to request
+   * @param sq
+   *          accession to query
+   * @param fastasq
+   *          expected result, as FASTA text
+   * @throws Exception
+   */
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+          throws Exception
+  {
+    FileParse reader = new SeqFetcher().getSequenceReader(type,
+            Arrays.asList(sq));
+    SequenceI[] fetched = new FastaFile(reader).getSeqsAsArray();
+    SequenceI[] expected = new FastaFile(fastasq,
+            AppletFormatAdapter.PASTE).getSeqsAsArray();
+    Assert.assertEquals(fetched.length, expected.length,
+            "Different number of sequences retrieved for query " + sq);
+
+    Alignment fetchedAl = new Alignment(fetched);
+    for (SequenceI want : expected)
+    {
+      String name = want.getName();
+
+      // look the expected sequence up by name in what was fetched
+      SequenceI[] matches = fetchedAl.findSequenceMatch(name);
+      Assert.assertNotNull(matches,
+              "Couldn't find sequences matching expected sequence "
+                      + name);
+      Assert.assertEquals(matches.length, 1,
+              "Expected only one sequence for sequence ID " + name);
+
+      String got = matches[0].getSequenceAsString();
+      String exp = want.getSequenceAsString();
+      Assert.assertEquals(got, exp, "Sequences differ for " + name + "\n"
+              + "Exp:" + exp + "\n" + "Got:" + got);
+    }
+  }
+
+  /**
+   * Asks a fresh {@code SeqFetcher} whether Ensembl is available and prints
+   * the answer; makes no assertion.
+   */
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    final String status = new SeqFetcher().isEnsemblAvailable() ? "UP!"
+            : "DOWN or unreachable ******************* BAD!";
+    System.out.println("Ensembl is " + status);
+  }
+  // TODO:
+  // sequence query with ENSG and anything other than a genomic type will yield
+  // sequences with different IDs which will
+  // break the post-processing stage where DBRefs are assigned to sequences.
+  // -> multiple_sequences = true is needed additional parameter
+  // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true
+  // result with four transcripts, cds, cdna, and protein products.
+  // *
+  // features for ENG -
+  // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3
+  // transcript: gives locus, all transcript products with ENSG parents
+  // gene: give all ENSG on locus
+  // exon: all exon boundaries. CDS same info.
+
+  // @Test(dataProvider = "ens_seqs", suiteName = "live")
+  // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
+  // throws Exception
+  // {
+  //
+  // {
+  // Assert.assertTrue(rseq[0].getDBRef() != null
+  // && rseq[0].getDBRef().length > 0,
+  // "No database references added to sequence by fetcher.");
+  // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
+  // new DBRefEntry("ENSEMBL", null, sq)),
+  // "Could't find database references added to sequence by fetcher.");
+  //
+  // }
+
+}
diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java
new file mode 100644 (file)
index 0000000..7a9b553
--- /dev/null
@@ -0,0 +1,232 @@
+package jalview.ws;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.ws.seqfetcher.ASequenceFetcher;
+import jalview.ws.seqfetcher.DbSourceProxy;
+
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Vector;
+
+public class SequenceFetcherTest
+{
+
+  /**
+   * Simple command-line driver for exercising database sources. It prints
+   * what it retrieves rather than asserting anything.
+   * <ul>
+   * <li>no arguments: every supported source except PDB is queried with its
+   * own test query, and cross-reference products are looked up for the
+   * result</li>
+   * <li>one argument: the named database is resolved to its source proxies
+   * and each is queried with its test query</li>
+   * <li>two arguments: the second argument is used as the query instead of
+   * the test query</li>
+   * </ul>
+   * A leading argument starting with {@code -nodas} is stripped and passed
+   * to the {@code SequenceFetcher} constructor as {@code false}.
+   * 
+   * @param argv
+   *          {@code [-nodas] [<DBNAME> [<ACCNO>]]}
+   */
+  public static void main(String[] argv)
+  {
+    // TODO: extracted from SequenceFetcher - convert to proper unit test with
+    // assertions
+
+    AlignmentI ds = null;
+    // retrievals for which no xref types were found; a two element entry
+    // marks a dna source, a one element entry a protein source
+    Vector<Object[]> noProds = new Vector<Object[]>();
+    String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
+            + "With no arguments, all DbSources will be queried with their test Accession number.\n"
+            + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
+            + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to <DBNAME> and retrieve <ACCNO> from it.\n"
+            + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use.";
+    boolean withDas = true;
+    if (argv != null && argv.length > 0
+            && argv[0].toLowerCase().startsWith("-nodas"))
+    {
+      // drop the flag so the remaining arguments are [<DBNAME> [<ACCNO>]]
+      withDas = false;
+      String targs[] = new String[argv.length - 1];
+      System.arraycopy(argv, 1, targs, 0, targs.length);
+      argv = targs;
+    }
+    if (argv != null && argv.length > 0)
+    {
+      List<DbSourceProxy> sps = new SequenceFetcher(withDas)
+              .getSourceProxy(argv[0]);
+
+      if (sps != null)
+      {
+        for (DbSourceProxy sp : sps)
+        {
+          AlignmentI al = null;
+          try
+          {
+            al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
+                    .getTestQuery());
+          } catch (Exception e)
+          {
+            e.printStackTrace();
+            System.err.println("Error when retrieving "
+                    + (argv.length > 1 ? argv[1] : sp.getTestQuery())
+                    + " from " + argv[0] + "\nUsage: " + usage);
+          }
+          // al is still null if the retrieval threw, so it must be checked
+          // before asking it for its sequences
+          if (al != null)
+          {
+            SequenceI[] prod = al.getSequencesArray();
+            for (int p = 0; p < prod.length; p++)
+            {
+              System.out.println("Prod " + p + ": "
+                      + prod[p].getDisplayId(true) + " : "
+                      + prod[p].getDescription());
+            }
+          }
+        }
+        return;
+      }
+      else
+      {
+        // Arrays.toString so the database names are listed, rather than the
+        // array's identity string
+        System.err.println("Can't resolve " + argv[0]
+                + " as a database name. Allowed values are :\n"
+                + java.util.Arrays.toString(new SequenceFetcher()
+                        .getSupportedDb()));
+      }
+      System.out.println(usage);
+      return;
+    }
+    ASequenceFetcher sfetcher = new SequenceFetcher(withDas);
+    String[] dbSources = sfetcher.getSupportedDb();
+    for (int dbsource = 0; dbsource < dbSources.length; dbsource++)
+    {
+      String db = dbSources[dbsource];
+      // skip me
+      if (db.equals(DBRefSource.PDB))
+      {
+        continue;
+      }
+      for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
+      {
+        System.out.println("Source: " + sp.getDbName() + " (" + db
+                + "): retrieving test:" + sp.getTestQuery());
+        AlignmentI al = null;
+        try
+        {
+          al = sp.getSequenceRecords(sp.getTestQuery());
+          if (al != null && al.getHeight() > 0)
+          {
+            boolean dna = sp.isDnaCoding();
+            // try and find products
+            String types[] = jalview.analysis.CrossRef
+                    .findSequenceXrefTypes(dna, al.getSequencesArray());
+            if (types != null)
+            {
+              System.out.println("Xref Types for: "
+                      + (dna ? "dna" : "prot"));
+              for (int t = 0; t < types.length; t++)
+              {
+                System.out.println("Type: " + types[t]);
+                // the lookup may find nothing, so test the alignment itself
+                // before dereferencing it
+                AlignmentI xrefs = jalview.analysis.CrossRef
+                        .findXrefSequences(al.getSequencesArray(), dna,
+                                types[t]);
+                SequenceI[] prod = (xrefs == null) ? null : xrefs
+                        .getSequencesArray();
+                System.out.println("Found "
+                        + ((prod == null) ? "no" : "" + prod.length)
+                        + " products");
+                if (prod != null)
+                {
+                  for (int p = 0; p < prod.length; p++)
+                  {
+                    System.out.println("Prod " + p + ": "
+                            + prod[p].getDisplayId(true));
+                  }
+                }
+              }
+            }
+            else
+            {
+              noProds.addElement((dna ? new Object[] { al, al }
+                      : new Object[] { al }));
+            }
+
+          }
+        } catch (Exception ex)
+        {
+          System.out.println("ERROR:Failed to retrieve test query.");
+          ex.printStackTrace(System.out);
+        }
+
+        if (al == null)
+        {
+          System.out.println("ERROR:No alignment retrieved.");
+          StringBuffer raw = sp.getRawRecords();
+          if (raw != null)
+          {
+            System.out.println(raw.toString());
+          }
+          else
+          {
+            System.out.println("ERROR:No Raw results.");
+          }
+        }
+        else
+        {
+          System.out.println("Retrieved " + al.getHeight() + " sequences.");
+          for (int s = 0; s < al.getHeight(); s++)
+          {
+            // walk up to the underlying dataset sequence before collecting
+            SequenceI sq = al.getSequenceAt(s);
+            while (sq.getDatasetSequence() != null)
+            {
+              sq = sq.getDatasetSequence();
+            }
+            if (ds == null)
+            {
+              ds = new Alignment(new SequenceI[] { sq });
+            }
+            else
+            {
+              ds.addSequence(sq);
+            }
+          }
+        }
+        System.out.flush();
+        System.err.flush();
+
+      }
+      for (Object[] typeSq : noProds)
+      {
+        boolean dna = (typeSq.length > 1);
+        AlignmentI al = (AlignmentI) typeSq[0];
+        System.out.println("Trying getProducts for "
+                + al.getSequenceAt(0).getDisplayId(true));
+        System.out.println("Search DS Xref for: "
+                + (dna ? "dna" : "prot"));
+        // have a bash at finding the products amongst all the retrieved
+        // sequences.
+        SequenceI[] seqs = al.getSequencesArray();
+        Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
+                seqs, dna, null, ds);
+        System.out.println("Found "
+                + ((prodal == null) ? "no" : "" + prodal.getHeight())
+                + " products");
+        if (prodal != null)
+        {
+          // note should test rather than throw away codon mapping (if
+          // present)
+          SequenceI[] prod = prodal.getSequencesArray();
+          for (int p = 0; p < prod.length; p++)
+          {
+            System.out.println("Prod " + p + ": "
+                    + prod[p].getDisplayId(true));
+          }
+        }
+      }
+
+    }
+  }
+
+}