JAL-1705 reworked Ensembl clients now fetching and mapping features &
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 21 Jan 2016 16:01:19 +0000 (16:01 +0000)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 21 Jan 2016 16:01:19 +0000 (16:01 +0000)
peptide

14 files changed:
src/jalview/ext/ensembl/EnsemblCdna.java
src/jalview/ext/ensembl/EnsemblCds.java
src/jalview/ext/ensembl/EnsemblGenome.java
src/jalview/ext/ensembl/EnsemblOverlap.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblProtein.java
src/jalview/ext/ensembl/EnsemblRestClient.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblSequenceFetcher.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblTranscript.java [deleted file]
src/jalview/ext/ensembl/SeqFetcher.java [deleted file]
test/jalview/ext/ensembl/EnsemblRestClientTest.java [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
test/jalview/ext/ensembl/SeqFetcherTest.java [deleted file]
test/jalview/ext/jmol/JmolCommandsTest.java [new file with mode: 0644]

index 9c88b7c..b8c9c3f 100644 (file)
@@ -1,11 +1,19 @@
 package jalview.ext.ensembl;
 
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.datamodel.SequenceFeature;
+import jalview.io.gff.SequenceOntology;
 
 import com.stevesoft.pat.Regex;
 
 public class EnsemblCdna extends EnsemblSeqProxy
 {
+  /*
+   * fetch exon features on genomic sequence (to identify the cdna regions)
+   * and cds and variation features (to retain)
+   */
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.exon, EnsemblFeatureType.cds,
+      EnsemblFeatureType.variation };
 
   public EnsemblCdna()
   {
@@ -31,9 +39,41 @@ public class EnsemblCdna extends EnsemblSeqProxy
   }
 
   @Override
-  public String getTestQuery()
+  protected EnsemblFeatureType[] getFeaturesToFetch()
   {
-    return "ENST00000288602";
+    return FEATURES_TO_FETCH;
+  }
+
+  /**
+   * Decides whether a feature of the given type is kept on the retrieved
+   * sequence. Exon features (type 'exon' or any sub-type of exon in the
+   * Sequence Ontology) are fetched only to identify the exon sequence range
+   * and would be redundant on the exon sequence itself, so they are rejected;
+   * all other feature types are retained.
+   * 
+   * @param type
+   *          the feature type to test
+   * @return false if the type is (a kind of) exon, otherwise true
+   */
+  @Override
+  protected boolean retainFeature(String type)
+  {
+    SequenceOntology ontology = SequenceOntology.getInstance();
+    boolean isExon = ontology.isA(type, SequenceOntology.EXON);
+    return !isExon;
+  }
+
+  /**
+   * Tests whether a genomic feature marks part of the sequence being
+   * retrieved. This is the case when the feature type is 'exon' (or a subtype
+   * of exon in the Sequence Ontology) and the feature's Parent attribute is
+   * "transcript:" followed by the accession id.
+   * 
+   * @param sf
+   *          the feature to test
+   * @param accId
+   *          the accession id of the transcript being retrieved
+   * @return true if the feature is an exon of the given transcript
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    boolean isExon = SequenceOntology.getInstance().isA(sf.getType(),
+            SequenceOntology.EXON);
+    if (!isExon)
+    {
+      return false;
+    }
+    String expectedParent = "transcript:" + accId;
+    String actualParent = (String) sf.getValue("Parent");
+    return expectedParent.equals(actualParent);
   }
 
 }
index dc92348..897371d 100644 (file)
@@ -1,10 +1,20 @@
 package jalview.ext.ensembl;
 
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.datamodel.SequenceFeature;
+import jalview.io.gff.SequenceOntology;
 
 public class EnsemblCds extends EnsemblSeqProxy
 {
+  /*
+   * fetch cds features on genomic sequence (to identify the CDS regions)
+   * and variation features (to retain)
+   */
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.cds, EnsemblFeatureType.variation };
 
+  /**
+   * Constructor
+   */
   public EnsemblCds()
   {
     super();
@@ -22,4 +32,42 @@ public class EnsemblCds extends EnsemblSeqProxy
     return EnsemblSeqType.CDS;
   }
 
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    return FEATURES_TO_FETCH;
+  }
+
+  /**
+   * Decides whether a feature of the given type is kept on the retrieved
+   * sequence. CDS features (type 'CDS' or any sub-type of CDS in the Sequence
+   * Ontology) are fetched only to identify the cds sequence range and would be
+   * redundant on the cds sequence itself, so they are rejected; all other
+   * feature types are retained.
+   * 
+   * @param type
+   *          the feature type to test
+   * @return false if the type is (a kind of) CDS, otherwise true
+   */
+  @Override
+  protected boolean retainFeature(String type)
+  {
+    SequenceOntology ontology = SequenceOntology.getInstance();
+    boolean isCds = ontology.isA(type, SequenceOntology.CDS);
+    return !isCds;
+  }
+
+  /**
+   * Tests whether a genomic feature marks part of the sequence being
+   * retrieved. This is the case when the feature type is 'CDS' (or a subtype
+   * of CDS in the Sequence Ontology) and the feature's Parent attribute is
+   * "transcript:" followed by the accession id.
+   * 
+   * @param sf
+   *          the feature to test
+   * @param accId
+   *          the accession id of the transcript being retrieved
+   * @return true if the feature is a CDS region of the given transcript
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    boolean isCds = SequenceOntology.getInstance().isA(sf.getType(),
+            SequenceOntology.CDS);
+    if (!isCds)
+    {
+      return false;
+    }
+    String expectedParent = "transcript:" + accId;
+    String actualParent = (String) sf.getValue("Parent");
+    return expectedParent.equals(actualParent);
+  }
+
 }
index 39dfac0..6b4a1f6 100644 (file)
@@ -1,9 +1,17 @@
 package jalview.ext.ensembl;
 
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.datamodel.SequenceFeature;
+import jalview.io.gff.SequenceOntology;
 
 public class EnsemblGenome extends EnsemblSeqProxy
 {
+  /*
+   * fetch transcript features on genomic sequence (to identify the transcript 
+   * regions) and cds, exon and variation features (to retain)
+   */
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.transcript, EnsemblFeatureType.exon,
+      EnsemblFeatureType.cds, EnsemblFeatureType.variation };
 
   public EnsemblGenome()
   {
@@ -22,4 +30,43 @@ public class EnsemblGenome extends EnsemblSeqProxy
     return EnsemblSeqType.GENOMIC;
   }
 
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    return FEATURES_TO_FETCH;
+  }
+
+  /**
+   * Decides whether a feature of the given type is kept on the retrieved
+   * sequence. Transcript features (type 'transcript' or any sub-type of
+   * transcript in the Sequence Ontology) are fetched only to identify the
+   * transcript sequence range and would be redundant on the transcript
+   * sequence itself, so they are rejected; all other feature types are
+   * retained.
+   * 
+   * @param type
+   *          the feature type to test
+   * @return false if the type is (a kind of) transcript, otherwise true
+   */
+  @Override
+  protected boolean retainFeature(String type)
+  {
+    SequenceOntology ontology = SequenceOntology.getInstance();
+    boolean isTranscript = ontology.isA(type, SequenceOntology.TRANSCRIPT);
+    return !isTranscript;
+  }
+
+  /**
+   * Tests whether a genomic feature marks the sequence being retrieved. This
+   * is the case when the feature type is 'transcript' (or a subtype of
+   * transcript in the Sequence Ontology) and the feature's ID attribute is
+   * "transcript:" followed by the accession id.
+   * 
+   * @param sf
+   *          the feature to test
+   * @param accId
+   *          the accession id of the transcript being retrieved
+   * @return true if the feature is the transcript with the given id
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    boolean isTranscript = SequenceOntology.getInstance().isA(
+            sf.getType(), SequenceOntology.TRANSCRIPT);
+    if (!isTranscript)
+    {
+      return false;
+    }
+    // note it is the feature's own ID (not its Parent) that is matched here
+    String expectedId = "transcript:" + accId;
+    String featureId = (String) sf.getValue("ID");
+    return expectedId.equals(featureId);
+  }
+
 }
diff --git a/src/jalview/ext/ensembl/EnsemblOverlap.java b/src/jalview/ext/ensembl/EnsemblOverlap.java
new file mode 100644 (file)
index 0000000..732b518
--- /dev/null
@@ -0,0 +1,123 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.io.FeaturesFile;
+import jalview.io.FileParse;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A client for fetching and processing Ensembl overlap data in GFF feature
+ * format
+ * 
+ * @author gmcarstairs
+ * @see http://rest.ensembl.org/documentation/info/overlap_id
+ */
+public class EnsemblOverlap extends EnsemblRestClient
+{
+  /*
+   * The default features to retrieve from Ensembl; can override in getSequenceRecords
+   */
+  private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds,
+      EnsemblFeatureType.exon, EnsemblFeatureType.variation };
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (overlap)";
+  }
+
+  /**
+   * Makes a query to the REST overlap endpoint for the given sequence
+   * identifier. This returns an 'alignment' consisting of one 'dummy sequence'
+   * (the genomic sequence for which overlap features are returned by the
+   * service). This sequence will have on it sequence features which are the
+   * real information of interest, such as CDS regions or sequence variations.
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String query) throws IOException
+  {
+    // TODO: use a vararg String... for getSequenceRecords instead?
+    List<String> queries = new ArrayList<String>();
+    queries.add(query);
+    FileParse fp = getSequenceReader(queries);
+    FeaturesFile fr = new FeaturesFile(fp);
+    return new Alignment(fr.getSeqsAsArray());
+  }
+
+  /**
+   * Returns a URL for the REST overlap endpoint, requesting GFF3 output and
+   * adding one 'feature' parameter for each of the currently wanted feature
+   * types. Only the first identifier in the list is placed in the URL; any
+   * further entries are ignored.
+   * 
+   * @param ids
+   *          the query identifiers; must contain at least one entry
+   * @return the URL to request
+   * @throws MalformedURLException
+   *           if the constructed string is not a valid URL
+   */
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    // method-local buffer, so the unsynchronised builder is sufficient
+    StringBuilder urlstring = new StringBuilder(128);
+    urlstring.append(ENSEMBL_REST).append("/overlap/id/")
+            .append(ids.get(0));
+
+    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+    urlstring.append("?content-type=text/x-gff3");
+
+    /*
+     * specify features to retrieve
+     * @see http://rest.ensembl.org/documentation/info/overlap_id
+     * could make the list a configurable entry in jalview.properties
+     */
+    for (EnsemblFeatureType feature : featuresWanted)
+    {
+      urlstring.append("&feature=").append(feature.name());
+    }
+
+    return new URL(urlstring.toString());
+  }
+
+  @Override
+  public boolean useGetRequest()
+  {
+    return true;
+  }
+
+  /**
+   * Returns the MIME type for GFF3. For GET requests the Content-type header
+   * describes the required encoding of the response.
+   */
+  @Override
+  public String getRequestMimeType()
+  {
+    return "text/x-gff3";
+  }
+
+  /**
+   * Returns the MIME type for GFF3.
+   */
+  @Override
+  public String getResponseMimeType()
+  {
+    return "text/x-gff3";
+  }
+
+  /**
+   * Overloaded method that allows a list of features to retrieve to be
+   * specified
+   * 
+   * @param accId
+   * @param features
+   * @return
+   * @throws IOException
+   */
+  public AlignmentI getSequenceRecords(String accId,
+          EnsemblFeatureType[] features)
+          throws IOException
+  {
+    featuresWanted = features;
+    return getSequenceRecords(accId);
+  }
+}
index 4cc43ab..5238f98 100644 (file)
@@ -1,6 +1,7 @@
 package jalview.ext.ensembl;
 
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
 
 public class EnsemblProtein extends EnsemblSeqProxy
 {
@@ -22,6 +23,9 @@ public class EnsemblProtein extends EnsemblSeqProxy
     return EnsemblSeqType.PROTEIN;
   }
 
+  /**
+   * Returns false, as this fetcher does not retrieve DNA sequences.
+   */
   @Override
   public boolean isDnaCoding()
   {
@@ -37,4 +41,27 @@ public class EnsemblProtein extends EnsemblSeqProxy
     return "ENSP00000288602";
   }
 
+  /**
+   * Overrides base class method to do nothing - genomic features are not
+   * applicable to the protein product sequence
+   */
+  @Override
+  protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
+  {
+  }
+
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    // not applicable - can't fetch genomic features for a protein sequence
+    return null;
+  }
+
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    // not applicable - protein sequence is not a 'subset' of genomic sequence
+    return false;
+  }
+
 }
diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java
new file mode 100644 (file)
index 0000000..52993e9
--- /dev/null
@@ -0,0 +1,215 @@
+package jalview.ext.ensembl;
+
+import jalview.io.FileParse;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+
+import javax.ws.rs.HttpMethod;
+
+/**
+ * Base class for Ensembl REST service clients
+ * 
+ * @author gmcarstairs
+ */
+abstract class EnsemblRestClient extends EnsemblSequenceFetcher
+{
+  protected final static String ENSEMBL_REST = "http://rest.ensembl.org";
+
+  protected static final String SEQUENCE_ID_URL = ENSEMBL_REST
+          + "/sequence/id";
+
+  // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+  private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
+
+  private final static long RETEST_INTERVAL = 10000L; // 10 seconds
+
+  private static boolean ensemblRestAvailable = false;
+
+  private static long lastCheck = -1;
+
+  protected volatile boolean inProgress = false;
+
+  @Override
+  public boolean queryInProgress()
+  {
+    return inProgress;
+  }
+
+  @Override
+  public StringBuffer getRawRecords()
+  {
+    return null;
+  }
+
+  /**
+   * Returns the URL for the client http request
+   * 
+   * @param ids
+   * @return
+   * @throws MalformedURLException
+   */
+  protected abstract URL getUrl(List<String> ids)
+          throws MalformedURLException;
+
+  /**
+   * Returns true if client uses GET method, false if it uses POST
+   * 
+   * @return
+   */
+  public abstract boolean useGetRequest();
+
+  /**
+   * Return the desired value for the Content-Type request header
+   * 
+   * @return
+   * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
+   */
+  public abstract String getRequestMimeType();
+
+  /**
+   * Return the desired value for the Accept request header
+   * 
+   * @return
+   * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
+   */
+  public abstract String getResponseMimeType();
+
+  /**
+   * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if a
+   * response with a 2xx status code is received, else false. Any error while
+   * connecting is reported on stderr and treated as 'not available'.
+   * 
+   * @return true if the ping request succeeded, otherwise false
+   */
+  private boolean checkEnsembl()
+  {
+    HttpURLConnection conn = null;
+    try
+    {
+      URL ping = new URL(PING_URL);
+      conn = (HttpURLConnection) ping.openConnection();
+      int rc = conn.getResponseCode();
+      return rc >= 200 && rc < 300;
+    } catch (Throwable t)
+    {
+      System.err.println("Error connecting to " + PING_URL + ": "
+              + t.getMessage());
+      return false;
+    } finally
+    {
+      // release the connection on every path, including failures
+      if (conn != null)
+      {
+        conn.disconnect();
+      }
+    }
+  }
+
+  /**
+   * returns a reader to a Fasta response from the Ensembl sequence endpoint
+   * 
+   * @param ids
+   * @return
+   * @throws IOException
+   */
+  public FileParse getSequenceReader(List<String> ids)
+          throws IOException
+  {
+    URL url = getUrl(ids);
+  
+    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
+  
+    /*
+     * POST method allows multiple queries in one request; it is supported for
+     * sequence queries, but not for overlap
+     */
+    connection.setRequestMethod(useGetRequest() ? HttpMethod.GET
+            : HttpMethod.POST);
+    connection.setRequestProperty("Content-Type", getRequestMimeType());
+    connection.setRequestProperty("Accept", getResponseMimeType());
+
+    connection.setUseCaches(false);
+    connection.setDoInput(true);
+    connection.setDoOutput(true);
+
+    if (!useGetRequest())
+    {
+      writePostBody(connection, ids);
+    }
+  
+    InputStream response = connection.getInputStream();
+    int responseCode = connection.getResponseCode();
+  
+    if (responseCode != 200)
+    {
+      throw new RuntimeException(
+              "Response code was not 200. Detected response was "
+                      + responseCode);
+    }
+  
+    BufferedReader reader = null;
+    reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
+    FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
+    return fp;
+  }
+
+  /**
+   * Rechecks if Ensembl is responding, unless the last check was successful and
+   * the retest interval has not yet elapsed. Returns true if Ensembl is up,
+   * else false.
+   * 
+   * @return
+   */
+  public boolean isEnsemblAvailable()
+  {
+    long now = System.currentTimeMillis();
+    boolean retest = now - lastCheck > RETEST_INTERVAL;
+    if (ensemblRestAvailable && !retest)
+    {
+      return true;
+    }
+    ensemblRestAvailable = checkEnsembl();
+    lastCheck = now;
+    return ensemblRestAvailable;
+  }
+
+  /**
+   * Constructs, writes and flushes the POST body of the request, containing
+   * the (trimmed) query ids as a JSON object of the form {"ids":["a","b"]}.
+   * The body is encoded as UTF-8, and its byte length is set as the
+   * Content-Length request header.
+   * 
+   * @param connection
+   *          the connection to write the request body to
+   * @param ids
+   *          the query identifiers
+   * @throws IOException
+   *           if the body cannot be written
+   */
+  protected void writePostBody(HttpURLConnection connection,
+          List<String> ids) throws IOException
+  {
+    StringBuilder postBody = new StringBuilder(64);
+    postBody.append("{\"ids\":[");
+    boolean first = true;
+    for (String id : ids)
+    {
+      if (!first)
+      {
+        postBody.append(",");
+      }
+      first = false;
+      postBody.append("\"");
+      postBody.append(id.trim());
+      postBody.append("\"");
+    }
+    postBody.append("]}");
+
+    // explicit charset: the platform default may differ between machines
+    byte[] thepostbody = postBody.toString().getBytes("UTF-8");
+    connection.setRequestProperty("Content-Length",
+            Integer.toString(thepostbody.length));
+    DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
+    try
+    {
+      wr.write(thepostbody);
+      wr.flush();
+    } finally
+    {
+      // close the stream even if the write fails
+      wr.close();
+    }
+  }
+
+}
index 4f85bd0..e986ba8 100644 (file)
@@ -2,174 +2,381 @@ package jalview.ext.ensembl;
 
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.exceptions.JalviewException;
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
 import jalview.io.FastaFile;
 import jalview.io.FileParse;
+import jalview.io.gff.SequenceOntology;
 import jalview.util.DBRefUtils;
-import jalview.ws.seqfetcher.DbSourceProxyImpl;
+import jalview.util.MapList;
 
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
 
-import com.stevesoft.pat.Regex;
-
-public abstract class EnsemblSeqProxy extends DbSourceProxyImpl
+/**
+ * Base class for Ensembl sequence fetchers
+ * 
+ * @author gmcarstairs
+ */
+public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
-  SeqFetcher sf;
-
-  public EnsemblSeqProxy()
+  public enum EnsemblSeqType
   {
-    sf = new SeqFetcher();
-  }
+    /**
+     * type=genomic for the full dna including introns
+     */
+    GENOMIC("genomic"),
 
-  @Override
-  public String getDbSource()
-  {
-    return "ENSEMBL";
-  }
+    /**
+     * type=cdna for transcribed dna including UTRs
+     */
+    CDNA("cdna"),
 
+    /**
+     * type=cds for coding dna excluding UTRs
+     */
+    CDS("cds"),
 
-  @Override
-  public String getDbVersion()
-  {
-    return "0"; // sf.getVersion();
-  }
+    /**
+     * type=protein for the peptide product sequence
+     */
+    PROTEIN("protein");
 
-  @Override
-  public String getAccessionSeparator()
-  {
-    return " ";
-  }
+    /*
+     * the value of the 'type' parameter to fetch this version of 
+     * an Ensembl sequence
+     */
+    private String type;
+
+    EnsemblSeqType(String t)
+    {
+      type = t;
+    }
+
+    public String getType()
+    {
+      return type;
+    }
 
-  @Override
-  public Regex getAccessionValidator()
-  {
-    return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
   }
 
   /**
-   * Default test query is a transcript
+   * A comparator to sort ranges by start position, in ascending order if
+   * constructed with forward == true, else in descending order
    */
-  @Override
-  public String getTestQuery()
+  private class RangeSorter implements Comparator<int[]>
   {
-    return "ENST00000288602";
-  }
+    boolean forwards;
 
-  @Override
-  public boolean isValidReference(String accession)
+    RangeSorter(boolean forward)
+    {
+      forwards = forward;
+    }
+
+    @Override
+    public int compare(int[] o1, int[] o2)
+    {
+      return (forwards ? 1 : -1) * Integer.compare(o1[0], o2[0]);
+    }
+
+  };
+
+  /**
+   * Constructor
+   */
+  public EnsemblSeqProxy()
   {
-    return getAccessionValidator().search(accession);
   }
 
-  private volatile boolean inProgress = false;
-
+  /**
+   * Makes the sequence queries to Ensembl's REST service and returns an
+   * alignment consisting of the returned sequences
+   */
   @Override
-  public AlignmentI getSequenceRecords(String queries) throws Exception
+  public AlignmentI getSequenceRecords(String query) throws Exception
   {
+    // TODO use a String... query vararg instead?
+
+    // danger: accession separator used as a regex here, a string elsewhere
+    // in this case it is ok (it is just a space), but (e.g.) '\' would not be
+    List<String> allIds = Arrays.asList(query.split(getAccessionSeparator()));
+    AlignmentI alignment = null;
     inProgress = true;
-    List<String> tids, ids = new ArrayList<String>();
-    tids = Arrays.asList(queries.split(" +"));
-    AlignmentI rtn = null;
 
     /*
      * execute queries, if necessary in batches of the
      * maximum allowed number of ids
      */
     int maxQueryCount = getMaximumQueryCount();
-    for (int v = 0, vSize = tids.size(); v < vSize; v += maxQueryCount)
+    for (int v = 0, vSize = allIds.size(); v < vSize; v += maxQueryCount)
     {
       int p = Math.min(vSize, v + maxQueryCount);
-      ids = tids.subList(v, p);
+      List<String> ids = allIds.subList(v, p);
       try
       {
-        if (!sf.isEnsemblAvailable())
-        {
-          inProgress = false;
-          throw new JalviewException("ENSEMBL Rest API not available.");
-        }
-        FileParse fp = new FileParse(sf.getSequenceReader(
-                getSourceEnsemblType(), ids));
-        FastaFile fr = new FastaFile(fp);
-        if (fr.hasWarningMessage())
-        {
-          System.out
-                  .println("Warning when retrieving " + ids.size() + " ids"
-                          + ids.toString() + "\n" + fr.getWarningMessage());
-        }
-        else if (fr.getSeqs().size() != ids.size())
-        {
-          System.out.println("Only retrieved " + fr.getSeqs().size()
-                  + " sequences for " + ids.size() + " query strings.");
-        }
-        if (fr.getSeqs().size() > 0)
-        {
-          AlignmentI seqal = new Alignment(
-                  fr.getSeqsAsArray());
-          for (SequenceI sq:seqal.getSequences())
-          {
-            if (ids.contains((sq.getName())))
-            {
-              DBRefUtils.parseToDbRef(sq, "ENSEMBL", "0", sq.getName());
-            }
-          }
-          if (rtn == null)
-          {
-            rtn = seqal;
-          }
-          else
-          {
-            rtn.append(seqal);
-          }
-        }
+        alignment = fetchSequences(ids, alignment);
       } catch (Throwable r)
       {
         inProgress = false;
-        if (rtn != null)
+        String msg = "Aborting ID retrieval after " + v
+                + " chunks. Unexpected problem (" + r.getLocalizedMessage()
+                + ")";
+        System.err.println(msg);
+        if (alignment != null)
         {
-          System.err.println("Aborting ID retrieval after " + v
-                  + " chunks.");
-          r.printStackTrace();
+          break; // return what we got
         }
         else
         {
-
-          throw new JalviewException("Aborting ID retrieval after " + v
-                  + " chunks. Unexpected problem ("
-                  + r.getLocalizedMessage() + ")", r);
+          throw new JalviewException(msg, r);
         }
-
       }
     }
+
+    /*
+     * fetch and transfer genomic sequence features
+     */
+    for (String accId : allIds)
+    {
+      addFeaturesAndProduct(accId, alignment);
+    }
+
     inProgress = false;
-    return rtn;
+    return alignment;
   }
 
   /**
+   * Fetches Ensembl features using the /overlap REST endpoint, and adds them to
+   * the sequence in the alignment. Also fetches the protein product, maps it
+   * from the CDS features of the sequence, and saves it as a cross-reference of
+   * the dna sequence.
    * 
-   * @return the configured sequence return type for this source
+   * @param accId
+   * @param alignment
    */
-  protected abstract EnsemblSeqType getSourceEnsemblType();
+  protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
+  {
+    try
+    {
+      /*
+       * get 'dummy' genomic sequence with exon, cds and variation features
+       */
+      EnsemblOverlap gffFetcher = new EnsemblOverlap();
+      EnsemblFeatureType[] features = getFeaturesToFetch();
+      AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
+              features);
+      if (geneFeatures.getHeight() > 0)
+      {
+        /*
+         * transfer features to the query sequence
+         */
+        SequenceI genomicSequence = geneFeatures.getSequenceAt(0);
+        SequenceI querySeq = alignment.findName(accId);
+        transferFeatures(accId, genomicSequence, querySeq);
 
-  @Override
-  public boolean queryInProgress()
+        /*
+         * fetch and map protein product, and add it as a cross-reference
+         * of the retrieved sequence
+         */
+        addProteinProduct(querySeq);
+      }
+    } catch (IOException e)
+    {
+      System.err.println("Error transferring Ensembl features: "
+              + e.getMessage());
+    }
+  }
+
+  /**
+   * Returns those sequence feature types to fetch from Ensembl. We may want
+   * features either because they are of interest to the user, or as means to
+   * identify the locations of the sequence on the genomic sequence (CDS
+   * features identify CDS, exon features identify cDNA etc).
+   * 
+   * @return
+   */
+  protected abstract EnsemblFeatureType[] getFeaturesToFetch();
+
+  /**
+   * Fetches and maps the protein product, and adds it as a cross-reference of
+   * the retrieved sequence
+   */
+  protected void addProteinProduct(SequenceI querySeq)
   {
-    return inProgress;
+    String accId = querySeq.getName();
+    try
+    {
+      AlignmentI protein = new EnsemblProtein().getSequenceRecords(accId);
+      if (protein == null || protein.getHeight() == 0)
+      {
+        System.out.println("Failed to retrieve protein for " + accId);
+        return;
+      }
+      SequenceI proteinSeq = protein.getSequenceAt(0);
+
+      /*
+       * need dataset sequences (to be the subject of mappings)
+       */
+      proteinSeq.createDatasetSequence();
+      querySeq.createDatasetSequence();
+
+      MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
+      if (mapList != null)
+      {
+        Mapping map = new Mapping(proteinSeq.getDatasetSequence(), mapList);
+        DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(),
+                accId, map);
+        querySeq.getDatasetSequence().addDBRef(dbr);
+      }
+    } catch (Exception e)
+    {
+      System.err
+              .println(String.format("Error retrieving protein for %s: %s",
+                      accId, e.getMessage()));
+    }
   }
 
-  @Override
-  public StringBuffer getRawRecords()
+  /**
+   * Returns a mapping from dna to protein by inspecting sequence features of
+   * type "CDS" on the dna.
+   * 
+   * @param dnaSeq
+   * @param proteinSeq
+   * @return
+   */
+  protected MapList mapCdsToProtein(SequenceI dnaSeq, SequenceI proteinSeq)
   {
+    SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
+    if (sfs == null)
+    {
+      return null;
+    }
+
+    List<int[]> ranges = new ArrayList<int[]>(50);
+    SequenceOntology so = SequenceOntology.getInstance();
+
+    int mappedDnaLength = 0;
+    
+    /*
+     * Map CDS columns of dna to peptide. No need to worry about reverse strand
+     * dna here since the retrieved sequence is as transcribed (reverse
+     * complement for reverse strand), i.e in the same sense as the peptide. 
+     */
+    for (SequenceFeature sf : sfs)
+    {
+      /*
+       * process a CDS feature (or a sub-type of CDS)
+       */
+      if (so.isA(sf.getType(), SequenceOntology.CDS))
+      {
+        ranges.add(new int[] { sf.getBegin(), sf.getEnd() });
+        mappedDnaLength += Math.abs(sf.getEnd() - sf.getBegin()) + 1;
+      }
+    }
+    int proteinLength = proteinSeq.getLength();
+    List<int[]> proteinRange = new ArrayList<int[]>();
+    proteinRange.add(new int[] { 1, proteinLength });
+
+    /*
+     * dna length should map to protein (or protein plus stop codon)
+     */
+    if (mappedDnaLength == 3 * proteinLength
+            || mappedDnaLength == 3 * (proteinLength + 1))
+    {
+      return new MapList(ranges, proteinRange, 3, 1);
+    }
     return null;
   }
 
+  /**
+   * Fetches sequences for the list of accession ids, parses the response as
+   * Fasta, and adds the sequences to the alignment. A sequence with no
+   * description is given the database name as its description. A database
+   * reference is added to each sequence whose name is one of the queried ids,
+   * or becomes one when "ENSP" in the name is replaced by "ENST".
+   * 
+   * @param ids
+   *          the accession ids to fetch
+   * @param alignment
+   *          the alignment to extend, or null to start a new one
+   * @return the extended (or created) alignment; the given alignment
+   *         unchanged (possibly null) if no sequences were retrieved
+   * @throws JalviewException
+   *           if the Ensembl REST service is not available
+   * @throws IOException
+   *           if the request or the parsing of the response fails
+   */
+  protected AlignmentI fetchSequences(List<String> ids, AlignmentI alignment)
+          throws JalviewException, IOException
+  {
+    if (!isEnsemblAvailable())
+    {
+      inProgress = false;
+      throw new JalviewException("ENSEMBL Rest API not available.");
+    }
+
+    FastaFile fasta = new FastaFile(getSequenceReader(ids));
+    if (fasta.hasWarningMessage())
+    {
+      System.out.println(String.format(
+              "Warning when retrieving %d ids %s\n%s", ids.size(),
+              ids.toString(), fasta.getWarningMessage()));
+    }
+    else if (fasta.getSeqs().size() != ids.size())
+    {
+      System.out.println(String.format(
+              "Only retrieved %d sequences for %d query strings", fasta
+                      .getSeqs().size(), ids.size()));
+    }
+
+    if (fasta.getSeqs().size() <= 0)
+    {
+      // nothing retrieved: hand back the alignment as it was passed in
+      return alignment;
+    }
+
+    AlignmentI fetched = new Alignment(fasta.getSeqsAsArray());
+    for (SequenceI seq : fetched.getSequences())
+    {
+      if (seq.getDescription() == null)
+      {
+        seq.setDescription(getDbName());
+      }
+      String seqName = seq.getName();
+      boolean wasQueried = ids.contains(seqName)
+              || ids.contains(seqName.replace("ENSP", "ENST"));
+      if (wasQueried)
+      {
+        DBRefUtils.parseToDbRef(seq, DBRefSource.ENSEMBL, "0", seqName);
+      }
+    }
+
+    if (alignment == null)
+    {
+      return fetched;
+    }
+    alignment.append(fetched);
+    return alignment;
+  }
+
+  /**
+   * Returns the URL for the REST call
+   * 
+   * @return
+   * @throws MalformedURLException
+   */
   @Override
-  public int getTier()
+  protected URL getUrl(List<String> ids) throws MalformedURLException
   {
-    return 0;
+    // ids are not used - they go in the POST body instead
+    StringBuffer urlstring = new StringBuffer(128);
+    urlstring.append(SEQUENCE_ID_URL);
+
+    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+    urlstring.append("?type=").append(getSourceEnsemblType().getType());
+    urlstring.append(("&Accept=text/x-fasta"));
+
+    URL url = new URL(urlstring.toString());
+    return url;
   }
 
   /**
@@ -184,8 +391,208 @@ public abstract class EnsemblSeqProxy extends DbSourceProxyImpl
   }
 
+  /**
+   * Answers false, i.e. this source does not use a GET request (the query ids
+   * are sent in a POST body, as noted in getUrl)
+   */
   @Override
-  public boolean isDnaCoding()
+  public boolean useGetRequest()
+  {
+    return false;
+  }
+
+  /**
+   * Answers the MIME type of the request (JSON). NOTE(review): presumably sent
+   * as the Content-Type of the POST by the REST client - confirm there.
+   */
+  @Override
+  public String getRequestMimeType()
+  {
+    return "application/json";
+  }
+
+  /**
+   * Answers the MIME type wanted for the response (FASTA), matching the format
+   * requested in the URL built by getUrl
+   */
+  @Override
+  public String getResponseMimeType()
+  {
+    return "text/x-fasta";
+  }
+
+  /**
+   * Answers the type of sequence this source retrieves. Its getType() value is
+   * used as the 'type' parameter of the request URL.
+   * 
+   * @return the configured sequence return type for this source
+   */
+  protected abstract EnsemblSeqType getSourceEnsemblType();
+
+  /**
+   * Returns a mapping from the genomic ranges which identify the sequence
+   * being retrieved, to the contiguous range [1, total length of those ranges].
+   * 
+   * The correspondence between the frames of reference is made by locating
+   * those features on the genomic sequence which identify the retrieved
+   * sequence. Specifically
+   * <ul>
+   * <li>genomic sequence is identified by "transcript" features with
+   * ID=transcript:transcriptId</li>
+   * <li>cdna sequence is identified by "exon" features with
+   * Parent=transcript:transcriptId</li>
+   * <li>cds sequence is identified by "CDS" features with
+   * Parent=transcript:transcriptId</li>
+   * </ul>
+   * 
+   * The genomic ranges are sorted to run forwards (for positive strand) or
+   * backwards (for negative strand). Aborts and returns null if both positive
+   * and negative strand are found (this should not normally happen).
+   * 
+   * @param sfs
+   *          features of the genomic sequence; null is treated as no features
+   * @param accId
+   *          accession id of the sequence being retrieved
+   * @return the mapping, or null if the identifying features are on a mix of
+   *         strands
+   */
+  protected MapList getGenomicRanges(SequenceFeature[] sfs, String accId)
+  {
+    /*
+     * generously size for initial number of cds regions
+     * (worst case titin Q8WZ42 has c. 313 exons)
+     */
+    List<int[]> regions = new ArrayList<int[]>(100);
+    int mappedLength = 0;
+    int direction = 1; // forward
+    boolean directionSet = false;
+
+    /*
+     * guard against a null feature array, which would otherwise
+     * throw a NullPointerException in the loop below
+     */
+    SequenceFeature[] features = sfs == null ? new SequenceFeature[0] : sfs;
+
+    for (SequenceFeature sf : features)
+    {
+      /*
+       * accept the target feature type or a specialisation of it
+       * (e.g. coding_exon for exon)
+       */
+      if (!identifiesSequence(sf, accId))
+      {
+        continue;
+      }
+
+      int strand = sf.getStrand();
+      if (directionSet && strand != direction)
+      {
+        // abort - mix of forward and backward
+        System.err.println("Error: forward and backward strand for "
+                + accId);
+        return null;
+      }
+      direction = strand;
+      directionSet = true;
+
+      /*
+       * add to CDS ranges, semi-sorted forwards/backwards
+       */
+      if (strand < 0)
+      {
+        regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
+      }
+      else
+      {
+        regions.add(new int[] { sf.getBegin(), sf.getEnd() });
+      }
+
+      /*
+       * inclusive length of the range; the + 1 is applied after taking
+       * the absolute value so that it is also correct if end < begin
+       */
+      mappedLength += Math.abs(sf.getEnd() - sf.getBegin()) + 1;
+    }
+
+    /*
+     * a final sort is needed since Ensembl returns CDS sorted within source
+     * (havana / ensembl_havana)
+     */
+    Collections.sort(regions, new RangeSorter(direction == 1));
+
+    List<int[]> to = new ArrayList<int[]>();
+    to.add(new int[] { 1, mappedLength });
+
+    return new MapList(regions, to, 1, 1);
+  }
+
+  /**
+   * Answers true if the given feature on the genomic sequence marks a region
+   * that is part of the sequence being retrieved (for example, an exon
+   * feature whose parent is the transcript being fetched as cdna). Used by
+   * getGenomicRanges to select the ranges to map.
+   * 
+   * @param sf
+   *          a feature on the genomic sequence
+   * @param accId
+   *          accession id of the sequence being retrieved
+   * @return true if the feature identifies part of the retrieved sequence
+   */
+  protected abstract boolean identifiesSequence(SequenceFeature sf,
+          String accId);
+
+  /**
+   * Copies a genomic feature onto the target sequence, with its start and end
+   * converted to target sequence positions using the 'overlap' mapping.
+   * Nothing is added if the feature names a Parent which does not contain the
+   * target sequence's name, or if its range cannot be located in the mapping.
+   * 
+   * @param sf
+   *          the feature on the genomic sequence
+   * @param targetSequence
+   *          the sequence to add the (copied, relocated) feature to
+   * @param overlap
+   *          mapping from genomic positions to target sequence positions
+   */
+  protected void transferFeature(SequenceFeature sf,
+          SequenceI targetSequence, MapList overlap)
+  {
+    String parent = (String) sf.getValue("Parent");
+    boolean otherParent = parent != null
+            && !parent.contains(targetSequence.getName());
+    if (otherParent)
+    {
+      // this genomic feature belongs to a different transcript
+      return;
+    }
+
+    int[] located = overlap.locateInTo(sf.getBegin(), sf.getEnd());
+    if (located == null)
+    {
+      // feature does not overlap the target sequence
+      return;
+    }
+
+    SequenceFeature copy = new SequenceFeature(sf);
+    int offset = targetSequence.getStart() - 1;
+
+    // the located range may run backwards, so order its ends explicitly
+    int low = Math.min(located[0], located[1]);
+    int high = Math.max(located[0], located[1]);
+    copy.setBegin(offset + low);
+    copy.setEnd(offset + high);
+    targetSequence.addSequenceFeature(copy);
+  }
+
+  /**
+   * Transfers features from sourceSequence to targetSequence, relocating them
+   * using the mapping derived from the source sequence's features (see
+   * getGenomicRanges). Only feature types accepted by retainFeature are
+   * transferred. Does nothing if either sequence is null, if the source
+   * sequence has no features, or if no mapping could be derived.
+   * 
+   * @param accessionId
+   *          accession id of the sequence being retrieved
+   * @param sourceSequence
+   *          the (genomic) sequence holding the features
+   * @param targetSequence
+   *          the sequence to copy features to
+   */
+  protected void transferFeatures(String accessionId,
+          SequenceI sourceSequence, SequenceI targetSequence)
+  {
+    if (sourceSequence == null || targetSequence == null)
+    {
+      return;
+    }
+
+    SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
+    if (sfs == null)
+    {
+      // no feature array on the source sequence - nothing to transfer
+      return;
+    }
+
+    MapList overlap = getGenomicRanges(sfs, accessionId);
+    if (overlap == null)
+    {
+      /*
+       * getGenomicRanges returns null (after reporting the error) if it 
+       * finds a mix of strands; without a mapping nothing can be transferred
+       */
+      return;
+    }
+
+    final boolean forwardStrand = overlap.isFromForwardStrand();
+
+    /*
+     * sort features by start position (descending if reverse strand) 
+     * before transferring (in forwards order) to the target sequence
+     */
+    Arrays.sort(sfs, new Comparator<SequenceFeature>()
+    {
+      @Override
+      public int compare(SequenceFeature o1, SequenceFeature o2)
+      {
+        int c = Integer.compare(o1.getBegin(), o2.getBegin());
+        return forwardStrand ? c : -c;
+      }
+    });
+
+    for (SequenceFeature sf : sfs)
+    {
+      if (retainFeature(sf.getType()))
+      {
+        transferFeature(sf, targetSequence, overlap);
+      }
+    }
+  }
+
+  /**
+   * Answers true if the feature type is one to attach to the retrieved
+   * sequence. This implementation ignores the type and keeps every feature;
+   * it is protected so that subclasses can be more selective.
+   * 
+   * @param type
+   *          the feature type (unused here)
+   * @return true if features of this type should be transferred
+   */
+  protected boolean retainFeature(@SuppressWarnings("unused") String type)
   {
-    return true;
+    return true; // default is to keep all
   }
 }
diff --git a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
new file mode 100644 (file)
index 0000000..f1b96e2
--- /dev/null
@@ -0,0 +1,80 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.DBRefSource;
+import jalview.ws.seqfetcher.DbSourceProxyImpl;
+
+import com.stevesoft.pat.Regex;
+
+/**
+ * A base class for Ensembl sequence fetchers. It supplies the settings that
+ * are fixed for Ensembl sources: database source name and version, accession
+ * separator and validation pattern, tier, and a default test query.
+ * 
+ * @author gmcarstairs
+ *
+ */
+public abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
+{
+  /*
+   * possible values for the 'feature' parameter of the REST overlap endpoint
+   * @see http://rest.ensembl.org/documentation/info/overlap_id
+   */
+  protected enum EnsemblFeatureType
+  {
+    gene, transcript, cds, exon, repeat, simple, misc, variation,
+    somatic_variation, structural_variation, somatic_structural_variation,
+    constrained, regulatory
+  }
+
+  /**
+   * Answers the database source name for Ensembl
+   */
+  @Override
+  public String getDbSource()
+  {
+    // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL"
+    return DBRefSource.ENSEMBL; // "ENSEMBL"
+  }
+
+  /**
+   * Answers a fixed placeholder version of "0"
+   */
+  @Override
+  public String getDbVersion()
+  {
+    return "0";
+  }
+
+  /**
+   * Answers the separator between accession ids in a query (a single space)
+   */
+  @Override
+  public String getAccessionSeparator()
+  {
+    return " ";
+  }
+
+  /**
+   * Answers a new pattern matching one of the prefixes ENSP, ENST, ENSG or
+   * CCDS followed by at least three characters that are digits or '.'
+   */
+  @Override
+  public Regex getAccessionValidator()
+  {
+    return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
+  }
+
+  /**
+   * Answers true if a search of the accession with the accession validator
+   * pattern succeeds
+   * 
+   * @param accession
+   *          the accession id to check
+   */
+  @Override
+  public boolean isValidReference(String accession)
+  {
+    return getAccessionValidator().search(accession);
+  }
+
+  /**
+   * Answers tier 0 for Ensembl sources
+   */
+  @Override
+  public int getTier()
+  {
+    return 0;
+  }
+
+  /**
+   * Default test query is a transcript
+   */
+  @Override
+  public String getTestQuery()
+  {
+    // has CDS on reverse strand:
+    return "ENST00000288602";
+    // ENST00000461457 // forward strand
+  }
+
+  /**
+   * Answers true - by default Ensembl sources are treated as DNA coding
+   */
+  @Override
+  public boolean isDnaCoding()
+  {
+    return true;
+  }
+}
diff --git a/src/jalview/ext/ensembl/EnsemblTranscript.java b/src/jalview/ext/ensembl/EnsemblTranscript.java
deleted file mode 100644 (file)
index c2d0b6e..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-package jalview.ext.ensembl;
-
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
-
-public class EnsemblTranscript extends EnsemblSeqProxy
-{
-
-  // TODO is this class needed? it seems to duplicate EnsemblProtein
-  public EnsemblTranscript()
-  {
-    super();
-  }
-
-  @Override
-  public String getDbName()
-  {
-    return "ENSEMBL (Protein)";
-  }
-
-  @Override
-  protected EnsemblSeqType getSourceEnsemblType()
-  {
-    return EnsemblSeqType.PROTEIN;
-  }
-
-}
diff --git a/src/jalview/ext/ensembl/SeqFetcher.java b/src/jalview/ext/ensembl/SeqFetcher.java
deleted file mode 100644 (file)
index 57f000f..0000000
+++ /dev/null
@@ -1,193 +0,0 @@
-package jalview.ext.ensembl;
-
-import jalview.io.FileParse;
-
-import java.io.BufferedReader;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.net.URLConnection;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.http.NameValuePair;
-import org.apache.http.message.BasicNameValuePair;
-
-public class SeqFetcher
-{
-  private final static String ENSEMBL_REST = "rest.ensembl.org";
-
-  private static final String SEQUENCE_ID_URL = "http://" + ENSEMBL_REST + "/sequence/id";
-
-  private static final String PING_URL = "http://" + ENSEMBL_REST + "/info/ping";
-
-  private final static long RETEST_INTERVAL = 10000L; // 10 seconds
-
-  private static boolean ensemblRestAvailable = false;
-
-  private static long lastCheck = -1;
-
-  /**
-   * Rechecks if Ensembl is responding, unless the last check was successful and
-   * the retest interval has not yet elapsed. Returns true if Ensembl is up,
-   * else false.
-   * 
-   * @return
-   */
-  public boolean isEnsemblAvailable()
-  {
-    long now = System.currentTimeMillis();
-    boolean retest = now - lastCheck > RETEST_INTERVAL;
-    if (ensemblRestAvailable && !retest)
-    {
-      return true;
-    }
-    ensemblRestAvailable = checkEnsembl();
-    lastCheck = now;
-    return ensemblRestAvailable;
-  }
-
-  /**
-   * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
-   * successful, else false
-   * 
-   * @return
-   */
-  private boolean checkEnsembl()
-  {
-    try
-    {
-      URL ping = new URL(PING_URL);
-      HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
-      int rc = conn.getResponseCode();
-      conn.disconnect();
-      if (rc >= 200 && rc < 300)
-      {
-        return true;
-      }
-    } catch (Throwable t)
-    {
-      System.err.println("Error connecting to " + PING_URL + ": "
-              + t.getMessage());
-    }
-    return false;
-  }
-
-  public SeqFetcher()
-  {
-  }
-
-  public enum EnsemblSeqType
-  {
-    GENOMIC("genomic"), CDS("cds"), TRANSCRIPT("cds"), PROTEIN("protein"), CDNA(
-            "cdna");
-
-    private String type;
-
-    EnsemblSeqType(String t)
-    {
-      type = t;
-    }
-
-    public String getType()
-    {
-      return type;
-    }
-  }
-
-  /**
-   * Returns a list of additional URL query parameters to specify the desired
-   * sequence type (genomic/cds/protein etc), and data format Fasta
-   * 
-   * @param type
-   */
-  public List<NameValuePair> getAdditionalParameters(EnsemblSeqType type)
-  {
-    List<NameValuePair> params = new ArrayList<NameValuePair>();
-    params.add(new BasicNameValuePair("type", type.getType()));
-    params.add(new BasicNameValuePair("content-type", "text/x-fasta"));
-    return params;
-  }
-
-  /**
-   * return a reader to a Fasta response from the Ensembl sequence endpoint
-   * 
-   * @param returnType
-   * @param ids
-   * @return
-   * @throws IOException
-   */
-  public FileParse getSequenceReader(EnsemblSeqType returnType,
-          List<String> ids) throws IOException
-  {
-    // see http://rest.ensembl.org/documentation/info/sequence_id
-
-    String urlstring = SEQUENCE_ID_URL;
-    List<NameValuePair> vals = getAdditionalParameters(returnType);
-    boolean first = true;
-    for (NameValuePair nvp : vals)
-    {
-      urlstring += first ? "?" : "&";
-      first = false;
-      urlstring += nvp.getName() + "=" + nvp.getValue();
-    }
-
-    URL url = new URL(urlstring);
-
-    URLConnection connection = url.openConnection();
-    HttpURLConnection httpConnection = (HttpURLConnection) connection;
-
-    httpConnection.setRequestMethod("POST");
-    httpConnection.setRequestProperty("Content-Type", "application/json");
-    httpConnection.setRequestProperty("Accept", "text/x-fasta");
-    byte[] thepostbody;
-    {
-      StringBuilder postBody = new StringBuilder();
-      postBody.append("{\"ids\":[");
-      first = true;
-      for (String id : ids)
-      {
-        if (!first)
-        {
-          postBody.append(",");
-        }
-        first = false;
-        postBody.append("\"");
-        postBody.append(id.trim());
-        postBody.append("\"");
-      }
-      postBody.append("]}");
-      thepostbody = postBody.toString().getBytes();
-    }
-    httpConnection.setRequestProperty("Content-Length",
-            Integer.toString(thepostbody.length));
-    httpConnection.setUseCaches(false);
-    httpConnection.setDoInput(true);
-    httpConnection.setDoOutput(true);
-
-    DataOutputStream wr = new DataOutputStream(
-            httpConnection.getOutputStream());
-    wr.write(thepostbody);
-    wr.flush();
-    wr.close();
-
-    InputStream response = connection.getInputStream();
-    int responseCode = httpConnection.getResponseCode();
-
-    if (responseCode != 200)
-    {
-      throw new RuntimeException(
-              "Response code was not 200. Detected response was "
-                      + responseCode);
-    }
-
-    BufferedReader reader = null;
-    reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
-    FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
-    return fp;
-  }
-
-}
diff --git a/test/jalview/ext/ensembl/EnsemblRestClientTest.java b/test/jalview/ext/ensembl/EnsemblRestClientTest.java
new file mode 100644 (file)
index 0000000..086adbb
--- /dev/null
@@ -0,0 +1,69 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+/**
+ * Live check of the availability test provided by EnsemblRestClient
+ */
+public class EnsemblRestClientTest
+{
+
+  /**
+   * Asks a stub client whether Ensembl is available and reports the answer
+   * (to stdout if up, to stderr if not). Makes no assertion, so it does not
+   * fail if Ensembl is down.
+   */
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    /*
+     * only isEnsemblAvailable() is exercised, so the abstract
+     * methods are given do-nothing implementations
+     */
+    EnsemblRestClient client = new EnsemblRestClient()
+    {
+
+      @Override
+      protected URL getUrl(List<String> ids) throws MalformedURLException
+      {
+        return null;
+      }
+
+      @Override
+      public AlignmentI getSequenceRecords(String queries) throws Exception
+      {
+        return null;
+      }
+
+      @Override
+      public String getDbName()
+      {
+        return null;
+      }
+
+      @Override
+      public String getRequestMimeType()
+      {
+        return null;
+      }
+
+      @Override
+      public String getResponseMimeType()
+      {
+        return null;
+      }
+
+      @Override
+      public boolean useGetRequest()
+      {
+        return false;
+      }
+
+    };
+
+    if (!client.isEnsemblAvailable())
+    {
+      System.err
+              .println("Ensembl is DOWN or unreachable ******************* BAD!");
+      return;
+    }
+    System.out.println("Ensembl is UP!");
+  }
+
+}
index f3526bc..3ca74b0 100644 (file)
@@ -1,6 +1,17 @@
 package jalview.ext.ensembl;
 
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
 import java.lang.reflect.Method;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
 
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
@@ -9,8 +20,87 @@ import org.testng.annotations.Test;
 
 public class EnsemblSeqProxyTest
 {
+  private static final Object[][] allSeqs = new Object[][] {
+      {
+          new EnsemblProtein(),
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
+      {
+          new EnsemblCdna(),
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+                  + "GGTGCGTTTCCTGTCCACTGA\n" },
+      {
+          new EnsemblProtein(),
+          "ENSP00000288602",
+          ">ENSP00000288602\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
+
+  /**
+   * Supplies the accession ids (one per test invocation) that are expected to
+   * be accepted as valid references
+   */
   @DataProvider(name = "queries")
-  public Object[][] createData(Method m)
+  public Object[][] createQueryData(Method m)
   {
     return new Object[][] { { "CCDS5863.1" }, { "ENSP00000288602" } };
   }
@@ -18,10 +108,104 @@ public class EnsemblSeqProxyTest
+  /**
+   * Verifies that each id from the "queries" data provider is accepted by
+   * isValidReference of an EnsemblProtein fetcher
+   */
   @Test(dataProvider = "queries")
   public void testIsValidReference(String query) throws Exception
   {
-    EnsemblSeqProxy esq = new EnsemblProtein();
+    EnsemblSequenceFetcher esq = new EnsemblProtein();
     Assert.assertTrue(esq.isValidReference(query),
             "Expected reference string " + query
                     + " to be valid for regex "
                     + esq.getAccessionValidator().toString());
   }
+
+  /**
+   * Supplies the {fetcher, query id, expected FASTA} rows held in allSeqs,
+   * after printing the name of the test method requesting them
+   */
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    System.out.println(m.getName());
+    return allSeqs;
+  }
+
+  /**
+   * Fetches a single id with the given client and verifies that the FASTA
+   * returned holds the same sequences (matched by name, compared by sequence
+   * string) as the expected FASTA text
+   * 
+   * @param proxy
+   *          the client to fetch with
+   * @param sq
+   *          the id to query for
+   * @param fastasq
+   *          the expected response, in FASTA format
+   */
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq)
+          throws Exception
+  {
+    List<String> query = Arrays.asList(sq);
+    FileParse response = proxy.getSequenceReader(query);
+    SequenceI[] fetched = new FastaFile(response).getSeqsAsArray();
+
+    FastaFile expectedFasta = new FastaFile(fastasq,
+            AppletFormatAdapter.PASTE);
+    SequenceI[] expected = expectedFasta.getSeqsAsArray();
+
+    Assert.assertEquals(fetched.length, expected.length,
+            "Different number of sequences retrieved for query " + sq);
+
+    Alignment fetchedAlignment = new Alignment(fetched);
+    for (SequenceI expectedSeq : expected)
+    {
+      String name = expectedSeq.getName();
+      SequenceI[] matches = fetchedAlignment.findSequenceMatch(name);
+      Assert.assertNotNull(matches,
+              "Couldn't find sequences matching expected sequence " + name);
+      Assert.assertEquals(matches.length, 1,
+              "Expected only one sequence for sequence ID " + name);
+
+      String got = matches[0].getSequenceAsString();
+      String exp = expectedSeq.getSequenceAsString();
+      Assert.assertEquals(got, exp, "Sequences differ for " + name + "\n"
+              + "Exp:" + exp + "\n" + "Got:" + got);
+    }
+  }
+
+  /**
+   * Asks a stub client whether Ensembl is available and prints the answer.
+   * Makes no assertion, so it does not fail if Ensembl is down.
+   */
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    /*
+     * only isEnsemblAvailable() is exercised, so the abstract
+     * methods are given do-nothing implementations
+     */
+    EnsemblRestClient client = new EnsemblRestClient()
+    {
+
+      @Override
+      protected URL getUrl(List<String> ids) throws MalformedURLException
+      {
+        return null;
+      }
+
+      @Override
+      public AlignmentI getSequenceRecords(String queries) throws Exception
+      {
+        return null;
+      }
+
+      @Override
+      public String getDbName()
+      {
+        return null;
+      }
+
+      @Override
+      public String getRequestMimeType()
+      {
+        return null;
+      }
+
+      @Override
+      public String getResponseMimeType()
+      {
+        return null;
+      }
+
+      @Override
+      public boolean useGetRequest()
+      {
+        return false;
+      }
+
+    };
+
+    String status;
+    if (client.isEnsemblAvailable())
+    {
+      status = "UP!";
+    }
+    else
+    {
+      status = "DOWN or unreachable ******************* BAD!";
+    }
+    System.out.println("Ensembl is " + status);
+  }
 }
\ No newline at end of file
diff --git a/test/jalview/ext/ensembl/SeqFetcherTest.java b/test/jalview/ext/ensembl/SeqFetcherTest.java
deleted file mode 100644 (file)
index 8762698..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-package jalview.ext.ensembl;
-
-import jalview.datamodel.Alignment;
-import jalview.datamodel.SequenceI;
-import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
-import jalview.io.AppletFormatAdapter;
-import jalview.io.FastaFile;
-import jalview.io.FileParse;
-
-import java.lang.reflect.Method;
-import java.util.Arrays;
-
-import org.testng.Assert;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-public class SeqFetcherTest
-{
-  private static final Object[][] allSeqs = new Object[][] {
-      {
-          EnsemblSeqType.PROTEIN,
-          "CCDS5863.1",
-          ">CCDS5863.1\n"
-                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
-                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
-                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
-                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
-                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
-                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
-                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
-                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
-                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
-                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
-                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
-                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
-                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
-      {
-          EnsemblSeqType.TRANSCRIPT,
-          "CCDS5863.1",
-          ">CCDS5863.1\n"
-                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
-                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
-                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
-                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
-                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
-                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
-                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
-                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
-                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
-                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
-                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
-                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
-                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
-                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
-                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
-                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
-                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
-                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
-                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
-                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
-                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
-                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
-                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
-                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
-                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
-                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
-                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
-                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
-                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
-                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
-                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
-                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
-                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
-                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
-                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
-                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
-                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
-                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
-                  + "GGTGCGTTTCCTGTCCACTGA\n" },
-      {
-          EnsemblSeqType.PROTEIN,
-          "ENSP00000288602",
-          ">ENSP00000288602\n"
-                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
-                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
-                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
-                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
-                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
-                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
-                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
-                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
-                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
-                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
-                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
-                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
-                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
-
-  @DataProvider(name = "ens_seqs")
-  public Object[][] createData(Method m)
-  {
-    System.out.println(m.getName());
-    return allSeqs;
-  }
-
-  @Test(dataProvider = "ens_seqs", suiteName = "live")
-  public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
-          throws Exception
-  {
-    SeqFetcher sf = new SeqFetcher();
-    FileParse fp = sf.getSequenceReader(type, Arrays.asList(new String[]
-    { sq }));
-    SequenceI[] sqs = new FastaFile(fp).getSeqsAsArray();
-    FastaFile trueRes = new FastaFile(fastasq, AppletFormatAdapter.PASTE);
-    SequenceI[] trueSqs = trueRes.getSeqsAsArray();
-    Assert.assertEquals(sqs.length, trueSqs.length,
-            "Different number of sequences retrieved for query " + sq);
-    Alignment ral = new Alignment(sqs);
-    for (SequenceI tr : trueSqs)
-    {
-      SequenceI[] rseq;
-      Assert.assertNotNull(
-              rseq = ral.findSequenceMatch(tr.getName()),
-              "Couldn't find sequences matching expected sequence "
-                      + tr.getName());
-      Assert.assertEquals(rseq.length, 1,
-              "Expected only one sequence for sequence ID " + tr.getName());
-      Assert.assertEquals(
-              rseq[0].getSequenceAsString(),
-              tr.getSequenceAsString(),
-              "Sequences differ for " + tr.getName() + "\n" + "Exp:"
-                      + tr.getSequenceAsString() + "\n" + "Got:"
-                      + rseq[0].getSequenceAsString());
-  
-    }
-  }
-
-  @Test(suiteName = "live")
-  public void testLiveCheckEnsembl()
-  {
-    SeqFetcher sf = new SeqFetcher();
-    boolean isAvailable = sf.isEnsemblAvailable();
-    System.out.println("Ensembl is "
-            + (isAvailable ? "UP!"
-                    : "DOWN or unreachable ******************* BAD!"));
-  }
-  // TODO:
-  // sequence query with ENSG and anything other than a genomic type will yield
-  // sequences with different IDs which will
-  // break the post-processing stage where DBRefs are assigned to sequences.
-  // -> multiple_sequences = true is needed additional parameter
-  // http://rest.ensembl.org/sequence/id/ENSG00000157764?content-type=text/x-json;type=protein;multiple_sequences=true
-  // result with four transcripts, cds, cdna, and protein products.
-  // *
-  // features for ENG -
-  // http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=cds&feature=exon&feature=transcript&content-type=text/x-gff3
-  // transcript: gives locus, all transcript products with ENSG parents
-  // gene: give all ENSG on locus
-  // exon: all exon boundaries. CDS same info.
-
-  // @Test(dataProvider = "ens_seqs", suiteName = "live")
-  // public void testGetOneSeqs(EnsemblSeqType type, String sq, String fastasq)
-  // throws Exception
-  // {
-  //
-  // {
-  // Assert.assertTrue(rseq[0].getDBRef() != null
-  // && rseq[0].getDBRef().length > 0,
-  // "No database references added to sequence by fetcher.");
-  // Assert.assertNotNull(DBRefUtils.searchRefs(rseq[0].getDBRef(),
-  // new DBRefEntry("ENSEMBL", null, sq)),
-  // "Could't find database references added to sequence by fetcher.");
-  //
-  // }
-
-}
diff --git a/test/jalview/ext/jmol/JmolCommandsTest.java b/test/jalview/ext/jmol/JmolCommandsTest.java
new file mode 100644 (file)
index 0000000..46fa241
--- /dev/null
@@ -0,0 +1,34 @@
+package jalview.ext.jmol;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.SequenceRenderer;
+import jalview.structure.StructureMappingcommandSet;
+import jalview.structure.StructureSelectionManager;
+
+import org.testng.annotations.Test;
+
+/**
+ * Tests for JmolCommands
+ */
+public class JmolCommandsTest
+{
+
+  /**
+   * Calls getColourBySequenceCommand for two sequences, each paired with one
+   * structure file name, using a newly created StructureSelectionManager to
+   * which no mappings have been added.
+   * 
+   * NOTE(review): the result is not inspected, so at present this only checks
+   * that the call completes without throwing - assertions still to be added.
+   */
+  @Test(groups = { "Functional" })
+  public void testGetColourBySequenceCommand_noFeatures()
+  {
+    SequenceI seq1 = new Sequence("seq1", "MHRSQTRALK");
+    SequenceI seq2 = new Sequence("seq2", "MRLEITQSGD");
+    AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2 });
+    AlignFrame af = new AlignFrame(al, 800, 500);
+    SequenceRenderer sr = new SequenceRenderer(af.getViewport());
+    // one row of sequences per structure file
+    SequenceI[][] seqs = new SequenceI[][] { { seq1 }, { seq2 } };
+    String[] files = new String[] { "seq1.pdb", "seq2.pdb" };
+    StructureSelectionManager ssm = new StructureSelectionManager();
+
+    // need some mappings!
+
+    // result is currently unused - see the note in the method description
+    StructureMappingcommandSet[] commands = JmolCommands
+            .getColourBySequenceCommand(ssm, files, seqs, sr, null, al);
+  }
+}