mungo merge
[jalview.git] / src / jalview / ext / ensembl / EnsemblFeatures.java
diff --git a/src/jalview/ext/ensembl/EnsemblFeatures.java b/src/jalview/ext/ensembl/EnsemblFeatures.java
new file mode 100644 (file)
index 0000000..0547433
--- /dev/null
@@ -0,0 +1,141 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.io.FeaturesFile;
+import jalview.io.FileParse;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A client for fetching and processing Ensembl feature data in GFF format by
+ * calling the overlap REST service
+ * 
+ * @author gmcarstairs
+ * @see http://rest.ensembl.org/documentation/info/overlap_id
+ */
+class EnsemblFeatures extends EnsemblRestClient
+{
+  /*
+   * The default features to retrieve from Ensembl
+   * can override in getSequenceRecords parameter
+   */
+  private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds,
+      EnsemblFeatureType.exon, EnsemblFeatureType.variation };
+
+  /**
+   * Default constructor (to use rest.ensembl.org)
+   */
+  public EnsemblFeatures()
+  {
+    super();
+  }
+
+  /**
+   * Constructor given the target domain to fetch data from
+   * 
+   * @param d
+   */
+  public EnsemblFeatures(String d)
+  {
+    super(d);
+  }
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (features)";
+  }
+
+  /**
+   * Makes a query to the REST overlap endpoint for the given sequence
+   * identifier. This returns an 'alignment' consisting of one 'dummy sequence'
+   * (the genomic sequence for which overlap features are returned by the
+   * service). This sequence will have on it sequence features which are the
+   * real information of interest, such as CDS regions or sequence variations.
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String query) throws IOException
+  {
+    // TODO: use a vararg String... for getSequenceRecords instead?
+    List<String> queries = new ArrayList<String>();
+    queries.add(query);
+    FileParse fp = getSequenceReader(queries);
+    FeaturesFile fr = new FeaturesFile(fp);
+    return new Alignment(fr.getSeqsAsArray());
+  }
+
+  /**
+   * Returns a URL for the REST overlap endpoint
+   * 
+   * @param ids
+   * @return
+   */
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    StringBuffer urlstring = new StringBuffer(128);
+    urlstring.append(getDomain()).append("/overlap/id/")
+            .append(ids.get(0));
+
+    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+    urlstring.append("?content-type=text/x-gff3");
+
+    /*
+     * specify  features to retrieve
+     * @see http://rest.ensembl.org/documentation/info/overlap_id
+     * could make the list a configurable entry in jalview.properties
+     */
+    for (EnsemblFeatureType feature : featuresWanted)
+    {
+      urlstring.append("&feature=").append(feature.name());
+    }
+
+    return new URL(urlstring.toString());
+  }
+
+  @Override
+  protected boolean useGetRequest()
+  {
+    return true;
+  }
+
+  /**
+   * Returns the MIME type for GFF3. For GET requests the Content-type header
+   * describes the required encoding of the response.
+   */
+  @Override
+  protected String getRequestMimeType(boolean multipleIds)
+  {
+    return "text/x-gff3";
+  }
+
+  /**
+   * Returns the MIME type for GFF3.
+   */
+  @Override
+  protected String getResponseMimeType()
+  {
+    return "text/x-gff3";
+  }
+
+  /**
+   * Overloaded method that allows a list of features to retrieve to be
+   * specified
+   * 
+   * @param accId
+   * @param features
+   * @return
+   * @throws IOException
+   */
+  protected AlignmentI getSequenceRecords(String accId,
+          EnsemblFeatureType[] features) throws IOException
+  {
+    featuresWanted = features;
+    return getSequenceRecords(accId);
+  }
+}