JAL-1705 renamed SeqFetcherTest to EnsemblSeqProxyTest
author Jim Procter <jprocter@issues.jalview.org>
Sun, 28 Jun 2015 16:10:57 +0000 (17:10 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Sun, 28 Jun 2015 16:11:08 +0000 (17:11 +0100)
src/jalview/ext/ensembl/EnsemblSeqProxy.java [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java [moved from test/jalview/ext/ensembl/SeqFetcherTest.java with 100% similarity]

diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
new file mode 100644 (file)
index 0000000..137c9b0
--- /dev/null
@@ -0,0 +1,178 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.exceptions.JalviewException;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+import jalview.util.DBRefUtils;
+import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.ws.seqfetcher.DbSourceProxyImpl;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.stevesoft.pat.Regex;
+
+public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
+        DbSourceProxy
+{
+  SeqFetcher sf;
+
+  public EnsemblSeqProxy() throws Exception
+  {
+    sf = new SeqFetcher();
+    addDbSourceProperty(DBRefSource.MULTIACC);
+    addDbSourceProperty(DBRefSource.SEQDB);
+    // decide whether these need to be filtered according to return type
+    addDbSourceProperty(DBRefSource.PROTSEQDB);
+    addDbSourceProperty(DBRefSource.DNACODINGSEQDB);
+    addDbSourceProperty(DBRefSource.DNASEQDB);
+  }
+
+  @Override
+  public String getDbSource()
+  {
+    return "ENSEMBL";
+  }
+
+
+  @Override
+  public String getDbVersion()
+  {
+    return "0"; // sf.getVersion();
+  }
+
+  @Override
+  public String getAccessionSeparator()
+  {
+    return " ";
+  }
+
+  @Override
+  public Regex getAccessionValidator()
+  {
+    return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
+  }
+
+  @Override
+  public String getTestQuery()
+  {
+    return "ENSP00000288602";
+  }
+
+  @Override
+  public boolean isValidReference(String accession)
+  {
+    return getAccessionValidator().search(accession);
+  }
+
+  private volatile boolean inProgress = false;
+
+  @Override
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    inProgress = true;
+    List<String> tids, ids = new ArrayList<String>();
+    tids = Arrays.asList(queries.split(" +"));
+    AlignmentI rtn = null;
+    for (int v = 0, vSize = tids.size(); v < vSize; v += 50)
+    {
+      int p = v + 50;
+      if (p > vSize)
+      {
+        p = vSize;
+      }
+      ;
+      ids = tids.subList(v, p);
+      try
+      {
+        if (!sf.isEnsemblAvailable())
+        {
+          inProgress = false;
+          throw new JalviewException("ENSEMBL Rest API not available.");
+        }
+        FileParse fp = new FileParse(sf.getSequenceReader(
+                getSourceEnsemblType(), ids));
+        FastaFile fr = new FastaFile(fp);
+        if (fr.hasWarningMessage())
+        {
+          System.out
+                  .println("Warning when retrieving " + ids.size() + " ids"
+                          + ids.toString() + "\n" + fr.getWarningMessage());
+        }
+        else if (fr.getSeqs().size() != ids.size())
+        {
+          System.out.println("Only retrieved " + fr.getSeqs().size()
+                  + " sequences for " + ids.size() + " query strings.");
+        }
+        if (fr.getSeqs().size() > 0)
+        {
+          AlignmentI seqal = new jalview.datamodel.Alignment(
+                  fr.getSeqsAsArray());
+          for (SequenceI sq:seqal.getSequences())
+          {
+            if (ids.contains((sq.getName())))
+            {
+              DBRefUtils.parseToDbRef(sq, "ENSEMBL", "0", sq.getName());
+            }
+          }
+          if (rtn == null)
+          {
+            rtn = seqal;
+          }
+          else
+          {
+            rtn.append(seqal);
+          }
+        }
+      } catch (Throwable r)
+      {
+        inProgress = false;
+        if (rtn != null)
+        {
+          System.err.println("Aborting ID retrieval after " + v
+                  + " chunks.");
+          r.printStackTrace();
+        }
+        else
+        {
+
+          throw new JalviewException("Aborting ID retrieval after " + v
+                  + " chunks. Unexpected problem ("
+                  + r.getLocalizedMessage() + ")", r);
+        }
+
+      }
+    }
+    inProgress = false;
+    return rtn;
+  }
+
+  /**
+   * 
+   * @return the configured sequence return type for this source
+   */
+  protected abstract EnsemblSeqType getSourceEnsemblType();
+
+  @Override
+  public boolean queryInProgress()
+  {
+    return inProgress;
+  }
+
+  @Override
+  public StringBuffer getRawRecords()
+  {
+    return null;
+  }
+
+  @Override
+  public int getTier()
+  {
+    return 0;
+  }
+}