--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.exceptions.JalviewException;
+import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+import jalview.util.DBRefUtils;
+import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.ws.seqfetcher.DbSourceProxyImpl;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.stevesoft.pat.Regex;
+
+public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
+ DbSourceProxy
+{
+ SeqFetcher sf;
+
+ public EnsemblSeqProxy() throws Exception
+ {
+ sf = new SeqFetcher();
+ addDbSourceProperty(DBRefSource.MULTIACC);
+ addDbSourceProperty(DBRefSource.SEQDB);
+ // decide whether these need to be filtered according to return type
+ addDbSourceProperty(DBRefSource.PROTSEQDB);
+ addDbSourceProperty(DBRefSource.DNACODINGSEQDB);
+ addDbSourceProperty(DBRefSource.DNASEQDB);
+ }
+
+ @Override
+ public String getDbSource()
+ {
+ return "ENSEMBL";
+ }
+
+
+ @Override
+ public String getDbVersion()
+ {
+ return "0"; // sf.getVersion();
+ }
+
+ @Override
+ public String getAccessionSeparator()
+ {
+ return " ";
+ }
+
+ @Override
+ public Regex getAccessionValidator()
+ {
+ return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
+ }
+
+ @Override
+ public String getTestQuery()
+ {
+ return "ENSP00000288602";
+ }
+
+ @Override
+ public boolean isValidReference(String accession)
+ {
+ return getAccessionValidator().search(accession);
+ }
+
+ private volatile boolean inProgress = false;
+
+ @Override
+ public AlignmentI getSequenceRecords(String queries) throws Exception
+ {
+ inProgress = true;
+ List<String> tids, ids = new ArrayList<String>();
+ tids = Arrays.asList(queries.split(" +"));
+ AlignmentI rtn = null;
+ for (int v = 0, vSize = tids.size(); v < vSize; v += 50)
+ {
+ int p = v + 50;
+ if (p > vSize)
+ {
+ p = vSize;
+ }
+ ;
+ ids = tids.subList(v, p);
+ try
+ {
+ if (!sf.isEnsemblAvailable())
+ {
+ inProgress = false;
+ throw new JalviewException("ENSEMBL Rest API not available.");
+ }
+ FileParse fp = new FileParse(sf.getSequenceReader(
+ getSourceEnsemblType(), ids));
+ FastaFile fr = new FastaFile(fp);
+ if (fr.hasWarningMessage())
+ {
+ System.out
+ .println("Warning when retrieving " + ids.size() + " ids"
+ + ids.toString() + "\n" + fr.getWarningMessage());
+ }
+ else if (fr.getSeqs().size() != ids.size())
+ {
+ System.out.println("Only retrieved " + fr.getSeqs().size()
+ + " sequences for " + ids.size() + " query strings.");
+ }
+ if (fr.getSeqs().size() > 0)
+ {
+ AlignmentI seqal = new jalview.datamodel.Alignment(
+ fr.getSeqsAsArray());
+ for (SequenceI sq:seqal.getSequences())
+ {
+ if (ids.contains((sq.getName())))
+ {
+ DBRefUtils.parseToDbRef(sq, "ENSEMBL", "0", sq.getName());
+ }
+ }
+ if (rtn == null)
+ {
+ rtn = seqal;
+ }
+ else
+ {
+ rtn.append(seqal);
+ }
+ }
+ } catch (Throwable r)
+ {
+ inProgress = false;
+ if (rtn != null)
+ {
+ System.err.println("Aborting ID retrieval after " + v
+ + " chunks.");
+ r.printStackTrace();
+ }
+ else
+ {
+
+ throw new JalviewException("Aborting ID retrieval after " + v
+ + " chunks. Unexpected problem ("
+ + r.getLocalizedMessage() + ")", r);
+ }
+
+ }
+ }
+ inProgress = false;
+ return rtn;
+ }
+
+ /**
+ *
+ * @return the configured sequence return type for this source
+ */
+ protected abstract EnsemblSeqType getSourceEnsemblType();
+
+ @Override
+ public boolean queryInProgress()
+ {
+ return inProgress;
+ }
+
+ @Override
+ public StringBuffer getRawRecords()
+ {
+ return null;
+ }
+
+ @Override
+ public int getTier()
+ {
+ return 0;
+ }
+}