1 package jalview.ext.ensembl;
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.AlignmentI;
5 import jalview.datamodel.SequenceI;
6 import jalview.exceptions.JalviewException;
7 import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
8 import jalview.io.FastaFile;
9 import jalview.io.FileParse;
10 import jalview.util.DBRefUtils;
11 import jalview.ws.seqfetcher.DbSourceProxyImpl;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.List;
17 import com.stevesoft.pat.Regex;
19 public abstract class EnsemblSeqProxy extends DbSourceProxyImpl
// Constructor: creates the SeqFetcher that getSequenceRecords() later uses
// for the availability check and for opening the sequence reader.
23 public EnsemblSeqProxy()
25 sf = new SeqFetcher();
29 public String getDbSource()
// Reports the database version for this source.
36 public String getDbVersion()
// Always the literal "0"; the lookup via sf.getVersion() is commented out.
38 return "0"; // sf.getVersion();
42 public String getAccessionSeparator()
// Builds the pattern used to recognise accessions handled by this source.
// A new Regex instance is constructed on every call.
48 public Regex getAccessionValidator()
// Pattern: one of the prefixes ENSP, ENST, ENSG or CCDS followed by at least
// three characters drawn from digits and '.'.
50 return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
54 * Default test query is a transcript
57 public String getTestQuery()
// The ENST prefix is one of the alternatives accepted by the pattern in
// getAccessionValidator().
59 return "ENST00000288602";
// Checks an accession against the pattern from getAccessionValidator().
63 public boolean isValidReference(String accession)
// Delegates to Regex.search(); a fresh Regex is built for each call.
// NOTE(review): search() presumably matches anywhere in the string rather than
// requiring a full match - confirm against the com.stevesoft.pat documentation.
65 return getAccessionValidator().search(accession);
// NOTE(review): no write to this flag is visible in this excerpt; presumably
// queryInProgress() reports it - confirm where it is set and cleared.
68 private volatile boolean inProgress = false;
// Retrieves sequence records for the accessions listed in 'queries'.
// The string is split on runs of spaces, and the resulting ids are requested
// from the SeqFetcher in batches of at most getMaximumQueryCount() ids. Each
// response is read through FileParse and parsed with FastaFile.
// A JalviewException is thrown when sf.isEnsemblAvailable() is false; the catch
// block shown also raises a JalviewException carrying the caught Throwable as
// its cause.
// NOTE(review): how rtn is populated and returned is not visible in this excerpt.
71 public AlignmentI getSequenceRecords(String queries) throws Exception
// NOTE(review): in the lines shown, the ArrayList assigned to ids here is
// replaced by a subList view before it is ever read - confirm it is not needed.
74 List<String> tids, ids = new ArrayList<String>();
// Accessions are separated by one or more space characters.
75 tids = Arrays.asList(queries.split(" +"));
76 AlignmentI rtn = null;
79 * execute queries, if necessary in batches of the
80 * maximum allowed number of ids
82 int maxQueryCount = getMaximumQueryCount();
// v is the start offset of the current batch within tids and advances by
// maxQueryCount on each iteration.
83 for (int v = 0, vSize = tids.size(); v < vSize; v += maxQueryCount)
// p is the exclusive end offset of the batch, clamped to the list size so the
// last batch may be shorter.
85 int p = Math.min(vSize, v + maxQueryCount);
86 ids = tids.subList(v, p);
// Availability is checked before every batch is requested.
89 if (!sf.isEnsemblAvailable())
92 throw new JalviewException("ENSEMBL Rest API not available.");
94 FileParse fp = new FileParse(sf.getSequenceReader(
95 getSourceEnsemblType(), ids));
96 FastaFile fr = new FastaFile(fp);
// A parser warning is printed together with the ids of the batch.
97 if (fr.hasWarningMessage())
100 .println("Warning when retrieving " + ids.size() + " ids"
101 + ids.toString() + "\n" + fr.getWarningMessage());
// Only checked when there was no warning: reports a mismatch between the
// number of sequences parsed and the number of ids requested.
103 else if (fr.getSeqs().size() != ids.size())
105 System.out.println("Only retrieved " + fr.getSeqs().size()
106 + " sequences for " + ids.size() + " query strings.");
108 if (fr.getSeqs().size() > 0)
110 AlignmentI seqal = new Alignment(
111 fr.getSeqsAsArray());
112 for (SequenceI sq:seqal.getSequences())
// Only sequences whose name is exactly one of the requested ids are passed to
// DBRefUtils.parseToDbRef, with source "ENSEMBL" and version "0".
114 if (ids.contains((sq.getName())))
116 DBRefUtils.parseToDbRef(sq, "ENSEMBL", "0", sq.getName());
128 } catch (Throwable r)
// NOTE(review): v is an id offset that advances by maxQueryCount, not a count
// of batches, so "after <v> chunks" in the messages below looks misleading
// whenever maxQueryCount is greater than 1 - confirm the intended wording.
133 System.err.println("Aborting ID retrieval after " + v
140 throw new JalviewException("Aborting ID retrieval after " + v
141 + " chunks. Unexpected problem ("
142 + r.getLocalizedMessage() + ")", r);
153 * @return the configured sequence return type for this source
// Implemented by subclasses; getSequenceRecords() passes the result as the
// first argument to sf.getSequenceReader(...) for every batch.
155 protected abstract EnsemblSeqType getSourceEnsemblType();
158 public boolean queryInProgress()
164 public StringBuffer getRawRecords()
176 * A sequence/id POST request currently allows up to 50 queries
178 * @see http://rest.ensembl.org/documentation/info/sequence_id_post
181 public int getMaximumQueryCount()
187 public boolean isDnaCoding()