1 package jalview.ext.ensembl;
3 import jalview.datamodel.AlignmentI;
4 import jalview.datamodel.DBRefSource;
5 import jalview.datamodel.SequenceI;
6 import jalview.exceptions.JalviewException;
7 import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType;
8 import jalview.io.FastaFile;
9 import jalview.io.FileParse;
10 import jalview.util.DBRefUtils;
11 import jalview.ws.seqfetcher.DbSourceProxy;
12 import jalview.ws.seqfetcher.DbSourceProxyImpl;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.List;
18 import com.stevesoft.pat.Regex;
20 public abstract class EnsemblSeqProxy extends DbSourceProxyImpl implements
// Constructs the proxy: creates the SeqFetcher stored in sf (used by
// getSequenceRecords to talk to the service) and registers the DBRefSource
// properties this source advertises.
// Declared to throw Exception.
25 public EnsemblSeqProxy() throws Exception
27 sf = new SeqFetcher();
28 addDbSourceProperty(DBRefSource.MULTIACC);
29 addDbSourceProperty(DBRefSource.SEQDB);
30 // TODO: decide whether these need to be filtered according to return type
31 addDbSourceProperty(DBRefSource.PROTSEQDB);
32 addDbSourceProperty(DBRefSource.DNACODINGSEQDB);
33 addDbSourceProperty(DBRefSource.DNASEQDB);
// Identifies the database source served by this proxy.
// NOTE(review): presumably returns a DBRefSource constant for ENSEMBL -
// confirm against the method body.
37 public String getDbSource()
// Reports the database version as the fixed string "0". The trailing
// commented-out call shows the value was at some point meant to come from
// the fetcher (sf.getVersion()) instead of being hard-coded.
44 public String getDbVersion()
46 return "0"; // sf.getVersion();
// Gives the separator used between accessions in a multi-accession query.
// NOTE(review): getSequenceRecords splits its input on runs of spaces (" +");
// confirm the value returned here is consistent with that.
50 public String getAccessionSeparator()
// Builds the pattern used to recognise accessions for this source: one of
// the prefixes ENSP, ENST, ENSG or CCDS followed by at least three characters
// drawn from digits and '.'. A new Regex object is created on every call.
56 public Regex getAccessionValidator()
58 return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
// Supplies a sample accession for exercising this source. The value uses the
// ENSP prefix, one of the prefixes accepted by getAccessionValidator().
62 public String getTestQuery()
64 return "ENSP00000288602";
// Checks an accession string against the pattern from getAccessionValidator()
// and returns the result of Regex.search on it.
// NOTE(review): search() presumably succeeds when the pattern occurs anywhere
// in the string (the pattern has no anchors) - confirm that a partial match
// is the intended acceptance rule.
68 public boolean isValidReference(String accession)
70 return getAccessionValidator().search(accession);
// Volatile status flag, initially false.
// NOTE(review): presumably tracks whether a retrieval is running and is what
// queryInProgress() reports - confirm where it is set and cleared.
73 private volatile boolean inProgress = false;
// Retrieves sequences for a list of accessions and collects them as an
// alignment. The ids are processed in slices whose start offset advances by 50
// each iteration; for every slice the fetcher's reader is parsed as FASTA, and
// each parsed sequence whose name is one of the ids in that slice gets an
// "ENSEMBL" database reference attached.
//
// queries: accession ids separated by one or more spaces.
// A JalviewException is raised when sf.isEnsemblAvailable() is false, and a
// JalviewException wrapping the cause is raised from the Throwable handler.
// NOTE(review): rtn starts as null; confirm where it is assigned and returned -
// presumably null comes back when nothing was retrieved.
76 public AlignmentI getSequenceRecords(String queries) throws Exception
// The ArrayList assigned to ids here is replaced by a subList view in the loop.
79 List<String> tids, ids = new ArrayList<String>();
// Split on runs of one or more spaces.
80 tids = Arrays.asList(queries.split(" +"));
81 AlignmentI rtn = null;
// v is the start offset of the current slice within tids.
82 for (int v = 0, vSize = tids.size(); v < vSize; v += 50)
// NOTE(review): p is presumably the slice end, min(v + 50, vSize) - confirm.
90 ids = tids.subList(v, p);
// Availability is checked before each slice is requested.
93 if (!sf.isEnsemblAvailable())
96 throw new JalviewException("ENSEMBL Rest API not available.");
// Request the slice using the sequence type chosen by the concrete subclass
// and parse the response as FASTA.
98 FileParse fp = new FileParse(sf.getSequenceReader(
99 getSourceEnsemblType(), ids));
100 FastaFile fr = new FastaFile(fp);
// Parser warnings are reported together with the ids that were requested.
101 if (fr.hasWarningMessage())
104 .println("Warning when retrieving " + ids.size() + " ids"
105 + ids.toString() + "\n" + fr.getWarningMessage());
// Otherwise report when fewer (or more) sequences came back than were asked for.
107 else if (fr.getSeqs().size() != ids.size())
109 System.out.println("Only retrieved " + fr.getSeqs().size()
110 + " sequences for " + ids.size() + " query strings.");
112 if (fr.getSeqs().size() > 0)
114 AlignmentI seqal = new jalview.datamodel.Alignment(
115 fr.getSeqsAsArray());
116 for (SequenceI sq:seqal.getSequences())
// Only sequences named exactly as a queried id receive a database reference.
118 if (ids.contains((sq.getName())))
// The sequence name is used as the accession; the version is given as "0".
120 DBRefUtils.parseToDbRef(sq, "ENSEMBL", "0", sq.getName());
// Any Throwable raised while handling a slice ends up here.
// NOTE(review): confirm under which conditions the failure is only logged to
// System.err and when it is rethrown.
132 } catch (Throwable r)
// NOTE(review): v is an id offset that advances by 50, not a count of chunks,
// so the "after v chunks" wording in both messages looks misleading.
137 System.err.println("Aborting ID retrieval after " + v
// The original Throwable is kept as the cause of the JalviewException.
144 throw new JalviewException("Aborting ID retrieval after " + v
145 + " chunks. Unexpected problem ("
146 + r.getLocalizedMessage() + ")", r);
157 * @return the configured sequence return type for this source
// Implemented by concrete subclasses; the value is passed to
// sf.getSequenceReader(...) in getSequenceRecords to select what is requested.
159 protected abstract EnsemblSeqType getSourceEnsemblType();
// Reports whether a query is currently running.
// NOTE(review): presumably returns the inProgress flag - confirm against the
// method body.
162 public boolean queryInProgress()
168 public StringBuffer getRawRecords()