+ }
+ } catch (IOException e)
+ {
+ System.err.println("Error transferring Ensembl features: "
+ + e.getMessage());
+ }
+ }
+
+ /**
+ * Returns those sequence feature types to fetch from Ensembl. We may want
+ * features either because they are of interest to the user, or as a means to
+ * identify the locations of the sequence on the genomic sequence (CDS
+ * features identify CDS, exon features identify cDNA etc).
+ *
+ * @return the feature types to fetch from Ensembl
+ */
+ protected abstract EnsemblFeatureType[] getFeaturesToFetch();
+
+ /**
+ * Fetches and maps the protein product, and adds it as a cross-reference of
+ * the retrieved sequence
+ */
+ protected void addProteinProduct(SequenceI querySeq)
+ {
+ String accId = querySeq.getName();
+ try
+ {
+ AlignmentI protein = new EnsemblProtein(getDomain())
+ .getSequenceRecords(accId);
+ if (protein == null || protein.getHeight() == 0)
+ {
+ System.out.println("No protein product found for " + accId);
+ return;
+ }
+ SequenceI proteinSeq = protein.getSequenceAt(0);
+
+ /*
+ * need dataset sequences (to be the subject of mappings)
+ */
+ proteinSeq.createDatasetSequence();
+ querySeq.createDatasetSequence();
+
+ MapList mapList = AlignmentUtils.mapCdsToProtein(querySeq, proteinSeq);
+ if (mapList != null)
+ {
+ // clunky: ensure Uniprot xref if we have one is on mapped sequence
+ SequenceI ds = proteinSeq.getDatasetSequence();
+ ds.setSourceDBRef(proteinSeq.getSourceDBRef());
+
+ Mapping map = new Mapping(ds, mapList);
+ DBRefEntry dbr = new DBRefEntry(getDbSource(),
+ getEnsemblDataVersion(), proteinSeq.getName(), map);
+ querySeq.getDatasetSequence().addDBRef(dbr);
+
+ /*
+ * copy exon features to protein, compute peptide variants from dna
+ * variants and add as features on the protein sequence ta-da
+ */
+ AlignmentUtils.computeProteinFeatures(querySeq, proteinSeq, mapList);
+ }
+ } catch (Exception e)
+ {
+ System.err
+ .println(String.format("Error retrieving protein for %s: %s",
+ accId, e.getMessage()));
+ }
+ }
+
+   /**
+    * Fetches database cross-references from Ensembl for the given sequence
+    * and attaches them to its root dataset sequence (or to the sequence itself
+    * if it has no dataset sequence). A reference to the sequence's own
+    * accession is then added and set as the source reference.
+    *
+    * @param seq
+    *          the sequence for which cross-references are fetched
+    */
+   protected void getCrossReferences(SequenceI seq)
+   {
+     // walk up the dataset chain to the sequence that has no parent
+     SequenceI target = seq;
+     SequenceI parent;
+     while ((parent = target.getDatasetSequence()) != null)
+     {
+       target = parent;
+     }
+
+     List<DBRefEntry> fetched = new EnsemblXref(getDomain())
+             .getCrossReferences(target.getName());
+     for (DBRefEntry ref : fetched)
+     {
+       target.addDBRef(ref);
+     }
+
+     // finally a self reference, which also becomes the source reference
+     DBRefEntry selfRef = new DBRefEntry(getDbSource(),
+             getEnsemblDataVersion(), target.getName());
+     target.addDBRef(selfRef);
+     target.setSourceDBRef(selfRef);
+   }
+
+ /**
+ * Fetches sequences for the list of accession ids and adds them to the
+ * alignment. Returns the extended (or created) alignment.
+ *
+ * @param ids
+ * @param alignment
+ * @return
+ * @throws JalviewException
+ * @throws IOException
+ */
+ protected AlignmentI fetchSequences(List<String> ids, AlignmentI alignment)
+ throws JalviewException, IOException
+ {
+ if (!isEnsemblAvailable())
+ {
+ inProgress = false;
+ throw new JalviewException("ENSEMBL Rest API not available.");
+ }
+ FileParse fp = getSequenceReader(ids);
+ FastaFile fr = new FastaFile(fp);
+ if (fr.hasWarningMessage())
+ {
+ System.out.println(String.format(
+ "Warning when retrieving %d ids %s\n%s", ids.size(),
+ ids.toString(), fr.getWarningMessage()));
+ }
+ else if (fr.getSeqs().size() != ids.size())
+ {
+ System.out.println(String.format(
+ "Only retrieved %d sequences for %d query strings", fr
+ .getSeqs().size(), ids.size()));
+ }
+
+ if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0)
+ {
+ /*
+ * POST request has returned an empty FASTA file e.g. for invalid id
+ */
+ throw new IOException("No data returned for " + ids);
+ }
+
+ if (fr.getSeqs().size() > 0)
+ {
+ AlignmentI seqal = new Alignment(
+ fr.getSeqsAsArray());
+ for (SequenceI sq:seqal.getSequences())
+ {
+ if (sq.getDescription() == null)