+ /**
+  * Retrieves Ensembl features for an accession (via the /overlap REST
+  * endpoint) and transfers them onto the matching sequence in the alignment.
+  * When the transfer succeeds, the protein product is then fetched, mapped
+  * and recorded as a cross-reference of that sequence. Does nothing if the
+  * alignment is null. An IOException raised while fetching is reported on
+  * stderr and not propagated.
+  *
+  * @param accId
+  *          the accession to fetch features for; also the name used to look
+  *          up the target sequence in the alignment
+  * @param alignment
+  *          the alignment holding the sequence to annotate (may be null)
+  */
+ protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
+ {
+   if (alignment == null)
+   {
+     return;
+   }
+
+   try
+   {
+     /*
+      * the fetcher hands back a 'dummy' genomic sequence that carries the
+      * requested features (gene, transcript, exon, cds, variation)
+      */
+     EnsemblFeatures featureFetcher = new EnsemblFeatures(getDomain());
+     EnsemblFeatureType[] wanted = getFeaturesToFetch();
+     AlignmentI fetched = featureFetcher.getSequenceRecords(accId, wanted);
+     if (fetched == null || fetched.getHeight() <= 0)
+     {
+       return;
+     }
+
+     SequenceI featureCarrier = fetched.getSequenceAt(0);
+     if (featureCarrier == null)
+     {
+       return;
+     }
+
+     /*
+      * copy the features across to the sequence named by the accession
+      */
+     SequenceI target = alignment.findName(accId, true);
+     boolean transferred = transferFeatures(accId, featureCarrier, target);
+     if (transferred)
+     {
+       /*
+        * fetch and map the protein product, and attach it to the
+        * retrieved sequence as a cross-reference
+        */
+       addProteinProduct(target);
+     }
+   } catch (IOException e)
+   {
+     System.err.println(
+             "Error transferring Ensembl features: " + e.getMessage());
+   }
+ }
+
+ /**
+ * Returns those sequence feature types to fetch from Ensembl. We may want
+ * features either because they are of interest to the user, or as a means to
+ * identify the locations of the sequence on the genomic sequence (CDS
+ * features identify CDS, exon features identify cDNA etc).
+ *
+ * @return the feature types to request from the Ensembl feature fetcher
+ */
+ protected abstract EnsemblFeatureType[] getFeaturesToFetch();
+
+ /**
+  * Fetches the protein product for the given sequence (looked up by the
+  * sequence's name), maps it to the sequence's CDS, and records it as a
+  * database cross-reference on the sequence's dataset sequence. UNIPROT
+  * references found on the protein are also given a mapping, reusing a
+  * matching reference on the query sequence if there is one and adding a
+  * new one otherwise. Finally, protein features are computed from the
+  * mapping. Any exception is reported on stderr and not propagated.
+  *
+  * @param querySeq
+  *          the retrieved sequence to which the protein product is attached
+  */
+ protected void addProteinProduct(SequenceI querySeq)
+ {
+   String accId = querySeq.getName();
+   try
+   {
+     EnsemblProtein proteinFetcher = new EnsemblProtein(getDomain());
+     AlignmentI protein = proteinFetcher.getSequenceRecords(accId);
+     if (protein == null || protein.getHeight() == 0)
+     {
+       System.out.println("No protein product found for " + accId);
+       return;
+     }
+     SequenceI proteinSeq = protein.getSequenceAt(0);
+
+     /*
+      * mappings are held against dataset sequences, so make sure both
+      * sequences have one
+      */
+     proteinSeq.createDatasetSequence();
+     querySeq.createDatasetSequence();
+
+     MapList mapList = AlignmentUtils.mapCdsToProtein(querySeq,
+             proteinSeq);
+     if (mapList == null)
+     {
+       return;
+     }
+
+     /*
+      * cross-reference from the query's dataset sequence to the protein
+      */
+     SequenceI proteinDs = proteinSeq.getDatasetSequence();
+     // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
+     Mapping productMapping = new Mapping(proteinDs, mapList);
+     DBRefEntry productRef = new DBRefEntry(getDbSource(),
+             getEnsemblDataVersion(), proteinSeq.getName(),
+             productMapping);
+     querySeq.getDatasetSequence().addDBRef(productRef);
+
+     /*
+      * clunky: make sure any Uniprot xref held by the protein is also
+      * present, with a mapping, on the mapped (query) sequence
+      */
+     String[] uniprotOnly = new String[] { DBRefSource.UNIPROT };
+     DBRefEntry[] proteinUniprotRefs = DBRefUtils
+             .selectRefs(proteinDs.getDBRefs(), uniprotOnly);
+     DBRefEntry[] queryUniprotRefs = DBRefUtils
+             .selectRefs(querySeq.getDBRefs(), uniprotOnly);
+     if (proteinUniprotRefs != null)
+     {
+       for (DBRefEntry proteinRef : proteinUniprotRefs)
+       {
+         // look for a reference with the same accession on the query
+         List<DBRefEntry> matches = DBRefUtils.searchRefs(queryUniprotRefs,
+                 proteinRef.getAccessionId());
+         boolean isNew = matches.isEmpty();
+         DBRefEntry mappedRef;
+         if (isNew)
+         {
+           mappedRef = new DBRefEntry(DBRefSource.UNIPROT,
+                   getEnsemblDataVersion(), proteinRef.getAccessionId());
+         }
+         else
+         {
+           mappedRef = matches.get(0);
+           if (matches.size() > 1)
+           {
+             Cache.log.warn(
+                     "Implementation issue - multiple uniprot acc on product sequence.");
+           }
+         }
+
+         // each reference gets its own Mapping instance
+         Mapping uniprotMapping = new Mapping(proteinDs, mapList);
+         mappedRef.setVersion(getEnsemblDataVersion());
+         mappedRef.setMap(uniprotMapping);
+         if (isNew)
+         {
+           // only a freshly created reference needs adding
+           querySeq.getDatasetSequence().addDBRef(mappedRef);
+         }
+       }
+     }
+
+     /*
+      * copy exon features to the protein, and derive peptide variants
+      * from dna variants as features on the protein sequence
+      */
+     AlignmentUtils.computeProteinFeatures(querySeq, proteinSeq, mapList);
+   } catch (Exception e)
+   {
+     String report = String.format("Error retrieving protein for %s: %s",
+             accId, e.getMessage());
+     System.err.println(report);
+   }
+ }
+
+ /**
+ * Get database xrefs from Ensembl, and attach them to the sequence
+ *
+ * @param seq
+ */
+ protected void getCrossReferences(SequenceI seq)