X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSeqProxy.java;h=5fccedda5e42b884b03f4f98c367b9dd268867a0;hb=3f12f4932226512316ec113e600695150431fd0a;hp=4af6525c7a3bdb97475f1766f63986206d12e57f;hpb=6c52cc0b81ae3abdc3c5f6f88a23364a0246351a;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 4af6525..5fccedd 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -1,10 +1,10 @@ package jalview.ext.ensembl; import jalview.analysis.AlignmentUtils; +import jalview.analysis.Dna; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -13,6 +13,7 @@ import jalview.io.FastaFile; import jalview.io.FileParse; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; +import jalview.util.Comparison; import jalview.util.DBRefUtils; import jalview.util.MapList; @@ -33,11 +34,7 @@ import java.util.List; */ public abstract class EnsemblSeqProxy extends EnsemblRestClient { - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "CCDS", "Uniprot/SWISSPROT", - "Uniprot/SPTREMBL" }); - - protected static final String CONSEQUENCE_TYPE = "consequence_type"; + private static final String ALLELES = "alleles"; protected static final String PARENT = "Parent"; @@ -58,7 +55,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient GENOMIC("genomic"), /** - * type=cdna to fetch dna including UTRs + * type=cdna to fetch coding dna including UTRs */ CDNA("cdna"), @@ -279,10 +276,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { // clunky: ensure Uniprot xref if we have one is on mapped sequence SequenceI ds = proteinSeq.getDatasetSequence(); - ds.setSourceDBRef(proteinSeq.getSourceDBRef()); + // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence() Mapping map = new Mapping(ds, mapList); - DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(), - accId, map); + DBRefEntry dbr = new DBRefEntry(getDbSource(), + getEnsemblDataVersion(), proteinSeq.getName(), map); querySeq.getDatasetSequence().addDBRef(dbr); /* @@ -311,32 +308,20 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient seq = seq.getDatasetSequence(); } - EnsemblXref xrefFetcher = new EnsemblXref(getDomain()); - List xrefs = xrefFetcher.getCrossReferences(seq.getName(), - getCrossReferenceDatabases()); + EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(), + getEnsemblDataVersion()); + List xrefs = xrefFetcher.getCrossReferences(seq.getName()); for (DBRefEntry xref : xrefs) { seq.addDBRef(xref); - /* - * Save any Uniprot xref to be the reference for SIFTS mapping - */ - if (DBRefSource.UNIPROT.equals(xref.getSource())) - { - seq.setSourceDBRef(xref); - } } - } - /** - * Returns a list of database names to be used when fetching cross-references. - * Specifically, the names are used to filter data returned by the Ensembl - * xrefs REST service on the value in field 'dbname'. - * - * @return - */ - protected List getCrossReferenceDatabases() - { - return CROSS_REFERENCES; + /* + * and add a reference to itself + */ + DBRefEntry self = new DBRefEntry(getDbSource(), + getEnsemblDataVersion(), seq.getName()); + seq.addDBRef(self); } /** @@ -394,7 +379,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient if (ids.contains(name) || ids.contains(name.replace("ENSP", "ENST"))) { - DBRefUtils.parseToDbRef(sq, DBRefSource.ENSEMBL, "0", name); + DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(), + getEnsemblDataVersion(), name); + sq.addDBRef(dbref); } } if (alignment == null) @@ -611,9 +598,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * @param mapping * mapping from the sequence feature's coordinates to the target * sequence + * @param forwardStrand */ protected void transferFeature(SequenceFeature sf, - SequenceI targetSequence, MapList mapping) + SequenceI targetSequence, MapList mapping, boolean forwardStrand) { int start = sf.getBegin(); int end = sf.getEnd(); @@ -624,23 +612,89 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient SequenceFeature copy = new SequenceFeature(sf); copy.setBegin(Math.min(mappedRange[0], mappedRange[1])); copy.setEnd(Math.max(mappedRange[0], mappedRange[1])); + if (".".equals(copy.getFeatureGroup())) + { + copy.setFeatureGroup(getDbSource()); + } targetSequence.addSequenceFeature(copy); /* - * for sequence_variant, make an additional feature with consequence + * for sequence_variant on reverse strand, have to convert the allele + * values to their complements */ - // if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - // SequenceOntologyI.SEQUENCE_VARIANT)) - // { - // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE); - // if (consequence != null) - // { - // SequenceFeature sf2 = new SequenceFeature("consequence", - // consequence, copy.getBegin(), copy.getEnd(), 0f, - // null); - // targetSequence.addSequenceFeature(sf2); - // } - // } + if (!forwardStrand + && SequenceOntologyFactory.getInstance().isA(sf.getType(), + SequenceOntologyI.SEQUENCE_VARIANT)) + { + reverseComplementAlleles(copy); + } + } + } + + /** + * Change the 'alleles' value of a feature by converting to complementary + * bases, and also update the feature description to match + * + * @param sf + */ + static void reverseComplementAlleles(SequenceFeature sf) + { + final String alleles = (String) sf.getValue(ALLELES); + if (alleles == null) + { + return; + } + StringBuilder complement = new StringBuilder(alleles.length()); + for (String allele : alleles.split(",")) + { + reverseComplementAllele(complement, allele); + } + String comp = complement.toString(); + sf.setValue(ALLELES, comp); + sf.setDescription(comp); + + /* + * replace value of "alleles=" in sf.ATTRIBUTES as well + * so 'output as GFF' shows reverse complement alleles + */ + String atts = sf.getAttributes(); + if (atts != null) + { + atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp); + sf.setAttributes(atts); + } + } + + /** + * Makes the 'reverse complement' of the given allele and appends it to the + * buffer, after a comma separator if not the first + * + * @param complement + * @param allele + */ + static void reverseComplementAllele(StringBuilder complement, + String allele) + { + if (complement.length() > 0) + { + complement.append(","); + } + + /* + * some 'alleles' are actually descriptive terms + * e.g. HGMD_MUTATION, PhenCode_variation + * - we don't want to 'reverse complement' these + */ + if (!Comparison.isNucleotideSequence(allele, true)) + { + complement.append(allele); + } + else + { + for (int i = allele.length() - 1; i >= 0; i--) + { + complement.append(Dna.getComplement(allele.charAt(i))); + } } } @@ -695,25 +749,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient final boolean forwardStrand = mapping.isFromForwardStrand(); /* - * sort features by start position (descending if reverse strand) - * before transferring (in forwards order) to the target sequence + * sort features by start position (which corresponds to end + * position descending if reverse strand) so as to add them in + * 'forwards' order to the target sequence */ - Arrays.sort(features, new Comparator() - { - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) - { - int c = Integer.compare(o1.getBegin(), o2.getBegin()); - return forwardStrand ? c : -c; - } - }); + sortFeatures(features, forwardStrand); boolean transferred = false; for (SequenceFeature sf : features) { if (retainFeature(sf, parentId)) { - transferFeature(sf, targetSequence, mapping); + transferFeature(sf, targetSequence, mapping, forwardStrand); transferred = true; } } @@ -721,6 +768,33 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** + * Sort features by start position ascending (if on forward strand), or end + * position descending (if on reverse strand) + * + * @param features + * @param forwardStrand + */ + protected static void sortFeatures(SequenceFeature[] features, + final boolean forwardStrand) + { + Arrays.sort(features, new Comparator() + { + @Override + public int compare(SequenceFeature o1, SequenceFeature o2) + { + if (forwardStrand) + { + return Integer.compare(o1.getBegin(), o2.getBegin()); + } + else + { + return Integer.compare(o2.getEnd(), o1.getEnd()); + } + } + }); + } + + /** * Answers true if the feature type is one we want to keep for the sequence. * Some features are only retrieved in order to identify the sequence range, * and may then be discarded as redundant information (e.g. "CDS" feature for