X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSeqProxy.java;h=a558ad296d4fcf4713bbc42a8d2c5fcff13e5147;hb=eb0ecbb41335d28ac4c38fe617f69c0ffc51fea5;hp=7b448fd8a6389becba113ec2e2b40d6bb997b959;hpb=ef84f77ebe6c73e67e8ec789b02f41891715ebdd;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 7b448fd..a558ad2 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -40,8 +40,8 @@ import jalview.util.Comparison; import jalview.util.DBRefUtils; import jalview.util.IntRangeComparator; import jalview.util.MapList; +import jalview.util.Platform; -import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; @@ -49,9 +49,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; -import org.json.simple.JSONObject; -import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** @@ -62,8 +61,6 @@ import org.json.simple.parser.ParseException; */ public abstract class EnsemblSeqProxy extends EnsemblRestClient { - protected static final String NAME = "Name"; - protected static final String DESCRIPTION = "description"; /* @@ -211,7 +208,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ SequenceI genomicSequence = null; EnsemblFeatures gffFetcher = new EnsemblFeatures(getDomain()); - EnsemblFeatureType[] features = getFeaturesToFetch(); + EnsemblFeatureType[] features = getFeaturesToFetch(); AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId, features); if (geneFeatures != null && geneFeatures.getHeight() > 0) @@ -260,6 +257,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient String accId = querySeq.getName(); try { + System.out.println("Adding protein product for " + accId); AlignmentI protein = new EnsemblProtein(getDomain()) .getSequenceRecords(accId); if (protein == null || protein.getHeight() == 0) @@ -286,10 +284,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient DBRefEntry dbr = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), proteinSeq.getName(), map); querySeq.getDatasetSequence().addDBRef(dbr); - DBRefEntry[] uprots = DBRefUtils.selectRefs(ds.getDBRefs(), + List uprots = DBRefUtils.selectRefs(ds.getDBRefs(), new String[] { DBRefSource.UNIPROT }); - DBRefEntry[] upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(), + List upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(), new String[] { DBRefSource.UNIPROT }); if (uprots != null) @@ -354,20 +352,23 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { seq = seq.getDatasetSequence(); } - EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(), getEnsemblDataVersion()); List xrefs = xrefFetcher.getCrossReferences(seq.getName()); - for (DBRefEntry xref : xrefs) + + for (int i = 0, n = xrefs.size(); i < n; i++) { - seq.addDBRef(xref); + // BH 2019.01.25 this next method was taking 174 ms PER addition for a 266-reference example. + // DBRefUtils.ensurePrimaries(seq) + // was at the end of seq.addDBRef, so executed after ever addition! + // This method was moved to seq.getPrimaryDBRefs() + seq.addDBRef(xrefs.get(i)); } - /* * and add a reference to itself */ DBRefEntry self = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), - seq.getName()); + seq.getName()); seq.addDBRef(self); } @@ -389,13 +390,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient inProgress = false; throw new JalviewException("ENSEMBL Rest API not available."); } - BufferedReader br = getSequenceReader(ids); - if (br == null) - { - return alignment; - } - - List seqs = parseSequenceJson(br); + List seqs = parseSequenceJson(ids); + if (seqs == null) + return alignment; if (seqs.isEmpty()) { @@ -448,9 +445,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * @return a single jalview.datamodel.Sequence * @see http://rest.ensembl.org/documentation/info/sequence_id */ - protected List parseSequenceJson(BufferedReader br) + @SuppressWarnings("unchecked") + protected List parseSequenceJson(List ids) { - JSONParser jp = new JSONParser(); List result = new ArrayList<>(); try { @@ -458,7 +455,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * for now, assumes only one sequence returned; refactor if needed * in future to handle a JSONArray with more than one */ - final JSONObject val = (JSONObject) jp.parse(br); + Map val = (Map) getJSON(null, ids, -1, MODE_MAP, null); + if (val == null) + return null; Object s = val.get("desc"); String desc = s == null ? null : s.toString(); s = val.get("id"); @@ -506,7 +505,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats urlstring.append("?type=").append(getSourceEnsemblType().getType()); urlstring.append(("&Accept=application/json")); - urlstring.append(("&Content-Type=application/json")); + urlstring.append(("&content-type=application/json")); String objectType = getObjectType(); if (objectType != null) @@ -867,9 +866,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected boolean featureMayBelong(SequenceFeature sf, String identifier) { String parent = (String) sf.getValue(PARENT); - // using contains to allow for prefix "gene:", "transcript:" etc if (parent != null - && !parent.toUpperCase().contains(identifier.toUpperCase())) + && !parent.equalsIgnoreCase(identifier)) { // this genomic feature belongs to a different transcript return false; @@ -877,6 +875,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient return true; } + /** + * Answers a short description of the sequence fetcher + */ @Override public String getDescription() { @@ -915,10 +916,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient /** * Answers true if the feature type is either 'NMD_transcript_variant' or - * 'transcript' or one of its sub-types in the Sequence Ontology. This is - * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl + * 'transcript' (or one of its sub-types in the Sequence Ontology). This is + * because NMD_transcript_variant behaves like 'transcript' in Ensembl * although strictly speaking it is not (it is a sub-type of * sequence_variant). + *

+ * (This test was needed when fetching transcript features as GFF. As we are + * now fetching as JSON, all features have type 'transcript' so the check for + * NMD_transcript_variant is redundant. Left in for any future case arising.) * * @param featureType * @return