X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSeqProxy.java;h=bdaef0b23a9fb6b77d72eb4d4a82b94314d3a9ca;hb=be94287924736b46264674a189fd1719d38273ad;hp=7bf2563d48ac5e022fbc0b4e35050347922d630c;hpb=8e7cf85a7f61f425e808cac53ead7bc27e402242;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 7bf2563..bdaef0b 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -171,14 +171,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * fetch and transfer genomic sequence features, * fetch protein product and add as cross-reference */ - for (String accId : allIds) + for (int i = 0, n = allIds.size(); i < n; i++) { - addFeaturesAndProduct(accId, alignment); + addFeaturesAndProduct(allIds.get(i), alignment); } - for (SequenceI seq : alignment.getSequences()) + List seqs = alignment.getSequences(); + for (int i = 0, n = seqs.size(); i < n; i++) { - getCrossReferences(seq); + getCrossReferences(seqs.get(i)); } return alignment; @@ -208,13 +209,20 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ SequenceI genomicSequence = null; EnsemblFeatures gffFetcher = new EnsemblFeatures(getDomain()); - EnsemblFeatureType[] features = getFeaturesToFetch(); + EnsemblFeatureType[] features = getFeaturesToFetch(); + + Platform.timeCheck("ESP.getsequencerec1", Platform.TIME_MARK); + + AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId, features); if (geneFeatures != null && geneFeatures.getHeight() > 0) { genomicSequence = geneFeatures.getSequenceAt(0); } + + Platform.timeCheck("ESP.getsequencerec2", Platform.TIME_MARK); + if (genomicSequence != null) { /* @@ -228,6 +236,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * fetch and map protein product, and add it as a cross-reference * of the retrieved sequence */ + Platform.timeCheck("ESP.transferFeatures", Platform.TIME_MARK); addProteinProduct(querySeq); } } @@ -236,6 +245,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient System.err.println( "Error transferring Ensembl features: " + e.getMessage()); } + Platform.timeCheck("ESP.addfeat done", Platform.TIME_MARK); } /** @@ -284,10 +294,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient DBRefEntry dbr = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), proteinSeq.getName(), map); querySeq.getDatasetSequence().addDBRef(dbr); - DBRefEntry[] uprots = DBRefUtils.selectRefs(ds.getDBRefs(), + List uprots = DBRefUtils.selectRefs(ds.getDBRefs(), new String[] { DBRefSource.UNIPROT }); - DBRefEntry[] upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(), + List upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(), new String[] { DBRefSource.UNIPROT }); if (uprots != null) @@ -348,25 +358,47 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ protected void getCrossReferences(SequenceI seq) { + + Platform.timeCheck("ESP. getdataseq ", Platform.TIME_MARK); + + while (seq.getDatasetSequence() != null) { seq = seq.getDatasetSequence(); } + Platform.timeCheck("ESP. getxref ", Platform.TIME_MARK); + EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(), getEnsemblDataVersion()); List xrefs = xrefFetcher.getCrossReferences(seq.getName()); - for (DBRefEntry xref : xrefs) + + for (int i = 0, n = xrefs.size(); i < n; i++) { - seq.addDBRef(xref); +// Platform.timeCheck("ESP. getxref + " + (i) + "/" + n, Platform.TIME_MARK); + // BH 2019.01.25 this next method was taking 174 ms PER addition for a 266-reference example. + // DBRefUtils.ensurePrimaries(seq) + // was at the end of seq.addDBRef, so executed after ever addition! + // This method was moved to seq.getPrimaryDBRefs() + seq.addDBRef(xrefs.get(i)); } +// System.out.println("primaries are " + seq.getPrimaryDBRefs().toString()); /* * and add a reference to itself */ + +// Platform.timeCheck("ESP. getxref self ", Platform.TIME_MARK); + DBRefEntry self = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), - seq.getName()); + seq.getName()); + +// Platform.timeCheck("ESP. getxref self add ", Platform.TIME_MARK); + seq.addDBRef(self); + + Platform.timeCheck("ESP. seqprox done ", Platform.TIME_MARK); + } /** @@ -391,7 +423,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient List seqs = parseSequenceJson(ids); if (seqs == null) - return alignment; + { + return alignment; + } if (seqs.isEmpty()) { @@ -454,9 +488,13 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * for now, assumes only one sequence returned; refactor if needed * in future to handle a JSONArray with more than one */ + + Platform.timeCheck("ENS seqproxy", Platform.TIME_MARK); Map val = (Map) getJSON(null, ids, -1, MODE_MAP, null); if (val == null) - return null; + { + return null; + } Object s = val.get("desc"); String desc = s == null ? null : s.toString(); s = val.get("id"); @@ -479,6 +517,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient System.err.println("Error processing JSON response: " + e.toString()); // ignore } + Platform.timeCheck("ENS seqproxy2", Platform.TIME_MARK); return result; } @@ -658,7 +697,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ protected abstract List getIdentifyingFeatures( SequenceI seq, String accId); - + + int bhtest = 0; + /** * Transfers the sequence feature to the target sequence, locating its start * and end range based on the mapping. Features which do not overlap the @@ -680,6 +721,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient if (mappedRange != null) { +// Platform.timeCheck(null, Platform.TIME_SET); String group = sf.getFeatureGroup(); if (".".equals(group)) { @@ -687,15 +729,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } int newBegin = Math.min(mappedRange[0], mappedRange[1]); int newEnd = Math.max(mappedRange[0], mappedRange[1]); - SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, - group, sf.getScore()); +// Platform.timeCheck(null, Platform.TIME_MARK); + bhtest++; + // 280 ms/1000 here: + SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, group, sf.getScore()); + // 0.175 ms here: targetSequence.addSequenceFeature(copy); /* * for sequence_variant on reverse strand, have to convert the allele * values to their complements */ - if (!forwardStrand && SequenceOntologyFactory.getInstance() + if (!forwardStrand && SequenceOntologyFactory.getSequenceOntology() .isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT)) { reverseComplementAlleles(copy); @@ -793,10 +838,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId, targetSequence.getStart()); if (mapping == null) - { + { return false; } + + Platform.timeCheck("ESP. xfer " + sfs.size(), Platform.TIME_MARK); + + boolean result = transferFeatures(sfs, targetSequence, mapping, accessionId); // System.out.println("transferFeatures (" + (sfs.size()) + " --> " @@ -830,14 +879,23 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient SequenceFeatures.sortFeatures(sfs, forwardStrand); boolean transferred = false; - for (SequenceFeature sf : sfs) + + for (int i = 0, n = sfs.size(); i < n; i++) { + +// if ((i%1000) == 0) { +//// Platform.timeCheck("Feature " + bhtest, Platform.TIME_GET); +// Platform.timeCheck("ESP. xferFeature + " + (i) + "/" + n, Platform.TIME_MARK); +// } + + SequenceFeature sf = sfs.get(i); if (retainFeature(sf, parentId)) { transferFeature(sf, targetSequence, mapping, forwardStrand); transferred = true; } } + return transferred; } @@ -930,7 +988,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient public static boolean isTranscript(String featureType) { return SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(featureType) - || SequenceOntologyFactory.getInstance().isA(featureType, + || SequenceOntologyFactory.getSequenceOntology().isA(featureType, SequenceOntologyI.TRANSCRIPT); } }