X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSeqProxy.java;h=7bf2563d48ac5e022fbc0b4e35050347922d630c;hb=8e7cf85a7f61f425e808cac53ead7bc27e402242;hp=c903de3a3dafa4df60cc2374dbf3ad08f68910e8;hpb=da31d3e5aebfcdbb1ae7084bbc75bbc3539ee2e6;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index c903de3..7bf2563 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -33,14 +33,15 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.datamodel.features.SequenceFeatures; import jalview.exceptions.JalviewException; +import jalview.io.gff.Gff3Helper; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.util.Comparison; import jalview.util.DBRefUtils; import jalview.util.IntRangeComparator; import jalview.util.MapList; +import jalview.util.Platform; -import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; @@ -48,9 +49,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; -import org.json.simple.JSONObject; -import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** @@ -61,10 +61,6 @@ import org.json.simple.parser.ParseException; */ public abstract class EnsemblSeqProxy extends EnsemblRestClient { - private static final String ALLELES = "alleles"; - - protected static final String NAME = "Name"; - protected static final String DESCRIPTION = "description"; /* @@ -261,6 +257,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient String accId = querySeq.getName(); try { + System.out.println("Adding protein product for " + accId); AlignmentI protein = new EnsemblProtein(getDomain()) .getSequenceRecords(accId); if (protein == null || protein.getHeight() == 0) @@ -390,13 +387,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient inProgress = false; throw new JalviewException("ENSEMBL Rest API not available."); } - BufferedReader br = getSequenceReader(ids); - if (br == null) - { - return alignment; - } + Platform.timeCheck("EnsemblSeqProx.fetchSeq ", Platform.TIME_MARK); - List seqs = parseSequenceJson(br); + List seqs = parseSequenceJson(ids); + if (seqs == null) + return alignment; if (seqs.isEmpty()) { @@ -443,15 +438,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Parses a JSON response into a list of sequences + * Parses a JSON response for a single sequence ID query * * @param br - * @return + * @return a single jalview.datamodel.Sequence * @see http://rest.ensembl.org/documentation/info/sequence_id */ - protected List parseSequenceJson(BufferedReader br) + @SuppressWarnings("unchecked") + protected List parseSequenceJson(List ids) { - JSONParser jp = new JSONParser(); List result = new ArrayList<>(); try { @@ -459,7 +454,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * for now, assumes only one sequence returned; refactor if needed * in future to handle a JSONArray with more than one */ - final JSONObject val = (JSONObject) jp.parse(br); + Map val = (Map) getJSON(null, ids, -1, MODE_MAP, null); + if (val == null) + return null; Object s = val.get("desc"); String desc = s == null ? null : s.toString(); s = val.get("id"); @@ -507,7 +504,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats urlstring.append("?type=").append(getSourceEnsemblType().getType()); urlstring.append(("&Accept=application/json")); - urlstring.append(("&Content-Type=application/json")); + urlstring.append(("&content-type=application/json")); String objectType = getObjectType(); if (objectType != null) @@ -582,8 +579,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence, String accId, int start) { - List sfs = sourceSequence.getFeatures() - .getPositionalFeatures(); + List sfs = getIdentifyingFeatures(sourceSequence, + accId); if (sfs.isEmpty()) { return null; @@ -600,47 +597,31 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient for (SequenceFeature sf : sfs) { + int strand = sf.getStrand(); + strand = strand == 0 ? 1 : strand; // treat unknown as forward + + if (directionSet && strand != direction) + { + // abort - mix of forward and backward + System.err + .println("Error: forward and backward strand for " + accId); + return null; + } + direction = strand; + directionSet = true; + /* - * accept the target feature type or a specialisation of it - * (e.g. coding_exon for exon) + * add to CDS ranges, semi-sorted forwards/backwards */ - if (identifiesSequence(sf, accId)) + if (strand < 0) { - int strand = sf.getStrand(); - strand = strand == 0 ? 1 : strand; // treat unknown as forward - - if (directionSet && strand != direction) - { - // abort - mix of forward and backward - System.err.println( - "Error: forward and backward strand for " + accId); - return null; - } - direction = strand; - directionSet = true; - - /* - * add to CDS ranges, semi-sorted forwards/backwards - */ - if (strand < 0) - { - regions.add(0, new int[] { sf.getEnd(), sf.getBegin() }); - } - else - { - regions.add(new int[] { sf.getBegin(), sf.getEnd() }); - } - mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1); - - if (!isSpliceable()) - { - /* - * 'gene' sequence is contiguous so we can stop as soon as its - * identifying feature has been found - */ - break; - } + regions.add(0, new int[] { sf.getEnd(), sf.getBegin() }); } + else + { + regions.add(new int[] { sf.getBegin(), sf.getEnd() }); + } + mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1); } if (regions.isEmpty()) @@ -665,26 +646,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Answers true if the sequence being retrieved may occupy discontiguous - * regions on the genomic sequence. - */ - protected boolean isSpliceable() - { - return true; - } - - /** - * Returns true if the sequence feature marks positions of the genomic + * Answers a list of sequence features that mark positions of the genomic * sequence feature which are within the sequence being retrieved. For * example, an 'exon' feature whose parent is the target transcript marks the - * cdna positions of the transcript. + * cdna positions of the transcript. For a gene sequence, this is trivially + * just the 'gene' feature with matching gene id. * - * @param sf + * @param seq * @param accId * @return */ - protected abstract boolean identifiesSequence(SequenceFeature sf, - String accId); + protected abstract List getIdentifyingFeatures( + SequenceI seq, String accId); /** * Transfers the sequence feature to the target sequence, locating its start @@ -738,7 +711,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ static void reverseComplementAlleles(SequenceFeature sf) { - final String alleles = (String) sf.getValue(ALLELES); + final String alleles = (String) sf.getValue(Gff3Helper.ALLELES); if (alleles == null) { return; @@ -749,7 +722,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient reverseComplementAllele(complement, allele); } String comp = complement.toString(); - sf.setValue(ALLELES, comp); + sf.setValue(Gff3Helper.ALLELES, comp); sf.setDescription(comp); /* @@ -759,7 +732,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient String atts = sf.getAttributes(); if (atts != null) { - atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp); + atts = atts.replace(Gff3Helper.ALLELES + "=" + alleles, + Gff3Helper.ALLELES + "=" + comp); sf.setAttributes(atts); } } @@ -891,9 +865,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected boolean featureMayBelong(SequenceFeature sf, String identifier) { String parent = (String) sf.getValue(PARENT); - // using contains to allow for prefix "gene:", "transcript:" etc if (parent != null - && !parent.toUpperCase().contains(identifier.toUpperCase())) + && !parent.equalsIgnoreCase(identifier)) { // this genomic feature belongs to a different transcript return false; @@ -901,6 +874,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient return true; } + /** + * Answers a short description of the sequence fetcher + */ @Override public String getDescription() { @@ -939,10 +915,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient /** * Answers true if the feature type is either 'NMD_transcript_variant' or - * 'transcript' or one of its sub-types in the Sequence Ontology. This is - * needed because NMD_transcript_variant behaves like 'transcript' in Ensembl + * 'transcript' (or one of its sub-types in the Sequence Ontology). This is + * because NMD_transcript_variant behaves like 'transcript' in Ensembl * although strictly speaking it is not (it is a sub-type of * sequence_variant). + *

+ * (This test was needed when fetching transcript features as GFF. As we are + * now fetching as JSON, all features have type 'transcript' so the check for + * NMD_transcript_variant is redundant. Left in for any future case arising.) * * @param featureType * @return