X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FEmblXmlSource.java;fp=src%2Fjalview%2Fws%2Fdbsources%2FEmblXmlSource.java;h=7bb1977d73418df228eaa0f74454832f88d11d76;hb=304e64fb34b32659be1bbfd39fb4e15b2f79586e;hp=51980269fc7e84fae452a07c9a7625735a988f5f;hpb=cfb79b69d9fa44595560659bd95d1d1cd27677ad;p=jalview.git diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 5198026..7bb1977 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -20,6 +20,8 @@ */ package jalview.ws.dbsources; +import java.util.Locale; + import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -62,8 +64,17 @@ import jalview.xml.binding.embl.EntryType.Feature.Qualifier; import jalview.xml.binding.embl.ROOT; import jalview.xml.binding.embl.XrefType; +/** + * Provides XML binding and parsing of EMBL or EMBLCDS records retrieved from + * (e.g.) {@code https://www.ebi.ac.uk/ena/data/view/x53828&display=xml}. + * + * @deprecated endpoint withdrawn August 2020 (JAL-3692), use EmblFlatfileSource + */ + public abstract class EmblXmlSource extends EbiFileRetrievedProxy { + // TODO: delete class or update tyhis validator for 2.12 style Platform.regex + private static final Regex ACCESSION_REGEX = new Regex("^[A-Z]+[0-9]+"); /* * JAL-1856 Embl returns this text for query not found */ @@ -93,14 +104,15 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy try { reply = dbFetch.fetchDataAsFile( - emprefx.toLowerCase() + ":" + query.trim(), "display=xml", + emprefx.toLowerCase(Locale.ROOT) + ":" + query.trim(), "display=xml", "xml"); } catch (Exception e) { stopQuery(); - throw new Exception(MessageManager.formatMessage( - "exception.ebiembl_retrieval_failed_on", new String[] - { emprefx.toLowerCase(), query.trim() }), e); + throw new Exception( + String.format("EBI EMBL XML retrieval failed for %s:%s", + emprefx.toLowerCase(Locale.ROOT), query.trim()), + e); } return getEmblSequenceRecords(emprefx, query, reply); } @@ -182,7 +194,8 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(is); javax.xml.bind.Unmarshaller um = jc.createUnmarshaller(); - JAXBElement rootElement = um.unmarshal(streamReader, ROOT.class); + JAXBElement rootElement = um.unmarshal(streamReader, + ROOT.class); ROOT root = rootElement.getValue(); /* @@ -563,6 +576,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy proteinSeq = new Sequence(proteinSeqName, product.getSequenceAsString()); matcher.add(proteinSeq); + proteinSeq.setDescription(product.getDescription()); peptides.add(proteinSeq); } dnaToProteinMapping.setTo(proteinSeq); @@ -616,8 +630,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy && dnaToProteinMapping.getTo() != null) { DBRefEntry dnaToEmblProteinRef = new DBRefEntry( - DBRefSource.EMBLCDSProduct, sequenceVersion, - proteinId); + DBRefSource.EMBLCDSProduct, sequenceVersion, proteinId); dnaToEmblProteinRef.setMap(dnaToProteinMapping); dnaToProteinMapping.setMappedFromId(proteinId); dna.addDBRef(dnaToEmblProteinRef); @@ -646,7 +659,6 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy { return new int[] {}; } - try { List ranges = DnaUtils.parseLocation(location); @@ -710,6 +722,39 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy return sf; } + @Override + public String getAccessionSeparator() + { + return null; + } + + @Override + public Regex getAccessionValidator() + { + return ACCESSION_REGEX; + } + + @Override + public String getDbVersion() + { + return "0"; + } + + @Override + public int getTier() + { + return 0; + } + + @Override + public boolean isValidReference(String accession) + { + if (accession == null || accession.length() < 2) + { + return false; + } + return getAccessionValidator().search(accession); + } /** * Truncates (if necessary) the exon intervals to match 3 times the length of * the protein; also accepts 3 bases longer (for stop codon not included in @@ -728,7 +773,6 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy } int expectedCdsLength = proteinLength * 3; int exonLength = MappingUtils.getLength(Arrays.asList(exon)); - /* * if exon length matches protein, or is shorter, or longer by the * length of a stop codon (3 bases), then leave it unchanged @@ -738,7 +782,6 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy { return exon; } - int origxon[]; int sxpos = -1; int endxon = 0; @@ -758,7 +801,6 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy // .println("Truncating final exon interval on region by " // + (cdspos - cdslength)); } - /* * shrink the final exon - reduce end position if forward * strand, increase it if reverse @@ -774,7 +816,6 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy break; } } - if (sxpos != -1) { // and trim the exon interval set if necessary