X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FEmblXmlSource.java;h=b3b99ca1ed5b156e26e609b06b40bcd6fa0a1a64;hb=a1984b1c8c273ed33c7ce9283039f4027dcae2de;hp=6b6f2ecea0d69f65fcc3a623bc5d93c412cad02a;hpb=fe3cd724aecdeb06a130a502ce3a967ad643f458;p=jalview.git diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 6b6f2ec..b3b99ca 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -28,6 +28,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Hashtable; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Map.Entry; @@ -63,12 +64,6 @@ import jalview.xml.binding.embl.EntryType.Feature.Qualifier; import jalview.xml.binding.embl.ROOT; import jalview.xml.binding.embl.XrefType; -/** - * Provides XML binding and parsing of EMBL or EMBLCDS records retrieved from - * (e.g.) {@code https://www.ebi.ac.uk/ena/data/view/x53828&display=xml}. - * - * @deprecated endpoint withdrawn August 2020 (JAL-3692), use EmblFlatfileSource - */ public abstract class EmblXmlSource extends EbiFileRetrievedProxy { private static final Regex ACCESSION_REGEX = new Regex("^[A-Z]+[0-9]+"); @@ -102,14 +97,14 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy try { reply = dbFetch.fetchDataAsFile( - emprefx.toLowerCase() + ":" + query.trim(), "display=xml", - "xml"); + emprefx.toLowerCase(Locale.ROOT) + ":" + query.trim(), + "display=xml", "xml"); } catch (Exception e) { stopQuery(); throw new Exception( String.format("EBI EMBL XML retrieval failed for %s:%s", - emprefx.toLowerCase(), query.trim()), + emprefx.toLowerCase(Locale.ROOT), query.trim()), e); } return getEmblSequenceRecords(emprefx, query, reply); @@ -453,9 +448,8 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy else { // final product length truncation check - int[] cdsRanges = adjustForProteinLength(translationLength, - exons); - dnaToProteinMapping = new Mapping(product, cdsRanges, + int[] exons2 = adjustForProteinLength(translationLength, exons); + dnaToProteinMapping = new Mapping(product, exons2, new int[] { 1, translationLength }, 3, 1); if (product != null) @@ -574,6 +568,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy proteinSeq = new Sequence(proteinSeqName, product.getSequenceAsString()); matcher.add(proteinSeq); + proteinSeq.setDescription(product.getDescription()); peptides.add(proteinSeq); } dnaToProteinMapping.setTo(proteinSeq); @@ -663,7 +658,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy return listToArray(ranges); } catch (ParseException e) { - Cache.log.warn( + Cache.warn( String.format("Not parsing inexact CDS location %s in ENA %s", location, accession)); return new int[] {}; @@ -756,8 +751,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy /** * Truncates (if necessary) the exon intervals to match 3 times the length of - * the protein; also accepts 3 bases longer (for stop codon not included in - * protein) + * the protein (including truncation for stop codon included in exon) * * @param proteinLength * @param exon @@ -774,11 +768,9 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy int exonLength = MappingUtils.getLength(Arrays.asList(exon)); /* - * if exon length matches protein, or is shorter, or longer by the - * length of a stop codon (3 bases), then leave it unchanged + * if exon length matches protein, or is shorter, then leave it unchanged */ - if (expectedCdsLength >= exonLength - || expectedCdsLength == exonLength - 3) + if (expectedCdsLength >= exonLength) { return exon; }