From: Jim Procter Date: Tue, 31 May 2016 15:33:21 +0000 (+0100) Subject: Merge branch 'develop' into features/JAL-2113_emblXml1.2 X-Git-Tag: Release_2_10_0~196^2~2^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=18e78fa415a301cf4423037a713b2b088b9fc656;hp=a6ca407ba239e57e9b9918056437b5215babc943;p=jalview.git Merge branch 'develop' into features/JAL-2113_emblXml1.2 JAL-2113 JAL-1919 mmcif/pdb configurable retrieval from EBI/PDBe - verfied as working --- diff --git a/resources/embl_mapping.xml b/resources/embl_mapping.xml index ccbde5e..01b921a 100644 --- a/resources/embl_mapping.xml +++ b/resources/embl_mapping.xml @@ -22,35 +22,61 @@ - + - - + - - - + + + - - + + + + + + + + + + + + + + + + + + + + - - + + - - + + - + + + + @@ -60,19 +86,13 @@ - + - - - - - - @@ -81,25 +101,25 @@ + + + - + - - - - + - + @@ -113,40 +133,4 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/MCview/PDBViewer.java b/src/MCview/PDBViewer.java index e032c7a..d5f0d0b 100755 --- a/src/MCview/PDBViewer.java +++ b/src/MCview/PDBViewer.java @@ -151,7 +151,7 @@ public class PDBViewer extends JInternalFrame implements Runnable { EBIFetchClient ebi = new EBIFetchClient(); String query = "pdb:" + pdbentry.getId(); - pdbentry.setFile(ebi.fetchDataAsFile(query, "default", "raw", ".xml") + pdbentry.setFile(ebi.fetchDataAsFile(query, "default", ".xml") .getAbsolutePath()); if (pdbentry.getFile() != null) diff --git a/src/jalview/datamodel/FeatureProperties.java b/src/jalview/datamodel/FeatureProperties.java index d25eb96..2306bec 100644 --- a/src/jalview/datamodel/FeatureProperties.java +++ b/src/jalview/datamodel/FeatureProperties.java @@ -28,8 +28,7 @@ package jalview.datamodel; */ public class FeatureProperties { - - private static final String EMBL_CODING_FEATURE = "CDS"; + public static final String EMBL_CODING_FEATURE = "CDS"; public static final String EXONPOS = "exon number"; diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index f2eb8ac..c75d6f2 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -39,6 +39,9 @@ public class SequenceFeature // private key for Phase designed not to conflict with real GFF data private static final String PHASE = "!Phase"; + // private key for ENA location designed not to conflict with real GFF data + private static final String LOCATION = "!Location"; + /* * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as * name1=value1;name2=value2,value3;...etc @@ -55,6 +58,10 @@ public class SequenceFeature public String description; + /* + * a map of key-value pairs; may be populated from GFF 'column 9' data, + * other data sources (e.g. GenBank file), or programmatically + */ public Map otherDetails; public Vector links; @@ -480,6 +487,26 @@ public class SequenceFeature } /** + * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121) + * + * @param loc + */ + public void setEnaLocation(String loc) + { + setValue(LOCATION, loc); + } + + /** + * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121) + * + * @param loc + */ + public String getEnaLocation() + { + return (String) getValue(LOCATION); + } + + /** * Readable representation, for debug only, not guaranteed not to change * between versions */ diff --git a/src/jalview/datamodel/xdb/embl/BasePosition.java b/src/jalview/datamodel/xdb/embl/BasePosition.java deleted file mode 100644 index 3737adc..0000000 --- a/src/jalview/datamodel/xdb/embl/BasePosition.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.xdb.embl; - -/** - * Data model for a feature/location/locationElement/basePosition read from an - * EMBL query reply - * - * @see embl_mapping.xml - */ -public class BasePosition -{ - String type; - - String pos; - - /** - * @return the pos - */ - public String getPos() - { - return pos; - } - - /** - * @param pos - * the pos to set - */ - public void setPos(String pos) - { - this.pos = pos; - } - - /** - * @return the type - */ - public String getType() - { - return type; - } - - /** - * @param type - * the type to set - */ - public void setType(String type) - { - this.type = type; - } -} diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 691a4c9..cfe87d9 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -29,6 +29,7 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; +import jalview.util.DnaUtils; import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; @@ -57,17 +58,29 @@ public class EmblEntry String accession; - String version; + String entryVersion; - String taxDivision; + String sequenceVersion; - String desc; + String dataClass; - String rCreated; + String moleculeType; - String rLastUpdated; + String topology; - String lastUpdated; + String sequenceLength; + + String taxonomicDivision; + + String description; + + String firstPublicDate; + + String firstPublicRelease; + + String lastUpdatedDate; + + String lastUpdatedRelease; Vector keywords; @@ -112,23 +125,6 @@ public class EmblEntry } /** - * @return the desc - */ - public String getDesc() - { - return desc; - } - - /** - * @param desc - * the desc to set - */ - public void setDesc(String desc) - { - this.desc = desc; - } - - /** * @return the features */ public Vector getFeatures() @@ -163,57 +159,6 @@ public class EmblEntry } /** - * @return the lastUpdated - */ - public String getLastUpdated() - { - return lastUpdated; - } - - /** - * @param lastUpdated - * the lastUpdated to set - */ - public void setLastUpdated(String lastUpdated) - { - this.lastUpdated = lastUpdated; - } - - /** - * @return the releaseCreated - */ - public String getRCreated() - { - return rCreated; - } - - /** - * @param releaseCreated - * the releaseCreated to set - */ - public void setRCreated(String releaseCreated) - { - this.rCreated = releaseCreated; - } - - /** - * @return the releaseLastUpdated - */ - public String getRLastUpdated() - { - return rLastUpdated; - } - - /** - * @param releaseLastUpdated - * the releaseLastUpdated to set - */ - public void setRLastUpdated(String releaseLastUpdated) - { - this.rLastUpdated = releaseLastUpdated; - } - - /** * @return the sequence */ public EmblSequence getSequence() @@ -231,40 +176,6 @@ public class EmblEntry } /** - * @return the taxDivision - */ - public String getTaxDivision() - { - return taxDivision; - } - - /** - * @param taxDivision - * the taxDivision to set - */ - public void setTaxDivision(String taxDivision) - { - this.taxDivision = taxDivision; - } - - /** - * @return the version - */ - public String getVersion() - { - return version; - } - - /** - * @param version - * the version to set - */ - public void setVersion(String version) - { - this.version = version; - } - - /** * Recover annotated sequences from EMBL file * * @param sourceDb @@ -276,8 +187,9 @@ public class EmblEntry { SequenceI dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence()); - dna.setDescription(desc); - DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession); + dna.setDescription(description); + DBRefEntry retrievedref = new DBRefEntry(sourceDb, + getSequenceVersion(), accession); dna.addDBRef(retrievedref); // add map to indicate the sequence is a valid coordinate frame for the // dbref @@ -479,7 +391,8 @@ public class EmblEntry DBRefEntry pcdnaref = new DBRefEntry(); pcdnaref.setAccessionId(prid); pcdnaref.setSource(DBRefSource.EMBLCDS); - pcdnaref.setVersion(getVersion()); // same as parent EMBL version. + pcdnaref.setVersion(getSequenceVersion()); // same as parent EMBL + // version. MapList mp = new MapList(new int[] { 1, prseq.length() }, new int[] { 1 + (codonStart - 1), (codonStart - 1) + 3 * prseq.length() }, 1, 3); @@ -499,6 +412,7 @@ public class EmblEntry SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals, codonStart); sf.setType(feature.getName()); // "CDS" + sf.setEnaLocation(feature.getLocation()); sf.setFeatureGroup(sourceDb); dna.addSequenceFeature(sf); } @@ -559,7 +473,7 @@ public class EmblEntry if (map != null) { Mapping pmap = new Mapping(dna, map.getMap().getInverse()); - pref = new DBRefEntry(sourceDb, getVersion(), + pref = new DBRefEntry(sourceDb, getSequenceVersion(), this.getAccession()); pref.setMap(pmap); if (map.getTo() != null) @@ -578,7 +492,7 @@ public class EmblEntry protEMBLCDS = new DBRefEntry(); protEMBLCDS.setAccessionId(prid); protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); - protEMBLCDS.setVersion(getVersion()); + protEMBLCDS.setVersion(getSequenceVersion()); protEMBLCDS .setMap(new Mapping(product, map.getMap().getInverse())); } @@ -658,28 +572,31 @@ public class EmblEntry */ protected int[] getCdsRanges(EmblFeature feature) { - if (feature.locations == null) + if (feature.location == null) { return new int[] {}; } - int cdsBoundaryCount = 0; // count of all start/stop locations - int[][] cdsLocations = new int[feature.locations.size()][]; - int locationNumber = 0; - for (EmblFeatureLocations loc : feature.locations) - { - int[] locationRanges = loc.getElementRanges(accession); - cdsLocations[locationNumber++] = locationRanges; - cdsBoundaryCount += locationRanges.length; - } - int[] cdsRanges = new int[cdsBoundaryCount]; - int copyTo = 0; - for (int[] ranges : cdsLocations) + List ranges = DnaUtils.parseLocation(feature.location); + return ranges == null ? new int[] {} : listToArray(ranges); + } + + /** + * Converts a list of [start, end] ranges to a single array of [start, end, + * start, end ...] + * + * @param ranges + * @return + */ + int[] listToArray(List ranges) + { + int[] result = new int[ranges.size() * 2]; + int i = 0; + for (int[] range : ranges) { - System.arraycopy(ranges, 0, cdsRanges, copyTo, ranges.length); - copyTo += ranges.length; + result[i++] = range[0]; + result[i++] = range[1]; } - return cdsRanges; - + return result; } /** @@ -754,4 +671,124 @@ public class EmblEntry } return exon; } + + public String getSequenceVersion() + { + return sequenceVersion; + } + + public void setSequenceVersion(String sequenceVersion) + { + this.sequenceVersion = sequenceVersion; + } + + public String getSequenceLength() + { + return sequenceLength; + } + + public void setSequenceLength(String sequenceLength) + { + this.sequenceLength = sequenceLength; + } + + public String getEntryVersion() + { + return entryVersion; + } + + public void setEntryVersion(String entryVersion) + { + this.entryVersion = entryVersion; + } + + public String getMoleculeType() + { + return moleculeType; + } + + public void setMoleculeType(String moleculeType) + { + this.moleculeType = moleculeType; + } + + public String getTopology() + { + return topology; + } + + public void setTopology(String topology) + { + this.topology = topology; + } + + public String getTaxonomicDivision() + { + return taxonomicDivision; + } + + public void setTaxonomicDivision(String taxonomicDivision) + { + this.taxonomicDivision = taxonomicDivision; + } + + public String getDescription() + { + return description; + } + + public void setDescription(String description) + { + this.description = description; + } + + public String getFirstPublicDate() + { + return firstPublicDate; + } + + public void setFirstPublicDate(String firstPublicDate) + { + this.firstPublicDate = firstPublicDate; + } + + public String getFirstPublicRelease() + { + return firstPublicRelease; + } + + public void setFirstPublicRelease(String firstPublicRelease) + { + this.firstPublicRelease = firstPublicRelease; + } + + public String getLastUpdatedDate() + { + return lastUpdatedDate; + } + + public void setLastUpdatedDate(String lastUpdatedDate) + { + this.lastUpdatedDate = lastUpdatedDate; + } + + public String getLastUpdatedRelease() + { + return lastUpdatedRelease; + } + + public void setLastUpdatedRelease(String lastUpdatedRelease) + { + this.lastUpdatedRelease = lastUpdatedRelease; + } + + public String getDataClass() + { + return dataClass; + } + + public void setDataClass(String dataClass) + { + this.dataClass = dataClass; + } } diff --git a/src/jalview/datamodel/xdb/embl/EmblFeature.java b/src/jalview/datamodel/xdb/embl/EmblFeature.java index 7e503c9..51d740b 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFeature.java +++ b/src/jalview/datamodel/xdb/embl/EmblFeature.java @@ -37,7 +37,7 @@ public class EmblFeature Vector qualifiers; - Vector locations; + String location; /** * @return the dbRefs @@ -57,20 +57,19 @@ public class EmblFeature } /** - * @return the locations + * @return the location */ - public Vector getLocations() + public String getLocation() { - return locations; + return location; } /** - * @param locations - * the locations to set + * @param loc */ - public void setLocations(Vector locations) + public void setLocation(String loc) { - this.locations = locations; + this.location = loc; } /** diff --git a/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java b/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java deleted file mode 100644 index 134ce9e..0000000 --- a/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.xdb.embl; - -/** - * Data model for a feature/location/locationElement read from an EMBL query - * reply - * - * @see embl_mapping.xml - */ -public class EmblFeatureLocElement -{ - String type; - - String accession; - - String version; - - boolean complement; - - BasePosition basePositions[]; - - /** - * @return the accession - */ - public String getAccession() - { - return accession; - } - - /** - * @param accession - * the accession to set - */ - public void setAccession(String accession) - { - this.accession = accession; - } - - /** - * @return the basePositions - */ - public BasePosition[] getBasePositions() - { - return basePositions; - } - - /** - * @param basePositions - * the basePositions to set - */ - public void setBasePositions(BasePosition[] basePositions) - { - this.basePositions = basePositions; - } - - /** - * @return the complement - */ - public boolean isComplement() - { - return complement; - } - - /** - * @param complement - * the complement to set - */ - public void setComplement(boolean complement) - { - this.complement = complement; - } - - /** - * @return the type - */ - public String getType() - { - return type; - } - - /** - * @param type - * the type to set - */ - public void setType(String type) - { - this.type = type; - } - - /** - * @return the version - */ - public String getVersion() - { - return version; - } - - /** - * @param version - * the version to set - */ - public void setVersion(String version) - { - this.version = version; - } -} diff --git a/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java b/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java deleted file mode 100644 index 9774004..0000000 --- a/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.xdb.embl; - -import jalview.bin.Cache; -import jalview.util.ArrayUtils; - -import java.util.Arrays; -import java.util.Vector; - -/** - * Data model for a <location> child element of a <feature> read - * from an EMBL query reply - * - * @see embl_mapping.xml - * @see http://www.insdc.org/files/feature_table.html#3.4.2 - */ -public class EmblFeatureLocations -{ - Vector locElements; - - String locationType; - - boolean locationComplement; - - /** - * @return the locationComplement - */ - public boolean isLocationComplement() - { - return locationComplement; - } - - /** - * @param locationComplement - * the locationComplement to set - */ - public void setLocationComplement(boolean locationComplement) - { - this.locationComplement = locationComplement; - } - - /** - * @return the locationType - */ - public String getLocationType() - { - return locationType; - } - - /** - * @param locationType - * the locationType to set - */ - public void setLocationType(String locationType) - { - this.locationType = locationType; - } - - /** - * @return the locElements - */ - public Vector getLocElements() - { - return locElements; - } - - /** - * @param locElements - * the locElements to set - */ - public void setLocElements(Vector locElements) - { - this.locElements = locElements; - } - - /** - * Return all location elements as start-end pairs (without accessions) TODO: - * pass back complement and 'less than or more than' range information Note: - * do not use this since it throws away any accessionIds associated with each - * location! - * - * @return int[] { start1, end1, ... } - */ - public int[] getElementRanges() - { - return getElementRanges(null); - } - - /** - * Return all location elements concerning given accession as start-end pairs. - * If the CDS feature is on the forward strand, then start <= end, if on the - * reverse strand then start > end. - * - * @param accession - * the accession string for which locations are requested, or null - * for all locations - * @return int[] { start1, end1, ... } - */ - int[] getElementRanges(String accession) - { - int sepos = 0; - int[] se = new int[locElements.size() * 2]; - if ("single".equalsIgnoreCase(locationType) - || "join".equalsIgnoreCase(locationType)) - { - for (EmblFeatureLocElement loce : locElements) - { - if (accession == null || loce.accession != null - && accession.equals(loce.accession)) - { - BasePosition bp[] = loce.getBasePositions(); - if (bp.length == 2) - { - try - { - int start = Integer.parseInt(bp[0].getPos()); - int end = Integer.parseInt(bp[1].getPos()); - se[sepos++] = start; - se[sepos++] = end; - } catch (NumberFormatException e) - { - System.err - .println("format error in EMBL CDS location basePosition: " - + e.getMessage()); - } - } - else - { - System.err - .println("format error in EMBL CDS location, basePosition count = " - + bp.length); - } - } - } - } - else if (locationType != null) - { - if (Cache.log != null) - { - Cache.log - .error("EmblFeatureLocations.getElementRanges cannot deal with locationType=='" - + locationType + "'"); - } - else - { - System.err - .println("EmblFeatureLocations.getElementRanges cannot deal with locationType=='" - + locationType + "'"); - } - } - - if (sepos != se.length) - { - /* - * we failed to parse something - trim off null values - */ - se = Arrays.copyOf(se, sepos); - } - - /* - * If on the complement, reverse the ranges to [end, start, ...end1, start1]. - * For an example of a joined complement, see (tRNA feature) CAGL0B00165r on - * http://www.ebi.ac.uk/ena/data/view/CR380948&display=xml - * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/CR380948/emblxml - */ - if (locationComplement) - { - ArrayUtils.reverseIntArray(se); - } - return se; - } -} diff --git a/src/jalview/datamodel/xdb/embl/EmblSequence.java b/src/jalview/datamodel/xdb/embl/EmblSequence.java index 2a6fa84..92c424b 100644 --- a/src/jalview/datamodel/xdb/embl/EmblSequence.java +++ b/src/jalview/datamodel/xdb/embl/EmblSequence.java @@ -27,12 +27,8 @@ package jalview.datamodel.xdb.embl; */ public class EmblSequence { - String version; - String sequence; - String type; - /** * @return the sequence */ @@ -47,40 +43,7 @@ public class EmblSequence */ public void setSequence(String sequence) { - this.sequence = sequence; - } - - /** - * @return the type - */ - public String getType() - { - return type; - } - - /** - * @param type - * the type to set - */ - public void setType(String type) - { - this.type = type; - } - - /** - * @return the version - */ - public String getVersion() - { - return version; - } - - /** - * @param version - * the version to set - */ - public void setVersion(String version) - { - this.version = version; + // remove spaces introduced by unmarshalling of newline characters + this.sequence = sequence.replace(" ", ""); } } diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 0085221..2049766 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -63,7 +63,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy try { reply = dbFetch.fetchDataAsFile( - emprefx.toLowerCase() + ":" + query.trim(), "emblxml", null, + emprefx.toLowerCase() + ":" + query.trim(), "display=xml", ".xml"); } catch (Exception e) { diff --git a/src/jalview/ws/dbsources/Pdb.java b/src/jalview/ws/dbsources/Pdb.java index b2063c1..c87a111 100644 --- a/src/jalview/ws/dbsources/Pdb.java +++ b/src/jalview/ws/dbsources/Pdb.java @@ -1,3 +1,4 @@ + /* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors @@ -136,7 +137,7 @@ public class Pdb extends EbiFileRetrievedProxy : ".xml"; EBIFetchClient ebi = new EBIFetchClient(); file = ebi.fetchDataAsFile("pdb:" + id, - getCurrentDefaultFormat().toLowerCase(), "raw", ext) + getCurrentDefaultFormat().toLowerCase(), ext) .getAbsolutePath(); stopQuery(); if (file == null) diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 17f1842..8cc0ce4 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -165,7 +165,7 @@ public class Uniprot extends DbSourceProxyImpl // uniprotxml parameter required since december 2007 // uniprotkb dbname changed introduced december 2008 File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml", - null, ".xml"); + ".xml"); Vector entries = getUniprotEntries(new FileReader(file)); if (entries != null) diff --git a/src/jalview/ws/ebi/EBIFetchClient.java b/src/jalview/ws/ebi/EBIFetchClient.java index 9f6bc65..1dff32f 100644 --- a/src/jalview/ws/ebi/EBIFetchClient.java +++ b/src/jalview/ws/ebi/EBIFetchClient.java @@ -42,9 +42,6 @@ import java.util.StringTokenizer; */ public class EBIFetchClient { - String format = "default"; - - String style = "raw"; /** * Creates a new EBIFetchClient object. @@ -93,14 +90,13 @@ public class EBIFetchClient * the query formatted as db:query1;query2;query3 * @param format * the format wanted - * @param s - * - unused parameter + * @param extension + * for the temporary file to hold response * @return the file holding the response * @throws OutOfMemoryError */ - public File fetchDataAsFile(String ids, String format, String s, - String ext) + public File fetchDataAsFile(String ids, String format, String ext) throws OutOfMemoryError { File outFile = null; @@ -108,7 +104,7 @@ public class EBIFetchClient { outFile = File.createTempFile("jalview", ext); outFile.deleteOnExit(); - fetchData(ids, format, s, outFile); + fetchData(ids, format, outFile); if (outFile.length() == 0) { outFile.delete(); @@ -121,92 +117,92 @@ public class EBIFetchClient } /** - * Single DB multiple record retrieval + * Fetches queries and either saves the response to a file or returns as + * string data * * @param ids - * db:query1;query2;query3 * @param format - * raw/xml - * @param s - * not used - remove? - * - * @return Raw string array result of query set + * @param outFile + * @return + * @throws OutOfMemoryError */ - public String[] fetchData(String ids, String format, String s) + String[] fetchData(String ids, String format, File outFile) throws OutOfMemoryError { - return fetchData(ids, format, s, null); + StringBuilder querystring = new StringBuilder(ids.length()); + String database = parseIds(ids, querystring); + if (database == null) + { + System.err.println("Invalid Query string : '" + ids + "'"); + System.err.println("Should be of form 'dbname:q1;q2;q3;q4'"); + return null; + } + + // note: outFile is currently always specified, so return value is null + String[] rslt = fetchBatch(querystring.toString(), database, format, outFile); + + return (rslt != null && rslt.length > 0 ? rslt : null); } - String[] fetchData(String ids, String f, String s, File outFile) - throws OutOfMemoryError + /** + * Parses ids formatted as dbname:q1;q2;q3, returns the dbname and adds + * queries as comma-separated items to the querystring. dbname must be + * specified for at least one queryId. Returns null if a mixture of different + * dbnames is found (ignoring case). + * + * @param ids + * @param queryString + * @return + */ + static String parseIds(String ids, StringBuilder queryString) { - // Need to split - // ids of the form uniprot:25KD_SARPE;ADHR_DROPS; - String[] rslts = new String[0]; + String database = null; StringTokenizer queries = new StringTokenizer(ids, ";"); - String db = null; - StringBuffer querystring = null; - int nq = 0; + boolean appending = queryString.length() > 0; while (queries.hasMoreTokens()) { String query = queries.nextToken(); - int p; - if ((p = query.indexOf(':')) > -1) + int p = query.indexOf(':'); + if (p > -1) { - db = query.substring(0, p); + String db = query.substring(0, p); + if (database != null && !db.equalsIgnoreCase(database)) + { + /* + * different databases mixed in together - invalid + */ + return null; + } + database = db; query = query.substring(p + 1); } - if (querystring == null) - { - querystring = new StringBuffer(query); - nq++; - } - else - { - querystring.append("," + query); - nq++; - } - } - if (db == null) - { - System.err.println("Invalid Query string : '" + ids - + "'\nShould be of form 'dbname:q1;q2;q3;q4'"); - return null; - } - String[] rslt = fetchBatch(querystring.toString(), db, f, s, outFile); - if (rslt != null) - { - String[] nrslts = new String[rslt.length + rslts.length]; - System.arraycopy(rslts, 0, nrslts, 0, rslts.length); - System.arraycopy(rslt, 0, nrslts, rslts.length, rslt.length); - rslts = nrslts; + queryString.append(appending ? "," : ""); + queryString.append(query); + appending = true; } - - return (rslts.length == 0 ? null : rslts); + return database; } - public String[] fetchBatch(String ids, String dbPath, String format, String s, + /** + * Fetches queries and either saves the response to a file or (if no file + * specified) returns as string data + * + * @param ids + * @param database + * @param format + * @param outFile + * @return + * @throws OutOfMemoryError + */ + String[] fetchBatch(String ids, String database, String format, File outFile) throws OutOfMemoryError { // long time = System.currentTimeMillis(); - /* - * JAL-1855 dbfetch from ena_sequence, ena_coding - */ - if (dbPath.equalsIgnoreCase(DBRefSource.EMBL)) - { - dbPath = "ena_sequence"; - } - else if (dbPath.equalsIgnoreCase(DBRefSource.EMBLCDS)) - { - dbPath = "ena_coding"; - } + String url = buildUrl(ids, database, format); try { - URL rcall = new URL("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/" - + dbPath.toLowerCase() + "/" + ids.toLowerCase() - + (format != null ? "/" + format : "")); + URL rcall = new URL(url); InputStream is = new BufferedInputStream(rcall.openStream()); if (outFile != null) @@ -234,8 +230,7 @@ public class EBIFetchClient } } catch (OutOfMemoryError er) { - - System.out.println("OUT OF MEMORY DOWNLOADING QUERY FROM " + dbPath + System.out.println("OUT OF MEMORY DOWNLOADING QUERY FROM " + database + ":\n" + ids); throw er; } catch (Exception ex) @@ -246,7 +241,7 @@ public class EBIFetchClient return null; } System.err.println("Unexpected exception when retrieving from " - + dbPath + + database + "\nQuery was : '" + ids + "'"); ex.printStackTrace(System.err); return null; @@ -257,4 +252,30 @@ public class EBIFetchClient } return null; } + + /** + * Constructs the URL to fetch from + * + * @param ids + * @param database + * @param format + * @return + */ + static String buildUrl(String ids, String database, String format) + { + String url; + if (database.equalsIgnoreCase(DBRefSource.EMBL) + || database.equalsIgnoreCase(DBRefSource.EMBLCDS)) + { + url = "http://www.ebi.ac.uk/ena/data/view/" + ids.toLowerCase() + + (format != null ? "&" + format : ""); + } + else + { + url = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/" + + database.toLowerCase() + "/" + ids.toLowerCase() + + (format != null ? "/" + format : ""); + } + return url; + } } diff --git a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java index 9fffc45..e8760bd 100644 --- a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java @@ -3,10 +3,13 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertSame; -import jalview.util.MappingUtils; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import java.util.ArrayList; import java.util.Arrays; -import java.util.Vector; +import java.util.List; import org.testng.annotations.Test; @@ -21,288 +24,93 @@ public class EmblEntryTest * Make a (CDS) Feature with 4 locations */ EmblFeature cds = new EmblFeature(); - Vector locs = new Vector(); - cds.setLocations(locs); - - /* - * single range [10-20] - */ - EmblFeatureLocations loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - Vector elements = new Vector(); - EmblFeatureLocElement locElement = new EmblFeatureLocElement(); - BasePosition b1 = new BasePosition(); - b1.setPos("10"); - BasePosition b2 = new BasePosition(); - b2.setPos("20"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [30-40] - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - b2 = new BasePosition(); - b2.setPos("40"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * join range [50-60], [70-80] - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("50"); - b2 = new BasePosition(); - b2.setPos("60"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("70"); - b2 = new BasePosition(); - b2.setPos("80"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [90-100], [110-120] - * this should be the same as complement(join(90..100,110.120)) - * which is "join 90-100 and 110-120, then complement" - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("90"); - b2 = new BasePosition(); - b2.setPos("100"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("110"); - b2 = new BasePosition(); - b2.setPos("120"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); + cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(110..120))"); int[] exons = testee.getCdsRanges(cds); - assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110, 100, 90]", + assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110]", Arrays.toString(exons)); } @Test(groups = "Functional") - public void testGetCdsRanges_badData() + public void testParseCodingFeature() { - EmblEntry testee = new EmblEntry(); + // not the whole sequence but enough for this test... + SequenceI dna = new Sequence("J03321", "GGATCCGTAAGTTAGACGAAATT"); + List peptides = new ArrayList(); + EmblFile ef = EmblTestHelper.getEmblFile(); /* - * Make a (CDS) Feature with 4 locations - */ - EmblFeature cds = new EmblFeature(); - Vector locs = new Vector(); - cds.setLocations(locs); - - /* - * single range [10-20] - */ - EmblFeatureLocations loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - Vector elements = new Vector(); - EmblFeatureLocElement locElement = new EmblFeatureLocElement(); - BasePosition b1 = new BasePosition(); - b1.setPos("10"); - BasePosition b2 = new BasePosition(); - b2.setPos("20"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * single range with missing end position - should be skipped - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - locElement.setBasePositions(new BasePosition[] { b1 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * single range with extra base position - should be skipped + * parse two CDS features, one with two Uniprot cross-refs, + * the other with one */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - locElement.setBasePositions(new BasePosition[] { b1, b1, b1 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * single valid range [50-60] to finish - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("50"); - b2 = new BasePosition(); - b2.setPos("60"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - int[] exons = testee.getCdsRanges(cds); - assertEquals("[10, 20, 50, 60]", Arrays.toString(exons)); - } - - /** - * Test retrieval of exon locations matching an accession id - */ - @Test(groups = "Functional") - public void testGetCdsRanges_forAccession() - { EmblEntry testee = new EmblEntry(); - String accession = "A1234"; - testee.setAccession(accession); - /* - * Make a (CDS) Feature with 4 locations - */ - EmblFeature cds = new EmblFeature(); - Vector locs = new Vector(); - cds.setLocations(locs); - - /* - * single range [10-20] for 'this' accession - */ - EmblFeatureLocations loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - Vector elements = new Vector(); - EmblFeatureLocElement locElement = new EmblFeatureLocElement(); - locElement.setAccession(accession); - BasePosition b1 = new BasePosition(); - b1.setPos("10"); - BasePosition b2 = new BasePosition(); - b2.setPos("20"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [30-40] - no accession - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - b2 = new BasePosition(); - b2.setPos("40"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * join range [50-60] this accession, [70-80] another - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - locElement.setAccession(accession); - b1 = new BasePosition(); - b1.setPos("50"); - b2 = new BasePosition(); - b2.setPos("60"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - locElement.setAccession("notme"); - b1 = new BasePosition(); - b1.setPos("70"); - b2 = new BasePosition(); - b2.setPos("80"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [90-100] wrong accession, [110-120] good - * this should be the same as complement(join(90..100,110.120)) - * which is "join 90-100 and 110-120, then complement" - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - locElement.setAccession("wrong"); - b1 = new BasePosition(); - b1.setPos("90"); - b2 = new BasePosition(); - b2.setPos("100"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - locElement.setAccession(accession); - b1 = new BasePosition(); - b1.setPos("110"); - b2 = new BasePosition(); - b2.setPos("120"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * verify we pick out only ranges for A1234 - */ - int[] exons = testee.getCdsRanges(cds); - assertEquals("[10, 20, 50, 60, 120, 110]", - Arrays.toString(exons)); + for (EmblFeature feature : ef.getEntries().get(0).getFeatures()) + { + if ("CDS".equals(feature.getName())) + { + testee.parseCodingFeature(feature, "EMBL", dna, peptides); + } + } + + /* + * peptides should now have five entries: + * EMBL product and two Uniprot accessions for the first CDS / translation + * EMBL product and one Uniprot accession for the second CDS / translation + */ + assertEquals(5, peptides.size()); + assertEquals("CAA30420.1", peptides.get(0).getName()); + assertEquals("MLCF", peptides.get(0).getSequenceAsString()); + assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName()); + assertEquals("MLCF", peptides.get(1).getSequenceAsString()); + assertEquals("UNIPROT|P0CE20", peptides.get(2).getName()); + assertEquals("MLCF", peptides.get(2).getSequenceAsString()); + assertEquals("CAA30421.1", peptides.get(3).getName()); + assertEquals("MSSS", peptides.get(3).getSequenceAsString()); + assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName()); + assertEquals("MSSS", peptides.get(4).getSequenceAsString()); + + /* + * verify dna sequence has dbrefs with mappings to the peptide 'products' + */ + DBRefEntry[] dbrefs = dna.getDBRefs(); + assertEquals(3, dbrefs.length); + DBRefEntry dbRefEntry = dbrefs[0]; + assertEquals("UNIPROT", dbRefEntry.getSource()); + assertEquals("B0BCM4", dbRefEntry.getAccessionId()); + assertSame(peptides.get(1), dbRefEntry.getMap().getTo()); + List fromRanges = dbRefEntry.getMap().getMap().getFromRanges(); + assertEquals(1, fromRanges.size()); + assertEquals(57, fromRanges.get(0)[0]); + assertEquals(46, fromRanges.get(0)[1]); + List toRanges = dbRefEntry.getMap().getMap().getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(1, toRanges.get(0)[0]); + assertEquals(4, toRanges.get(0)[1]); + + dbRefEntry = dbrefs[1]; + assertEquals("UNIPROT", dbRefEntry.getSource()); + assertEquals("P0CE20", dbRefEntry.getAccessionId()); + assertSame(peptides.get(2), dbRefEntry.getMap().getTo()); + fromRanges = dbRefEntry.getMap().getMap().getFromRanges(); + assertEquals(1, fromRanges.size()); + assertEquals(57, fromRanges.get(0)[0]); + assertEquals(46, fromRanges.get(0)[1]); + toRanges = dbRefEntry.getMap().getMap().getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(1, toRanges.get(0)[0]); + assertEquals(4, toRanges.get(0)[1]); + + dbRefEntry = dbrefs[2]; + assertEquals("UNIPROT", dbRefEntry.getSource()); + assertEquals("B0BCM3", dbRefEntry.getAccessionId()); + assertSame(peptides.get(4), dbRefEntry.getMap().getTo()); + fromRanges = dbRefEntry.getMap().getMap().getFromRanges(); + assertEquals(1, fromRanges.size()); + assertEquals(4, fromRanges.get(0)[0]); + assertEquals(15, fromRanges.get(0)[1]); + toRanges = dbRefEntry.getMap().getMap().getToRanges(); + assertEquals(1, toRanges.size()); + assertEquals(1, toRanges.get(0)[0]); + assertEquals(4, toRanges.get(0)[1]); } } diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index c6a94d7..6955833 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -21,61 +21,49 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; -import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.DBRefEntry; -import java.io.StringReader; import java.util.Vector; import org.testng.annotations.Test; public class EmblFileTest { - // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml - private static final String TESTDATA = "" - + "" - + "" - + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)" - + "L-lactate dehydrogenasechutney" - + "" - + "" - + "" - + "L-lactate dehydrogenase A-chainpickle" - + "MSLKDHLIHNKeith" - + "" - + "" - + "601058" - + "" - + "GTGACG"; @Test(groups = { "Functional" }) public void testGetEmblFile() { - Vector entries = EmblFile.getEmblFile( - new StringReader(TESTDATA)).getEntries(); + Vector entries = EmblTestHelper.getEmblFile().getEntries(); assertEquals(1, entries.size()); EmblEntry entry = entries.get(0); - assertEquals("X53828", entry.getAccession()); - assertEquals( - "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)", - entry.getDesc()); - assertEquals("2005-04-18", entry.getLastUpdated()); + assertEquals("X07547", entry.getAccession()); + assertEquals("C. trachomatis plasmid", entry.getDescription()); + assertEquals("STD", entry.getDataClass()); + assertEquals("PRO", entry.getTaxonomicDivision()); + assertEquals("1999-02-10", entry.getLastUpdatedDate()); + assertEquals("58", entry.getLastUpdatedRelease()); + assertEquals("1988-11-10", entry.getFirstPublicDate()); + assertEquals("18", entry.getFirstPublicRelease()); + assertEquals("genomic DNA", entry.getMoleculeType()); + assertEquals("1", entry.getSequenceVersion()); + assertEquals("8", entry.getEntryVersion()); + assertEquals("linear", entry.getTopology()); + assertEquals("7499", entry.getSequenceLength()); /* * FIXME these assertions fail - values are null - why?? Adding or removing * attributes in the test XML modifies behaviour. eg. inserting an attribute * _before_ lastUpdated results in a null value in this field. */ - // assertEquals("25", entry.getRCreated()); - // assertEquals("83", entry.getRLastUpdated()); + assertEquals("1988-11-10", entry.getFirstPublicDate()); + assertEquals("18", entry.getFirstPublicRelease()); assertEquals(2, entry.getKeywords().size()); - assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0)); - assertEquals("chutney", entry.getKeywords().get(1)); + assertEquals("plasmid", entry.getKeywords().get(0)); + assertEquals("unidentified reading frame", entry.getKeywords().get(1)); /* * dbrefs @@ -83,72 +71,81 @@ public class EmblFileTest assertEquals(2, entry.getDbRefs().size()); DBRefEntry dbref = entry.getDbRefs().get(0); assertEquals("EuropePMC", dbref.getSource()); - assertEquals("PMC1460223", dbref.getAccessionId()); - assertEquals("9649548", dbref.getVersion()); + assertEquals("PMC107176", dbref.getAccessionId()); + assertEquals("9573186", dbref.getVersion()); dbref = entry.getDbRefs().get(1); assertEquals("MD5", dbref.getSource()); - assertEquals("d3b68", dbref.getAccessionId()); + assertEquals("ac73317", dbref.getAccessionId()); // blank version has been converted to "0" assertEquals("0", dbref.getVersion()); /* - * sequence features + * two sequence features for CDS + */ + assertEquals(2, entry.getFeatures().size()); + /* + * first CDS */ - assertEquals(1, entry.getFeatures().size()); EmblFeature ef = entry.getFeatures().get(0); assertEquals("CDS", ef.getName()); + assertEquals("complement(46..57)", ef.getLocation()); assertEquals(2, ef.getDbRefs().size()); dbref = ef.getDbRefs().get(0); - assertEquals("GOA", dbref.getSource()); - assertEquals("P00340", dbref.getAccessionId()); + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("B0BCM4", dbref.getAccessionId()); assertEquals("2.1", dbref.getVersion()); dbref = ef.getDbRefs().get(1); - assertEquals("InterPro", dbref.getSource()); - assertEquals("IPR001236", dbref.getAccessionId()); - // blank version converted to "0": + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("P0CE20", dbref.getAccessionId()); + // blank version gets converted to "0": assertEquals("0", dbref.getVersion()); - assertEquals(2, ef.getQualifiers().size()); - - // feature qualifiers + // CDS feature qualifiers + assertEquals(3, ef.getQualifiers().size()); Qualifier q = ef.getQualifiers().get(0); assertEquals("note", q.getName()); assertEquals(2, q.getValues().length); - assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]); + assertEquals("ORF 8 (AA 1-330)", q.getValues()[0]); assertEquals("pickle", q.getValues()[1]); assertNull(q.getEvidence()); q = ef.getQualifiers().get(1); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA30420.1", q.getValues()[0]); + q = ef.getQualifiers().get(2); assertEquals("translation", q.getName()); assertEquals(1, q.getValues().length); - assertEquals("MSLKDHLIHN", q.getValues()[0]); + assertEquals("MLCF", q.getValues()[0]); assertEquals(1, q.getEvidence().length); assertEquals("Keith", q.getEvidence()[0]); - // feature locations - assertEquals(1, ef.getLocations().size()); - EmblFeatureLocations fl = ef.getLocations().get(0); - assertEquals("single", fl.getLocationType()); - assertTrue(fl.isLocationComplement()); - assertEquals(1, fl.getLocElements().size()); - EmblFeatureLocElement le = fl.getLocElements().get(0); - assertEquals("range", le.getType()); - assertEquals("X53828", le.getAccession()); - assertEquals("1", le.getVersion()); - assertFalse(le.isComplement()); - assertEquals(2, le.getBasePositions().length); - BasePosition bp = le.getBasePositions()[0]; - assertEquals("simple", bp.getType()); - assertEquals("60", bp.getPos()); - bp = le.getBasePositions()[1]; - assertEquals("join", bp.getType()); - assertEquals("1058", bp.getPos()); + /* + * second CDS + */ + ef = entry.getFeatures().get(1); + assertEquals("CDS", ef.getName()); + assertEquals("4..15", ef.getLocation()); + assertEquals(1, ef.getDbRefs().size()); + dbref = ef.getDbRefs().get(0); + assertEquals("UniProtKB/Swiss-Prot", dbref.getSource()); + assertEquals("B0BCM3", dbref.getAccessionId()); + assertEquals("0", dbref.getVersion()); + assertEquals(2, ef.getQualifiers().size()); + q = ef.getQualifiers().get(0); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA30421.1", q.getValues()[0]); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSSS", q.getValues()[0]); /* - * Sequence + * Sequence - verify newline not converted to space (JAL-2029) */ EmblSequence seq = entry.getSequence(); - assertEquals("mRNA", seq.getType()); - assertEquals("2", seq.getVersion()); - assertEquals("GTGACG", seq.getSequence()); + assertEquals( + "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT", + seq.getSequence()); /* * getSequence() converts empty DBRefEntry.version to "0" diff --git a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java new file mode 100644 index 0000000..9957c72 --- /dev/null +++ b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java @@ -0,0 +1,53 @@ +package jalview.datamodel.xdb.embl; + +import java.io.StringReader; + +public class EmblTestHelper +{ + // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml + // dna and translations truncated for convenience + private static final String TESTDATA = "" + + "" + + "" + + "X07574" + + "C. trachomatis plasmid" + + "plasmidunidentified reading frame" + + "" + + "" + /* + * first CDS (range and translation changed to keep test data manageable) + */ + + "" + // test the case of >1 cross-ref to the same database (JAL-2029) + + "" + + "" + + "ORF 8 (AA 1-330)pickle" + + "CAA30420.1" + + "MLCFKeith" + + "" + /* + * second CDS (range and translation changed to keep test data manageable) + */ + + "" + + "" + + "CAA30421.1" + + "MSSS" + + "" + /* + * sequence (modified for test purposes) + * emulates EMBL XML 1.2 which splits sequence data every 60 characters + * see EmblSequence.setSequence + */ + + "GGTATGTCCTCTAGTACAAAC\n" + + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT" + + ""; + + static EmblFile getEmblFile() + { + return EmblFile.getEmblFile(new StringReader(TESTDATA)); + } +} diff --git a/test/jalview/ws/ebi/EBIFetchClientTest.java b/test/jalview/ws/ebi/EBIFetchClientTest.java new file mode 100644 index 0000000..4eaa5b1 --- /dev/null +++ b/test/jalview/ws/ebi/EBIFetchClientTest.java @@ -0,0 +1,113 @@ +package jalview.ws.ebi; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNull; + +import org.testng.annotations.Test; + +public class EBIFetchClientTest +{ + /** + * Test method that constructs URL to fetch from + */ + @Test(groups = "Functional") + public void testBuildUrl() + { + /* + * EMBL + */ + assertEquals("http://www.ebi.ac.uk/ena/data/view/x53838&display=xml", + EBIFetchClient.buildUrl("X53838", "EMBL", "display=xml")); + + /* + * EMBLCDS + */ + assertEquals("http://www.ebi.ac.uk/ena/data/view/caa37824&display=xml", + EBIFetchClient.buildUrl("CAA37824", "EMBL", "display=xml")); + + /* + * Uniprot + */ + assertEquals( + "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/uniprot/p00340/uniprotxml", + EBIFetchClient.buildUrl("P00340", "UNIPROT", "uniprotxml")); + + /* + * PDB / pdb + */ + assertEquals("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/pdb/3a6s/pdb", + EBIFetchClient.buildUrl("3A6S", "PDB", "pdb")); + + /* + * PDB / mmCIF + */ + assertEquals( + "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/pdb/3a6s/mmCIF", + EBIFetchClient.buildUrl("3A6S", "PDB", "mmCIF")); + } + + /** + * Test method that parses db:id;id;id + */ + @Test(groups = "Functional") + public void testParseIds() + { + /* + * pdb, two accessions + */ + StringBuilder queries = new StringBuilder(); + String db = EBIFetchClient.parseIds("pdb:3a6s;1A70", queries); + assertEquals("pdb", db); + assertEquals("3a6s,1A70", queries.toString()); + + /* + * pdb specified on second accession + */ + queries.setLength(0); + queries = new StringBuilder(); + db = EBIFetchClient.parseIds("3a6s;pdb:1A70", queries); + assertEquals("pdb", db); + assertEquals("3a6s,1A70", queries.toString()); + + /* + * uniprot, one accession + */ + queries.setLength(0); + db = EBIFetchClient.parseIds("uniprot:P00340", queries); + assertEquals("uniprot", db); + assertEquals("P00340", queries.toString()); + + /* + * uniprot, one accession, appending to existing queries + */ + queries.setLength(0); + queries.append("P30419"); + db = EBIFetchClient.parseIds("uniprot:P00340", queries); + assertEquals("uniprot", db); + assertEquals("P30419,P00340", queries.toString()); + + /* + * pdb and uniprot mixed - rejected + */ + queries.setLength(0); + db = EBIFetchClient.parseIds("pdb:3a6s;1a70;uniprot:P00340", queries); + assertNull(db); + assertEquals("3a6s,1a70", queries.toString()); + + /* + * pdb and PDB mixed - ok + */ + queries.setLength(0); + db = EBIFetchClient.parseIds("pdb:3a6s;pdb:1a70;PDB:1QIP", queries); + assertEquals("PDB", db); + assertEquals("3a6s,1a70,1QIP", queries.toString()); + + /* + * no database (improper format) + */ + queries.setLength(0); + db = EBIFetchClient.parseIds("P00340", queries); + assertNull(db); + assertEquals("P00340", queries.toString()); + } +}