From 5071850e8470077a8ac2bdc6a53908c70ad5526d Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 20 May 2016 20:24:01 +0100 Subject: [PATCH] JAL-2113 embl_mapping.xml and code updated for EMBL XML v1.2 --- resources/embl_mapping.xml | 55 ++-- src/jalview/datamodel/xdb/embl/BasePosition.java | 68 ----- src/jalview/datamodel/xdb/embl/EmblEntry.java | 128 +++++++-- src/jalview/datamodel/xdb/embl/EmblFeature.java | 15 +- .../datamodel/xdb/embl/EmblFeatureLocElement.java | 125 --------- .../datamodel/xdb/embl/EmblFeatureLocations.java | 191 ------------- src/jalview/datamodel/xdb/embl/EmblSequence.java | 38 --- src/jalview/ws/dbsources/EmblXmlSource.java | 3 +- src/jalview/ws/ebi/EBIFetchClient.java | 17 +- test/jalview/datamodel/xdb/embl/EmblEntryTest.java | 283 +------------------- test/jalview/datamodel/xdb/embl/EmblFileTest.java | 52 ++-- 11 files changed, 171 insertions(+), 804 deletions(-) delete mode 100644 src/jalview/datamodel/xdb/embl/BasePosition.java delete mode 100644 src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java delete mode 100644 src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java diff --git a/resources/embl_mapping.xml b/resources/embl_mapping.xml index ccbde5e..41ee304 100644 --- a/resources/embl_mapping.xml +++ b/resources/embl_mapping.xml @@ -22,33 +22,50 @@ - + - - + - - + - - + + + + + + + + + + + + + + + - + - + @@ -60,19 +77,13 @@ - + - - - - - - @@ -81,25 +92,25 @@ + + + - + - - - - + - + diff --git a/src/jalview/datamodel/xdb/embl/BasePosition.java b/src/jalview/datamodel/xdb/embl/BasePosition.java deleted file mode 100644 index 3737adc..0000000 --- a/src/jalview/datamodel/xdb/embl/BasePosition.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.xdb.embl; - -/** - * Data model for a feature/location/locationElement/basePosition read from an - * EMBL query reply - * - * @see embl_mapping.xml - */ -public class BasePosition -{ - String type; - - String pos; - - /** - * @return the pos - */ - public String getPos() - { - return pos; - } - - /** - * @param pos - * the pos to set - */ - public void setPos(String pos) - { - this.pos = pos; - } - - /** - * @return the type - */ - public String getType() - { - return type; - } - - /** - * @param type - * the type to set - */ - public void setType(String type) - { - this.type = type; - } -} diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 691a4c9..3207411 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -29,6 +29,7 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; +import jalview.util.DnaUtils; import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; @@ -57,7 +58,15 @@ public class EmblEntry String accession; - String version; + String entryVersion; + + String sequenceVersion; + + String moleculeType; + + String topology; + + String sequenceLength; String taxDivision; @@ -248,20 +257,20 @@ public class EmblEntry } /** - * @return the version + * @return the entry version */ - public String getVersion() + public String getEntryVersion() { - return version; + return entryVersion; } /** * @param version * the version to set */ - public void setVersion(String version) + public void setEntryVersion(String version) { - this.version = version; + this.entryVersion = version; } /** @@ -277,7 +286,8 @@ public class EmblEntry SequenceI dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence()); dna.setDescription(desc); - DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession); + DBRefEntry retrievedref = new DBRefEntry(sourceDb, + getSequenceVersion(), accession); dna.addDBRef(retrievedref); // add map to indicate the sequence is a valid coordinate frame for the // dbref @@ -479,7 +489,8 @@ public class EmblEntry DBRefEntry pcdnaref = new DBRefEntry(); pcdnaref.setAccessionId(prid); pcdnaref.setSource(DBRefSource.EMBLCDS); - pcdnaref.setVersion(getVersion()); // same as parent EMBL version. + pcdnaref.setVersion(getSequenceVersion()); // same as parent EMBL + // version. MapList mp = new MapList(new int[] { 1, prseq.length() }, new int[] { 1 + (codonStart - 1), (codonStart - 1) + 3 * prseq.length() }, 1, 3); @@ -559,7 +570,7 @@ public class EmblEntry if (map != null) { Mapping pmap = new Mapping(dna, map.getMap().getInverse()); - pref = new DBRefEntry(sourceDb, getVersion(), + pref = new DBRefEntry(sourceDb, getSequenceVersion(), this.getAccession()); pref.setMap(pmap); if (map.getTo() != null) @@ -578,7 +589,7 @@ public class EmblEntry protEMBLCDS = new DBRefEntry(); protEMBLCDS.setAccessionId(prid); protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); - protEMBLCDS.setVersion(getVersion()); + protEMBLCDS.setVersion(getSequenceVersion()); protEMBLCDS .setMap(new Mapping(product, map.getMap().getInverse())); } @@ -658,28 +669,31 @@ public class EmblEntry */ protected int[] getCdsRanges(EmblFeature feature) { - if (feature.locations == null) + if (feature.location == null) { return new int[] {}; } - int cdsBoundaryCount = 0; // count of all start/stop locations - int[][] cdsLocations = new int[feature.locations.size()][]; - int locationNumber = 0; - for (EmblFeatureLocations loc : feature.locations) - { - int[] locationRanges = loc.getElementRanges(accession); - cdsLocations[locationNumber++] = locationRanges; - cdsBoundaryCount += locationRanges.length; - } - int[] cdsRanges = new int[cdsBoundaryCount]; - int copyTo = 0; - for (int[] ranges : cdsLocations) + List ranges = DnaUtils.parseLocation(feature.location); + return ranges == null ? new int[] {} : listToArray(ranges); + } + + /** + * Converts a list of [start, end] ranges to a single array of [start, end, + * start, end ...] + * + * @param ranges + * @return + */ + int[] listToArray(List ranges) + { + int[] result = new int[ranges.size() * 2]; + int i = 0; + for (int[] range : ranges) { - System.arraycopy(ranges, 0, cdsRanges, copyTo, ranges.length); - copyTo += ranges.length; + result[i++] = range[0]; + result[i++] = range[1]; } - return cdsRanges; - + return result; } /** @@ -754,4 +768,64 @@ public class EmblEntry } return exon; } + + public String getSequenceVersion() + { + return sequenceVersion; + } + + public void setSequenceVersion(String sequenceVersion) + { + this.sequenceVersion = sequenceVersion; + } + + public String getMoleculeType() + { + return moleculeType; + } + + public void setMoleculeType(String moleculeType) + { + this.moleculeType = moleculeType; + } + + public String getTopology() + { + return topology; + } + + public void setTopology(String topology) + { + this.topology = topology; + } + + public String getSequenceLength() + { + return sequenceLength; + } + + public void setSequenceLength(String sequenceLength) + { + this.sequenceLength = sequenceLength; + } + + public String getrCreated() + { + return rCreated; + } + + public void setrCreated(String rCreated) + { + this.rCreated = rCreated; + } + + public String getrLastUpdated() + { + return rLastUpdated; + } + + public void setrLastUpdated(String rLastUpdated) + { + this.rLastUpdated = rLastUpdated; + } } diff --git a/src/jalview/datamodel/xdb/embl/EmblFeature.java b/src/jalview/datamodel/xdb/embl/EmblFeature.java index 7e503c9..51d740b 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFeature.java +++ b/src/jalview/datamodel/xdb/embl/EmblFeature.java @@ -37,7 +37,7 @@ public class EmblFeature Vector qualifiers; - Vector locations; + String location; /** * @return the dbRefs @@ -57,20 +57,19 @@ public class EmblFeature } /** - * @return the locations + * @return the location */ - public Vector getLocations() + public String getLocation() { - return locations; + return location; } /** - * @param locations - * the locations to set + * @param loc */ - public void setLocations(Vector locations) + public void setLocation(String loc) { - this.locations = locations; + this.location = loc; } /** diff --git a/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java b/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java deleted file mode 100644 index 134ce9e..0000000 --- a/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.xdb.embl; - -/** - * Data model for a feature/location/locationElement read from an EMBL query - * reply - * - * @see embl_mapping.xml - */ -public class EmblFeatureLocElement -{ - String type; - - String accession; - - String version; - - boolean complement; - - BasePosition basePositions[]; - - /** - * @return the accession - */ - public String getAccession() - { - return accession; - } - - /** - * @param accession - * the accession to set - */ - public void setAccession(String accession) - { - this.accession = accession; - } - - /** - * @return the basePositions - */ - public BasePosition[] getBasePositions() - { - return basePositions; - } - - /** - * @param basePositions - * the basePositions to set - */ - public void setBasePositions(BasePosition[] basePositions) - { - this.basePositions = basePositions; - } - - /** - * @return the complement - */ - public boolean isComplement() - { - return complement; - } - - /** - * @param complement - * the complement to set - */ - public void setComplement(boolean complement) - { - this.complement = complement; - } - - /** - * @return the type - */ - public String getType() - { - return type; - } - - /** - * @param type - * the type to set - */ - public void setType(String type) - { - this.type = type; - } - - /** - * @return the version - */ - public String getVersion() - { - return version; - } - - /** - * @param version - * the version to set - */ - public void setVersion(String version) - { - this.version = version; - } -} diff --git a/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java b/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java deleted file mode 100644 index 9774004..0000000 --- a/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.xdb.embl; - -import jalview.bin.Cache; -import jalview.util.ArrayUtils; - -import java.util.Arrays; -import java.util.Vector; - -/** - * Data model for a <location> child element of a <feature> read - * from an EMBL query reply - * - * @see embl_mapping.xml - * @see http://www.insdc.org/files/feature_table.html#3.4.2 - */ -public class EmblFeatureLocations -{ - Vector locElements; - - String locationType; - - boolean locationComplement; - - /** - * @return the locationComplement - */ - public boolean isLocationComplement() - { - return locationComplement; - } - - /** - * @param locationComplement - * the locationComplement to set - */ - public void setLocationComplement(boolean locationComplement) - { - this.locationComplement = locationComplement; - } - - /** - * @return the locationType - */ - public String getLocationType() - { - return locationType; - } - - /** - * @param locationType - * the locationType to set - */ - public void setLocationType(String locationType) - { - this.locationType = locationType; - } - - /** - * @return the locElements - */ - public Vector getLocElements() - { - return locElements; - } - - /** - * @param locElements - * the locElements to set - */ - public void setLocElements(Vector locElements) - { - this.locElements = locElements; - } - - /** - * Return all location elements as start-end pairs (without accessions) TODO: - * pass back complement and 'less than or more than' range information Note: - * do not use this since it throws away any accessionIds associated with each - * location! - * - * @return int[] { start1, end1, ... } - */ - public int[] getElementRanges() - { - return getElementRanges(null); - } - - /** - * Return all location elements concerning given accession as start-end pairs. - * If the CDS feature is on the forward strand, then start <= end, if on the - * reverse strand then start > end. - * - * @param accession - * the accession string for which locations are requested, or null - * for all locations - * @return int[] { start1, end1, ... } - */ - int[] getElementRanges(String accession) - { - int sepos = 0; - int[] se = new int[locElements.size() * 2]; - if ("single".equalsIgnoreCase(locationType) - || "join".equalsIgnoreCase(locationType)) - { - for (EmblFeatureLocElement loce : locElements) - { - if (accession == null || loce.accession != null - && accession.equals(loce.accession)) - { - BasePosition bp[] = loce.getBasePositions(); - if (bp.length == 2) - { - try - { - int start = Integer.parseInt(bp[0].getPos()); - int end = Integer.parseInt(bp[1].getPos()); - se[sepos++] = start; - se[sepos++] = end; - } catch (NumberFormatException e) - { - System.err - .println("format error in EMBL CDS location basePosition: " - + e.getMessage()); - } - } - else - { - System.err - .println("format error in EMBL CDS location, basePosition count = " - + bp.length); - } - } - } - } - else if (locationType != null) - { - if (Cache.log != null) - { - Cache.log - .error("EmblFeatureLocations.getElementRanges cannot deal with locationType=='" - + locationType + "'"); - } - else - { - System.err - .println("EmblFeatureLocations.getElementRanges cannot deal with locationType=='" - + locationType + "'"); - } - } - - if (sepos != se.length) - { - /* - * we failed to parse something - trim off null values - */ - se = Arrays.copyOf(se, sepos); - } - - /* - * If on the complement, reverse the ranges to [end, start, ...end1, start1]. - * For an example of a joined complement, see (tRNA feature) CAGL0B00165r on - * http://www.ebi.ac.uk/ena/data/view/CR380948&display=xml - * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/CR380948/emblxml - */ - if (locationComplement) - { - ArrayUtils.reverseIntArray(se); - } - return se; - } -} diff --git a/src/jalview/datamodel/xdb/embl/EmblSequence.java b/src/jalview/datamodel/xdb/embl/EmblSequence.java index 2a6fa84..5a18929 100644 --- a/src/jalview/datamodel/xdb/embl/EmblSequence.java +++ b/src/jalview/datamodel/xdb/embl/EmblSequence.java @@ -27,12 +27,8 @@ package jalview.datamodel.xdb.embl; */ public class EmblSequence { - String version; - String sequence; - String type; - /** * @return the sequence */ @@ -49,38 +45,4 @@ public class EmblSequence { this.sequence = sequence; } - - /** - * @return the type - */ - public String getType() - { - return type; - } - - /** - * @param type - * the type to set - */ - public void setType(String type) - { - this.type = type; - } - - /** - * @return the version - */ - public String getVersion() - { - return version; - } - - /** - * @param version - * the version to set - */ - public void setVersion(String version) - { - this.version = version; - } } diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 0085221..b71f032 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -63,7 +63,8 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy try { reply = dbFetch.fetchDataAsFile( - emprefx.toLowerCase() + ":" + query.trim(), "emblxml", null, + emprefx.toLowerCase() + ":" + query.trim(), "display=xml", + null, ".xml"); } catch (Exception e) { diff --git a/src/jalview/ws/ebi/EBIFetchClient.java b/src/jalview/ws/ebi/EBIFetchClient.java index 9f6bc65..f55aad4 100644 --- a/src/jalview/ws/ebi/EBIFetchClient.java +++ b/src/jalview/ws/ebi/EBIFetchClient.java @@ -193,20 +193,23 @@ public class EBIFetchClient /* * JAL-1855 dbfetch from ena_sequence, ena_coding */ - if (dbPath.equalsIgnoreCase(DBRefSource.EMBL)) + String url; + if (dbPath.equalsIgnoreCase(DBRefSource.EMBL) + || dbPath.equalsIgnoreCase(DBRefSource.EMBLCDS)) { - dbPath = "ena_sequence"; + url = "http://www.ebi.ac.uk/ena/data/view/" + ids.toLowerCase() + + (format != null ? "&" + format : ""); } - else if (dbPath.equalsIgnoreCase(DBRefSource.EMBLCDS)) + else { - dbPath = "ena_coding"; + url = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/" + + dbPath.toLowerCase() + "/" + ids.toLowerCase() + + (format != null ? "/" + format : ""); } try { - URL rcall = new URL("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/" - + dbPath.toLowerCase() + "/" + ids.toLowerCase() - + (format != null ? "/" + format : "")); + URL rcall = new URL(url); InputStream is = new BufferedInputStream(rcall.openStream()); if (outFile != null) diff --git a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java index 9fffc45..b331532 100644 --- a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java @@ -1,9 +1,6 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertSame; - -import jalview.util.MappingUtils; import java.util.Arrays; import java.util.Vector; @@ -21,288 +18,10 @@ public class EmblEntryTest * Make a (CDS) Feature with 4 locations */ EmblFeature cds = new EmblFeature(); - Vector locs = new Vector(); - cds.setLocations(locs); - - /* - * single range [10-20] - */ - EmblFeatureLocations loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - Vector elements = new Vector(); - EmblFeatureLocElement locElement = new EmblFeatureLocElement(); - BasePosition b1 = new BasePosition(); - b1.setPos("10"); - BasePosition b2 = new BasePosition(); - b2.setPos("20"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [30-40] - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - b2 = new BasePosition(); - b2.setPos("40"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * join range [50-60], [70-80] - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("50"); - b2 = new BasePosition(); - b2.setPos("60"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("70"); - b2 = new BasePosition(); - b2.setPos("80"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [90-100], [110-120] - * this should be the same as complement(join(90..100,110.120)) - * which is "join 90-100 and 110-120, then complement" - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("90"); - b2 = new BasePosition(); - b2.setPos("100"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("110"); - b2 = new BasePosition(); - b2.setPos("120"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); + cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(join(90..100,110..120)))"); int[] exons = testee.getCdsRanges(cds); assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110, 100, 90]", Arrays.toString(exons)); } - - @Test(groups = "Functional") - public void testGetCdsRanges_badData() - { - EmblEntry testee = new EmblEntry(); - - /* - * Make a (CDS) Feature with 4 locations - */ - EmblFeature cds = new EmblFeature(); - Vector locs = new Vector(); - cds.setLocations(locs); - - /* - * single range [10-20] - */ - EmblFeatureLocations loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - Vector elements = new Vector(); - EmblFeatureLocElement locElement = new EmblFeatureLocElement(); - BasePosition b1 = new BasePosition(); - b1.setPos("10"); - BasePosition b2 = new BasePosition(); - b2.setPos("20"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * single range with missing end position - should be skipped - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - locElement.setBasePositions(new BasePosition[] { b1 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * single range with extra base position - should be skipped - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - locElement.setBasePositions(new BasePosition[] { b1, b1, b1 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * single valid range [50-60] to finish - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("50"); - b2 = new BasePosition(); - b2.setPos("60"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - int[] exons = testee.getCdsRanges(cds); - assertEquals("[10, 20, 50, 60]", Arrays.toString(exons)); - } - - /** - * Test retrieval of exon locations matching an accession id - */ - @Test(groups = "Functional") - public void testGetCdsRanges_forAccession() - { - EmblEntry testee = new EmblEntry(); - String accession = "A1234"; - testee.setAccession(accession); - /* - * Make a (CDS) Feature with 4 locations - */ - EmblFeature cds = new EmblFeature(); - Vector locs = new Vector(); - cds.setLocations(locs); - - /* - * single range [10-20] for 'this' accession - */ - EmblFeatureLocations loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(false); - Vector elements = new Vector(); - EmblFeatureLocElement locElement = new EmblFeatureLocElement(); - locElement.setAccession(accession); - BasePosition b1 = new BasePosition(); - b1.setPos("10"); - BasePosition b2 = new BasePosition(); - b2.setPos("20"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [30-40] - no accession - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("single"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - b1 = new BasePosition(); - b1.setPos("30"); - b2 = new BasePosition(); - b2.setPos("40"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * join range [50-60] this accession, [70-80] another - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(false); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - locElement.setAccession(accession); - b1 = new BasePosition(); - b1.setPos("50"); - b2 = new BasePosition(); - b2.setPos("60"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - locElement.setAccession("notme"); - b1 = new BasePosition(); - b1.setPos("70"); - b2 = new BasePosition(); - b2.setPos("80"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * complement range [90-100] wrong accession, [110-120] good - * this should be the same as complement(join(90..100,110.120)) - * which is "join 90-100 and 110-120, then complement" - */ - loc = new EmblFeatureLocations(); - loc.setLocationType("join"); - loc.setLocationComplement(true); - elements = new Vector(); - locElement = new EmblFeatureLocElement(); - locElement.setAccession("wrong"); - b1 = new BasePosition(); - b1.setPos("90"); - b2 = new BasePosition(); - b2.setPos("100"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - locElement = new EmblFeatureLocElement(); - locElement.setAccession(accession); - b1 = new BasePosition(); - b1.setPos("110"); - b2 = new BasePosition(); - b2.setPos("120"); - locElement.setBasePositions(new BasePosition[] { b1, b2 }); - elements.add(locElement); - loc.setLocElements(elements); - locs.add(loc); - - /* - * verify we pick out only ranges for A1234 - */ - int[] exons = testee.getCdsRanges(cds); - assertEquals("[10, 20, 50, 60, 120, 110]", - Arrays.toString(exons)); - } } diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index c6a94d7..fda25e3 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -21,9 +21,7 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; -import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.DBRefEntry; @@ -34,22 +32,21 @@ import org.testng.annotations.Test; public class EmblFileTest { - // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml + // adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml private static final String TESTDATA = "" - + "" - + "" + + "" + + "" + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)" + "L-lactate dehydrogenasechutney" - + "" - + "" - + "" + + "" + + "" + + "" + + "" + "L-lactate dehydrogenase A-chainpickle" + "MSLKDHLIHNKeith" - + "" - + "" - + "601058" - + "" - + "GTGACG"; + + "" + + "GTGACG"; @Test(groups = { "Functional" }) public void testGetEmblFile() @@ -64,6 +61,11 @@ public class EmblFileTest "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)", entry.getDesc()); assertEquals("2005-04-18", entry.getLastUpdated()); + assertEquals("mRNA", entry.getMoleculeType()); + assertEquals("1", entry.getSequenceVersion()); + assertEquals("3", entry.getEntryVersion()); + assertEquals("linear", entry.getTopology()); + assertEquals("1575", entry.getSequenceLength()); /* * FIXME these assertions fail - values are null - why?? Adding or removing @@ -92,11 +94,12 @@ public class EmblFileTest assertEquals("0", dbref.getVersion()); /* - * sequence features + * sequence feature for CDS */ assertEquals(1, entry.getFeatures().size()); EmblFeature ef = entry.getFeatures().get(0); assertEquals("CDS", ef.getName()); + assertEquals("60..1058", ef.getLocation()); assertEquals(2, ef.getDbRefs().size()); dbref = ef.getDbRefs().get(0); assertEquals("GOA", dbref.getSource()); @@ -123,31 +126,10 @@ public class EmblFileTest assertEquals(1, q.getEvidence().length); assertEquals("Keith", q.getEvidence()[0]); - // feature locations - assertEquals(1, ef.getLocations().size()); - EmblFeatureLocations fl = ef.getLocations().get(0); - assertEquals("single", fl.getLocationType()); - assertTrue(fl.isLocationComplement()); - assertEquals(1, fl.getLocElements().size()); - EmblFeatureLocElement le = fl.getLocElements().get(0); - assertEquals("range", le.getType()); - assertEquals("X53828", le.getAccession()); - assertEquals("1", le.getVersion()); - assertFalse(le.isComplement()); - assertEquals(2, le.getBasePositions().length); - BasePosition bp = le.getBasePositions()[0]; - assertEquals("simple", bp.getType()); - assertEquals("60", bp.getPos()); - bp = le.getBasePositions()[1]; - assertEquals("join", bp.getType()); - assertEquals("1058", bp.getPos()); - /* * Sequence */ EmblSequence seq = entry.getSequence(); - assertEquals("mRNA", seq.getType()); - assertEquals("2", seq.getVersion()); assertEquals("GTGACG", seq.getSequence()); /* -- 1.7.10.2