<!--
History: Originally created from EMBL_common_V1.0
Updated on 24th April 2007 for WsDBFetch Service move to EMBL_Services_V1.1.xsd
+ Updated May 2016 for EMBL XML 1.2 JAL-2113 JAL-2114
+ see ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/ENA.embl.xsd
+ see http://www.ebi.ac.uk/ena/submit/data-formats
-->
<class name="jalview.datamodel.xdb.embl.EmblFile">
- <map-to xml="EMBL_Services"/>
+ <map-to xml="ROOT"/>
<field name="entries" type="jalview.datamodel.xdb.embl.EmblEntry" collection="vector">
<bind-xml name="entry"/>
</field>
-
<field name="errors" type="jalview.datamodel.xdb.embl.EmblError" collection="vector">
<bind-xml name="Error"/>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblEntry">
<field name="accession" type="string">
- <bind-xml location="accession" node="attribute"/>
+ <bind-xml name="accession" node="attribute"/>
</field>
- <!-- May 2015 changed from last-updated to match xml -->
<field name="lastUpdated" type="string">
- <bind-xml location="lastUpdated" node="attribute"/>
+ <bind-xml name="lastUpdated" node="attribute"/>
</field>
- <field name="version" type="string">
- <bind-xml location="version" node="attribute"/>
+ <!--
+ in EMBL XML 1.2 sequence/@version becomes entry/@version
+ and entry/@version becomes entry/@entryVersion
+ -->
+ <field name="sequenceVersion" type="string">
+ <bind-xml name="version" node="attribute"/>
+ </field>
+ <field name="entryVersion" type="string">
+ <bind-xml name="entryVersion" node="attribute"/>
+ </field>
+ <field name="moleculeType" type="string">
+ <bind-xml name="moleculeType" node="attribute"/>
+ </field>
+ <field name="sequenceLength" type="string">
+ <bind-xml name="sequenceLength" node="attribute"/>
+ </field>
+ <!-- topology is an attribute of the entry element itself, like moleculeType and sequenceLength, so no nested location path is given -->
+ <field name="topology" type="string">
+ <bind-xml name="topology" node="attribute"/>
</field>
<field name="rCreated" type="string">
- <bind-xml location="releaseCreated" node="attribute"/>
+ <bind-xml name="releaseCreated" node="attribute"/>
</field>
<field name="rLastUpdated" type="string">
- <bind-xml location="releaseLastUpdated" node="attribute"/>
+ <bind-xml name="releaseLastUpdated" node="attribute"/>
</field>
<field name="desc" type="string">
<bind-xml name="description" node="element"/>
<bind-xml name="feature"/>
</field>
<field name="dbRefs" type="jalview.datamodel.DBRefEntry" collection="vector">
- <bind-xml name="dbreference" />
+ <bind-xml name="xref" />
</field>
<field name="sequence" type="jalview.datamodel.xdb.embl.EmblSequence">
<bind-xml name="sequence"/>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblSequence">
- <field name="type" type="string">
- <bind-xml name="type" node="attribute" location="type"/>
- </field>
- <field name="version" type="string">
- <bind-xml name="version" node="attribute" location="version"/>
- </field>
<field name="sequence" type="string">
<bind-xml node="text"/>
</field>
<field name="name" type="string">
<bind-xml name="name" node="attribute"/>
</field>
+ <field name="location" type="string">
+ <bind-xml name="location" node="attribute"/>
+ </field>
<field name="dbRefs" type="jalview.datamodel.DBRefEntry" collection="vector">
- <bind-xml name="dbreference" node="element"/>
+ <bind-xml name="xref" node="element"/>
</field>
<field name="qualifiers" type="jalview.datamodel.xdb.embl.Qualifier" collection="vector">
<bind-xml name="qualifier"/>
</field>
- <field name="locations" type="jalview.datamodel.xdb.embl.EmblFeatureLocations" collection="vector">
- <bind-xml name="location"/>
- </field>
</class>
<class name="jalview.datamodel.DBRefEntry" verify-constructable="false">
<field name="accessionId" type="java.lang.String">
- <bind-xml name="primary" node="attribute"/>
+ <bind-xml name="id" node="attribute"/>
</field>
<field name="source" type="java.lang.String">
<bind-xml name="db" node="attribute"/>
</field>
<field name="version" type="string">
- <bind-xml name="secondary" node="attribute"/>
+ <bind-xml name="secondaryId" node="attribute"/>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.Qualifier" verify-constructable="false">
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.datamodel.xdb.embl;
-
-/**
- * Data model for a feature/location/locationElement/basePosition read from an
- * EMBL query reply
- *
- * @see embl_mapping.xml
- */
-public class BasePosition
-{
- String type;
-
- String pos;
-
- /**
- * @return the pos
- */
- public String getPos()
- {
- return pos;
- }
-
- /**
- * @param pos
- * the pos to set
- */
- public void setPos(String pos)
- {
- this.pos = pos;
- }
-
- /**
- * @return the type
- */
- public String getType()
- {
- return type;
- }
-
- /**
- * @param type
- * the type to set
- */
- public void setType(String type)
- {
- this.type = type;
- }
-}
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
+import jalview.util.DnaUtils;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;
String accession;
- String version;
+ String entryVersion;
+
+ String sequenceVersion;
+
+ String moleculeType;
+
+ String topology;
+
+ String sequenceLength;
String taxDivision;
}
/**
- * @return the version
+ * @return the entry version
*/
- public String getVersion()
+ public String getEntryVersion()
{
- return version;
+ return entryVersion;
}
/**
* Sets the entry version (held separately from the sequence version).
*
* @param version
* the entry version to set
*/
- public void setVersion(String version)
+ public void setEntryVersion(String version)
{
- this.version = version;
+ this.entryVersion = version;
}
/**
SequenceI dna = new Sequence(sourceDb + "|" + accession,
sequence.getSequence());
dna.setDescription(desc);
- DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession);
+ DBRefEntry retrievedref = new DBRefEntry(sourceDb,
+ getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
DBRefEntry pcdnaref = new DBRefEntry();
pcdnaref.setAccessionId(prid);
pcdnaref.setSource(DBRefSource.EMBLCDS);
- pcdnaref.setVersion(getVersion()); // same as parent EMBL version.
+ pcdnaref.setVersion(getSequenceVersion()); // same as parent EMBL
+ // version.
MapList mp = new MapList(new int[] { 1, prseq.length() },
new int[] { 1 + (codonStart - 1),
(codonStart - 1) + 3 * prseq.length() }, 1, 3);
if (map != null)
{
Mapping pmap = new Mapping(dna, map.getMap().getInverse());
- pref = new DBRefEntry(sourceDb, getVersion(),
+ pref = new DBRefEntry(sourceDb, getSequenceVersion(),
this.getAccession());
pref.setMap(pmap);
if (map.getTo() != null)
protEMBLCDS = new DBRefEntry();
protEMBLCDS.setAccessionId(prid);
protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
- protEMBLCDS.setVersion(getVersion());
+ protEMBLCDS.setVersion(getSequenceVersion());
protEMBLCDS
.setMap(new Mapping(product, map.getMap().getInverse()));
}
*/
protected int[] getCdsRanges(EmblFeature feature)
{
- if (feature.locations == null)
+ if (feature.location == null)
{
return new int[] {};
}
- int cdsBoundaryCount = 0; // count of all start/stop locations
- int[][] cdsLocations = new int[feature.locations.size()][];
- int locationNumber = 0;
- for (EmblFeatureLocations loc : feature.locations)
- {
- int[] locationRanges = loc.getElementRanges(accession);
- cdsLocations[locationNumber++] = locationRanges;
- cdsBoundaryCount += locationRanges.length;
- }
- int[] cdsRanges = new int[cdsBoundaryCount];
- int copyTo = 0;
- for (int[] ranges : cdsLocations)
+ List<int[]> ranges = DnaUtils.parseLocation(feature.location);
+ return ranges == null ? new int[] {} : listToArray(ranges);
+ }
+
+ /**
+ * Flattens a list of [start, end] ranges into a single array laid out as
+ * [start1, end1, start2, end2, ...], keeping the ranges in list order. Only
+ * the first two elements of each range are copied.
+ *
+ * @param ranges
+ *          the ranges to flatten; must not be null, and each entry must hold
+ *          at least two values
+ * @return a new array of length 2 * ranges.size()
+ */
+ int[] listToArray(List<int[]> ranges)
+ {
+ int[] result = new int[ranges.size() * 2];
+ int i = 0;
+ for (int[] range : ranges)
{
- System.arraycopy(ranges, 0, cdsRanges, copyTo, ranges.length);
- copyTo += ranges.length;
+ result[i++] = range[0];
+ result[i++] = range[1];
}
- return cdsRanges;
-
+ return result;
}
/**
}
return exon;
}
+
+ public String getSequenceVersion()
+ {
+ return sequenceVersion;
+ }
+
+ public void setSequenceVersion(String sequenceVersion)
+ {
+ this.sequenceVersion = sequenceVersion;
+ }
+
+ public String getMoleculeType()
+ {
+ return moleculeType;
+ }
+
+ public void setMoleculeType(String moleculeType)
+ {
+ this.moleculeType = moleculeType;
+ }
+
+ public String getTopology()
+ {
+ return topology;
+ }
+
+ public void setTopology(String topology)
+ {
+ this.topology = topology;
+ }
+
+ public String getSequenceLength()
+ {
+ return sequenceLength;
+ }
+
+ public void setSequenceLength(String sequenceLength)
+ {
+ this.sequenceLength = sequenceLength;
+ }
+
+ public String getrCreated()
+ {
+ return rCreated;
+ }
+
+ public void setrCreated(String rCreated)
+ {
+ this.rCreated = rCreated;
+ }
+
+ public String getrLastUpdated()
+ {
+ return rLastUpdated;
+ }
+
+ public void setrLastUpdated(String rLastUpdated)
+ {
+ this.rLastUpdated = rLastUpdated;
+ }
}
Vector<Qualifier> qualifiers;
- Vector<EmblFeatureLocations> locations;
+ String location;
/**
* @return the dbRefs
}
/**
- * @return the locations
+ * @return the location
*/
- public Vector<EmblFeatureLocations> getLocations()
+ public String getLocation()
{
- return locations;
+ return location;
}
/**
- * @param locations
- * the locations to set
+ * @param loc
+ * the location string to set for this feature, for example "60..1058"
*/
- public void setLocations(Vector<EmblFeatureLocations> locations)
+ public void setLocation(String loc)
{
- this.locations = locations;
+ this.location = loc;
}
/**
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.datamodel.xdb.embl;
-
-/**
- * Data model for a feature/location/locationElement read from an EMBL query
- * reply
- *
- * @see embl_mapping.xml
- */
-public class EmblFeatureLocElement
-{
- String type;
-
- String accession;
-
- String version;
-
- boolean complement;
-
- BasePosition basePositions[];
-
- /**
- * @return the accession
- */
- public String getAccession()
- {
- return accession;
- }
-
- /**
- * @param accession
- * the accession to set
- */
- public void setAccession(String accession)
- {
- this.accession = accession;
- }
-
- /**
- * @return the basePositions
- */
- public BasePosition[] getBasePositions()
- {
- return basePositions;
- }
-
- /**
- * @param basePositions
- * the basePositions to set
- */
- public void setBasePositions(BasePosition[] basePositions)
- {
- this.basePositions = basePositions;
- }
-
- /**
- * @return the complement
- */
- public boolean isComplement()
- {
- return complement;
- }
-
- /**
- * @param complement
- * the complement to set
- */
- public void setComplement(boolean complement)
- {
- this.complement = complement;
- }
-
- /**
- * @return the type
- */
- public String getType()
- {
- return type;
- }
-
- /**
- * @param type
- * the type to set
- */
- public void setType(String type)
- {
- this.type = type;
- }
-
- /**
- * @return the version
- */
- public String getVersion()
- {
- return version;
- }
-
- /**
- * @param version
- * the version to set
- */
- public void setVersion(String version)
- {
- this.version = version;
- }
-}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.datamodel.xdb.embl;
-
-import jalview.bin.Cache;
-import jalview.util.ArrayUtils;
-
-import java.util.Arrays;
-import java.util.Vector;
-
-/**
- * Data model for a <location> child element of a <feature> read
- * from an EMBL query reply
- *
- * @see embl_mapping.xml
- * @see http://www.insdc.org/files/feature_table.html#3.4.2
- */
-public class EmblFeatureLocations
-{
- Vector<EmblFeatureLocElement> locElements;
-
- String locationType;
-
- boolean locationComplement;
-
- /**
- * @return the locationComplement
- */
- public boolean isLocationComplement()
- {
- return locationComplement;
- }
-
- /**
- * @param locationComplement
- * the locationComplement to set
- */
- public void setLocationComplement(boolean locationComplement)
- {
- this.locationComplement = locationComplement;
- }
-
- /**
- * @return the locationType
- */
- public String getLocationType()
- {
- return locationType;
- }
-
- /**
- * @param locationType
- * the locationType to set
- */
- public void setLocationType(String locationType)
- {
- this.locationType = locationType;
- }
-
- /**
- * @return the locElements
- */
- public Vector<EmblFeatureLocElement> getLocElements()
- {
- return locElements;
- }
-
- /**
- * @param locElements
- * the locElements to set
- */
- public void setLocElements(Vector<EmblFeatureLocElement> locElements)
- {
- this.locElements = locElements;
- }
-
- /**
- * Return all location elements as start-end pairs (without accessions) TODO:
- * pass back complement and 'less than or more than' range information Note:
- * do not use this since it throws away any accessionIds associated with each
- * location!
- *
- * @return int[] { start1, end1, ... }
- */
- public int[] getElementRanges()
- {
- return getElementRanges(null);
- }
-
- /**
- * Return all location elements concerning given accession as start-end pairs.
- * If the CDS feature is on the forward strand, then start <= end, if on the
- * reverse strand then start > end.
- *
- * @param accession
- * the accession string for which locations are requested, or null
- * for all locations
- * @return int[] { start1, end1, ... }
- */
- int[] getElementRanges(String accession)
- {
- int sepos = 0;
- int[] se = new int[locElements.size() * 2];
- if ("single".equalsIgnoreCase(locationType)
- || "join".equalsIgnoreCase(locationType))
- {
- for (EmblFeatureLocElement loce : locElements)
- {
- if (accession == null || loce.accession != null
- && accession.equals(loce.accession))
- {
- BasePosition bp[] = loce.getBasePositions();
- if (bp.length == 2)
- {
- try
- {
- int start = Integer.parseInt(bp[0].getPos());
- int end = Integer.parseInt(bp[1].getPos());
- se[sepos++] = start;
- se[sepos++] = end;
- } catch (NumberFormatException e)
- {
- System.err
- .println("format error in EMBL CDS location basePosition: "
- + e.getMessage());
- }
- }
- else
- {
- System.err
- .println("format error in EMBL CDS location, basePosition count = "
- + bp.length);
- }
- }
- }
- }
- else if (locationType != null)
- {
- if (Cache.log != null)
- {
- Cache.log
- .error("EmblFeatureLocations.getElementRanges cannot deal with locationType=='"
- + locationType + "'");
- }
- else
- {
- System.err
- .println("EmblFeatureLocations.getElementRanges cannot deal with locationType=='"
- + locationType + "'");
- }
- }
-
- if (sepos != se.length)
- {
- /*
- * we failed to parse something - trim off null values
- */
- se = Arrays.copyOf(se, sepos);
- }
-
- /*
- * If on the complement, reverse the ranges to [end, start, ...end1, start1].
- * For an example of a joined complement, see (tRNA feature) CAGL0B00165r on
- * http://www.ebi.ac.uk/ena/data/view/CR380948&display=xml
- * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/CR380948/emblxml
- */
- if (locationComplement)
- {
- ArrayUtils.reverseIntArray(se);
- }
- return se;
- }
-}
*/
public class EmblSequence
{
- String version;
-
String sequence;
- String type;
-
/**
* @return the sequence
*/
{
this.sequence = sequence;
}
-
- /**
- * @return the type
- */
- public String getType()
- {
- return type;
- }
-
- /**
- * @param type
- * the type to set
- */
- public void setType(String type)
- {
- this.type = type;
- }
-
- /**
- * @return the version
- */
- public String getVersion()
- {
- return version;
- }
-
- /**
- * @param version
- * the version to set
- */
- public void setVersion(String version)
- {
- this.version = version;
- }
}
try
{
reply = dbFetch.fetchDataAsFile(
- emprefx.toLowerCase() + ":" + query.trim(), "emblxml", null,
+ emprefx.toLowerCase() + ":" + query.trim(), "display=xml",
+ null,
".xml");
} catch (Exception e)
{
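/*
* JAL-1855 previously mapped EMBL and EMBLCDS to the dbfetch databases
* ena_sequence and ena_coding; both are now fetched from the ENA data view
* URL, and only other databases still go through dbfetch
*/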
- if (dbPath.equalsIgnoreCase(DBRefSource.EMBL))
+ String url;
+ if (dbPath.equalsIgnoreCase(DBRefSource.EMBL)
+ || dbPath.equalsIgnoreCase(DBRefSource.EMBLCDS))
{
- dbPath = "ena_sequence";
+ url = "http://www.ebi.ac.uk/ena/data/view/" + ids.toLowerCase()
+ + (format != null ? "&" + format : "");
}
- else if (dbPath.equalsIgnoreCase(DBRefSource.EMBLCDS))
+ else
{
- dbPath = "ena_coding";
+ url = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/"
+ + dbPath.toLowerCase() + "/" + ids.toLowerCase()
+ + (format != null ? "/" + format : "");
}
try
{
- URL rcall = new URL("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/"
- + dbPath.toLowerCase() + "/" + ids.toLowerCase()
- + (format != null ? "/" + format : ""));
+ URL rcall = new URL(url);
InputStream is = new BufferedInputStream(rcall.openStream());
if (outFile != null)
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertSame;
-
-import jalview.util.MappingUtils;
import java.util.Arrays;
import java.util.Vector;
* Make a (CDS) Feature with 4 locations
*/
EmblFeature cds = new EmblFeature();
- Vector<EmblFeatureLocations> locs = new Vector<EmblFeatureLocations>();
- cds.setLocations(locs);
-
- /*
- * single range [10-20]
- */
- EmblFeatureLocations loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(false);
- Vector<EmblFeatureLocElement> elements = new Vector<EmblFeatureLocElement>();
- EmblFeatureLocElement locElement = new EmblFeatureLocElement();
- BasePosition b1 = new BasePosition();
- b1.setPos("10");
- BasePosition b2 = new BasePosition();
- b2.setPos("20");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * complement range [30-40]
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(true);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("30");
- b2 = new BasePosition();
- b2.setPos("40");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * join range [50-60], [70-80]
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("join");
- loc.setLocationComplement(false);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("50");
- b2 = new BasePosition();
- b2.setPos("60");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("70");
- b2 = new BasePosition();
- b2.setPos("80");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * complement range [90-100], [110-120]
- * this should be the same as complement(join(90..100,110.120))
- * which is "join 90-100 and 110-120, then complement"
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("join");
- loc.setLocationComplement(true);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("90");
- b2 = new BasePosition();
- b2.setPos("100");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("110");
- b2 = new BasePosition();
- b2.setPos("120");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
+ cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(join(90..100,110..120)))");
int[] exons = testee.getCdsRanges(cds);
assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110, 100, 90]",
Arrays.toString(exons));
}
-
- @Test(groups = "Functional")
- public void testGetCdsRanges_badData()
- {
- EmblEntry testee = new EmblEntry();
-
- /*
- * Make a (CDS) Feature with 4 locations
- */
- EmblFeature cds = new EmblFeature();
- Vector<EmblFeatureLocations> locs = new Vector<EmblFeatureLocations>();
- cds.setLocations(locs);
-
- /*
- * single range [10-20]
- */
- EmblFeatureLocations loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(false);
- Vector<EmblFeatureLocElement> elements = new Vector<EmblFeatureLocElement>();
- EmblFeatureLocElement locElement = new EmblFeatureLocElement();
- BasePosition b1 = new BasePosition();
- b1.setPos("10");
- BasePosition b2 = new BasePosition();
- b2.setPos("20");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * single range with missing end position - should be skipped
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(false);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("30");
- locElement.setBasePositions(new BasePosition[] { b1 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * single range with extra base position - should be skipped
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(false);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("30");
- locElement.setBasePositions(new BasePosition[] { b1, b1, b1 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * single valid range [50-60] to finish
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(false);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("50");
- b2 = new BasePosition();
- b2.setPos("60");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- int[] exons = testee.getCdsRanges(cds);
- assertEquals("[10, 20, 50, 60]", Arrays.toString(exons));
- }
-
- /**
- * Test retrieval of exon locations matching an accession id
- */
- @Test(groups = "Functional")
- public void testGetCdsRanges_forAccession()
- {
- EmblEntry testee = new EmblEntry();
- String accession = "A1234";
- testee.setAccession(accession);
- /*
- * Make a (CDS) Feature with 4 locations
- */
- EmblFeature cds = new EmblFeature();
- Vector<EmblFeatureLocations> locs = new Vector<EmblFeatureLocations>();
- cds.setLocations(locs);
-
- /*
- * single range [10-20] for 'this' accession
- */
- EmblFeatureLocations loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(false);
- Vector<EmblFeatureLocElement> elements = new Vector<EmblFeatureLocElement>();
- EmblFeatureLocElement locElement = new EmblFeatureLocElement();
- locElement.setAccession(accession);
- BasePosition b1 = new BasePosition();
- b1.setPos("10");
- BasePosition b2 = new BasePosition();
- b2.setPos("20");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * complement range [30-40] - no accession
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("single");
- loc.setLocationComplement(true);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- b1 = new BasePosition();
- b1.setPos("30");
- b2 = new BasePosition();
- b2.setPos("40");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * join range [50-60] this accession, [70-80] another
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("join");
- loc.setLocationComplement(false);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- locElement.setAccession(accession);
- b1 = new BasePosition();
- b1.setPos("50");
- b2 = new BasePosition();
- b2.setPos("60");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- locElement = new EmblFeatureLocElement();
- locElement.setAccession("notme");
- b1 = new BasePosition();
- b1.setPos("70");
- b2 = new BasePosition();
- b2.setPos("80");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * complement range [90-100] wrong accession, [110-120] good
- * this should be the same as complement(join(90..100,110.120))
- * which is "join 90-100 and 110-120, then complement"
- */
- loc = new EmblFeatureLocations();
- loc.setLocationType("join");
- loc.setLocationComplement(true);
- elements = new Vector<EmblFeatureLocElement>();
- locElement = new EmblFeatureLocElement();
- locElement.setAccession("wrong");
- b1 = new BasePosition();
- b1.setPos("90");
- b2 = new BasePosition();
- b2.setPos("100");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- locElement = new EmblFeatureLocElement();
- locElement.setAccession(accession);
- b1 = new BasePosition();
- b1.setPos("110");
- b2 = new BasePosition();
- b2.setPos("120");
- locElement.setBasePositions(new BasePosition[] { b1, b2 });
- elements.add(locElement);
- loc.setLocElements(elements);
- locs.add(loc);
-
- /*
- * verify we pick out only ranges for A1234
- */
- int[] exons = testee.getCdsRanges(cds);
- assertEquals("[10, 20, 50, 60, 120, 110]",
- Arrays.toString(exons));
- }
}
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNull;
-import static org.testng.AssertJUnit.assertTrue;
import jalview.datamodel.DBRefEntry;
public class EmblFileTest
{
- // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ // adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml
private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
- + "<EMBL_Services>"
- + "<entry accession=\"X53828\" version=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\">"
+ + "<ROOT>"
+ + "<entry accession=\"X53828\" entryVersion=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\""
+ + " version=\"1\" moleculeType=\"mRNA\" topology=\"linear\" sequenceLength=\"1575\">"
+ "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
+ "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
- + "<dbreference db=\"EuropePMC\" primary=\"PMC1460223\" secondary=\"9649548\" />"
- + "<dbreference db=\"MD5\" primary=\"d3b68\" />"
- + "<feature name=\"CDS\"><dbreference db=\"GOA\" primary=\"P00340\" secondary=\"2.1\" /><dbreference db=\"InterPro\" primary=\"IPR001236\" />"
+ + "<xref db=\"EuropePMC\" id=\"PMC1460223\" secondaryId=\"9649548\" />"
+ + "<xref db=\"MD5\" id=\"d3b68\" />"
+ + "<feature name=\"CDS\" location=\"60..1058\">"
+ + "<xref db=\"GOA\" id=\"P00340\" secondaryId=\"2.1\" /><xref db=\"InterPro\" id=\"IPR001236\" />"
+ "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
+ "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
- + "<location type=\"single\" complement=\"true\">"
- + "<locationElement type=\"range\" accession=\"X53828\" version=\"1\" complement=\"false\">"
- + "<basePosition type=\"simple\">60</basePosition><basePosition type=\"join\">1058</basePosition>"
- + "</locationElement></location></feature>"
- + "<sequence type=\"mRNA\" version=\"2\">GTGACG</sequence></entry></EMBL_Services>";
+ + "</feature>"
+ + "<sequence>GTGACG</sequence></entry></ROOT>";
@Test(groups = { "Functional" })
public void testGetEmblFile()
"Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)",
entry.getDesc());
assertEquals("2005-04-18", entry.getLastUpdated());
+ assertEquals("mRNA", entry.getMoleculeType());
+ assertEquals("1", entry.getSequenceVersion());
+ assertEquals("3", entry.getEntryVersion());
+ assertEquals("linear", entry.getTopology());
+ assertEquals("1575", entry.getSequenceLength());
/*
* FIXME these assertions fail - values are null - why?? Adding or removing
assertEquals("0", dbref.getVersion());
/*
- * sequence features
+ * sequence feature for CDS
*/
assertEquals(1, entry.getFeatures().size());
EmblFeature ef = entry.getFeatures().get(0);
assertEquals("CDS", ef.getName());
+ assertEquals("60..1058", ef.getLocation());
assertEquals(2, ef.getDbRefs().size());
dbref = ef.getDbRefs().get(0);
assertEquals("GOA", dbref.getSource());
assertEquals(1, q.getEvidence().length);
assertEquals("Keith", q.getEvidence()[0]);
- // feature locations
- assertEquals(1, ef.getLocations().size());
- EmblFeatureLocations fl = ef.getLocations().get(0);
- assertEquals("single", fl.getLocationType());
- assertTrue(fl.isLocationComplement());
- assertEquals(1, fl.getLocElements().size());
- EmblFeatureLocElement le = fl.getLocElements().get(0);
- assertEquals("range", le.getType());
- assertEquals("X53828", le.getAccession());
- assertEquals("1", le.getVersion());
- assertFalse(le.isComplement());
- assertEquals(2, le.getBasePositions().length);
- BasePosition bp = le.getBasePositions()[0];
- assertEquals("simple", bp.getType());
- assertEquals("60", bp.getPos());
- bp = le.getBasePositions()[1];
- assertEquals("join", bp.getType());
- assertEquals("1058", bp.getPos());
-
/*
* Sequence
*/
EmblSequence seq = entry.getSequence();
- assertEquals("mRNA", seq.getType());
- assertEquals("2", seq.getVersion());
assertEquals("GTGACG", seq.getSequence());
/*