From: gmungoc Date: Tue, 4 Oct 2016 14:11:25 +0000 (+0100) Subject: JAL-2245 Castor mapping and code changes for change to ENA XML format X-Git-Tag: Release_2_10_0~19^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=30b2b47cbdfa35b127b0fb09e911815cddd9ed7b;p=jalview.git JAL-2245 Castor mapping and code changes for change to ENA XML format --- diff --git a/resources/embl_mapping.xml b/resources/embl_mapping.xml index 01b921a..7e494b4 100644 --- a/resources/embl_mapping.xml +++ b/resources/embl_mapping.xml @@ -26,6 +26,7 @@ see ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/ENA.embl.xsd see http://www.ebi.ac.uk/ena/submit/data-formats --> + + diff --git a/src/jalview/datamodel/xdb/embl/EmblFile.java b/src/jalview/datamodel/xdb/embl/EmblFile.java index 69870b6..48acb88 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFile.java +++ b/src/jalview/datamodel/xdb/embl/EmblFile.java @@ -20,6 +20,7 @@ */ package jalview.datamodel.xdb.embl; +import jalview.bin.Cache; import jalview.datamodel.DBRefEntry; import jalview.ws.dbsources.Uniprot; @@ -27,6 +28,7 @@ import java.io.File; import java.io.FileReader; import java.io.PrintWriter; import java.io.Reader; +import java.net.URL; import java.util.Vector; import org.exolab.castor.mapping.Mapping; @@ -81,12 +83,12 @@ public class EmblFile } /** - * Parse an EmblXML file into an EmblFile object + * Parse an Embl XML file into an EmblEntry object * * @param file * @return parsed EmblXML or null if exceptions were raised */ - public static EmblFile getEmblFile(File file) + public static EmblEntry getEmblEntry(File file) { if (file == null) { @@ -94,7 +96,7 @@ public class EmblFile } try { - return EmblFile.getEmblFile(new FileReader(file)); + return EmblFile.getEntry(new FileReader(file)); } catch (Exception e) { System.err.println("Exception whilst reading EMBLfile from " + file); @@ -103,26 +105,32 @@ public class EmblFile return null; } - public static EmblFile getEmblFile(Reader file) + /** + * Reads the XML response from file and unmarshals into a Java object + * + * @param fileReader + * @return + */ + public static EmblEntry getEntry(Reader fileReader) { - EmblFile record = new EmblFile(); + EmblEntry record = new EmblEntry(); try { // 1. Load the mapping information from the file Mapping map = new Mapping(record.getClass().getClassLoader()); - java.net.URL url = record.getClass().getResource("/embl_mapping.xml"); + URL url = record.getClass().getResource("/embl_mapping.xml"); map.loadMapping(url); // 2. Unmarshal the data Unmarshaller unmar = new Unmarshaller(record); try { - // uncomment to DEBUG EMBLFile reading - if (jalview.bin.Cache.getDefault(jalview.bin.Cache.CASTORLOGLEVEL, + if (Cache.getDefault(Cache.CASTORLOGLEVEL, "debug").equalsIgnoreCase("DEBUG")) { - unmar.setDebug(jalview.bin.Cache.log.isDebugEnabled()); + unmar.setDebug(Cache.log.isDebugEnabled()); + // unmar.setDebug(true);// uncomment to debug unmarshalling } } catch (Exception e) { @@ -131,7 +139,7 @@ public class EmblFile unmar.setIgnoreExtraAttributes(true); unmar.setMapping(map); unmar.setLogWriter(new PrintWriter(System.out)); - record = (EmblFile) unmar.unmarshal(file); + record = (EmblEntry) unmar.unmarshal(fileReader); canonicaliseDbRefs(record); } catch (Exception e) @@ -147,13 +155,17 @@ public class EmblFile * Change blank version to "0" in any DBRefEntry, to ensure consistent * comparison with other DBRefEntry in Jalview * - * @param record + * @param entry * @see Uniprot#getDbVersion */ - static void canonicaliseDbRefs(EmblFile record) + static void canonicaliseDbRefs(EmblEntry entry) { - for (EmblEntry entry : record.getEntries()) + if (entry == null) { + return; + } +// for (EmblEntry entry : record.getEntries()) +// { if (entry.getDbRefs() != null) { for (DBRefEntry dbref : entry.getDbRefs()) @@ -165,7 +177,7 @@ public class EmblFile } } - if (entry.getFeatures() != null) +// if (entry.getFeatures() != null) { for (EmblFeature feature : entry.getFeatures()) { @@ -181,6 +193,6 @@ public class EmblFile } } } - } + // } } } diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 2049766..73e67aa 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -72,7 +72,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy "exception.ebiembl_retrieval_failed_on", new String[] { emprefx.toLowerCase(), query.trim() }), e); } - return getEmblSequenceRecords(emprefx, query, reply); + return getEmblSequenceRecords(emprefx, reply); } /** @@ -81,46 +81,38 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy * @param emprefx * either EMBL or EMBLCDS strings are allowed - anything else will * not retrieve emblxml - * @param query * @param file * the EMBL XML file containing the results of a query * @return * @throws Exception */ - public AlignmentI getEmblSequenceRecords(String emprefx, String query, - File reply) throws Exception + public AlignmentI getEmblSequenceRecords(String emprefx, File reply) + throws Exception { - EmblFile efile = null; - List seqs = new ArrayList(); + EmblEntry entry = null; if (reply != null && reply.exists()) { file = reply.getAbsolutePath(); if (reply.length() > EMBL_NOT_FOUND_REPLY.length()) { - efile = EmblFile.getEmblFile(reply); + entry = EmblFile.getEmblEntry(reply); } } + // TODO don't need peptides any more? List peptides = new ArrayList(); - if (efile != null) + AlignmentI al = null; + if (entry != null) { - for (EmblEntry entry : efile.getEntries()) + SequenceI seq = entry.getSequence(emprefx, peptides); + if (seq != null) { - SequenceI seq = entry.getSequence(emprefx, peptides); - if (seq != null) - { - seqs.add(seq.deriveSequence()); - // place DBReferences on dataset and refer - } + seq.deriveSequence(); + // place DBReferences on dataset and refer + al = new Alignment(new SequenceI[] { seq }); } } - - AlignmentI al = null; - if (!seqs.isEmpty()) - { - al = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); - } stopQuery(); return al; } diff --git a/src/jalview/ws/ebi/EBIFetchClient.java b/src/jalview/ws/ebi/EBIFetchClient.java index 1dff32f..5531512 100644 --- a/src/jalview/ws/ebi/EBIFetchClient.java +++ b/src/jalview/ws/ebi/EBIFetchClient.java @@ -208,6 +208,7 @@ public class EBIFetchClient if (outFile != null) { FileOutputStream fio = new FileOutputStream(outFile); + // fio.write("\n".getBytes()); byte[] bb = new byte[32 * 1024]; int l; while ((l = is.read(bb)) > 0) diff --git a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java index abe5099..f332fa6 100644 --- a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java @@ -1,6 +1,7 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; @@ -40,20 +41,21 @@ public class EmblEntryTest // not the whole sequence but enough for this test... List peptides = new ArrayList(); SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); - EmblFile ef = EmblTestHelper.getEmblFile(); - assertEquals(1, ef.getEntries().size()); - EmblEntry testee = ef.getEntries().get(0); + EmblEntry ef = EmblTestHelper.getEmblFile(); + assertNotNull(ef); + // assertEquals(1, ef.getEntries().size()); + // EmblEntry testee = ef.getEntries().get(0); String sourceDb = "EMBL"; - SequenceI dna = testee.makeSequence(sourceDb); + SequenceI dna = ef.makeSequence(sourceDb); /* * parse three CDS features, with two/one/no Uniprot cross-refs */ - for (EmblFeature feature : ef.getEntries().get(0).getFeatures()) + for (EmblFeature feature : ef.getFeatures()) { if ("CDS".equals(feature.getName())) { - testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher); + ef.parseCodingFeature(feature, sourceDb, dna, peptides, matcher); } } diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index 906436f..6afdced 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -21,12 +21,11 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import jalview.datamodel.DBRefEntry; -import java.util.Vector; - import org.testng.annotations.Test; public class EmblFileTest @@ -35,9 +34,10 @@ public class EmblFileTest @Test(groups = { "Functional" }) public void testGetEmblFile() { - Vector entries = EmblTestHelper.getEmblFile().getEntries(); - assertEquals(1, entries.size()); - EmblEntry entry = entries.get(0); + EmblEntry entry = EmblTestHelper.getEmblFile(); + assertNotNull(entry); + // assertEquals(1, entries.size()); + // EmblEntry entry = entries.get(0); assertEquals("X07547", entry.getAccession()); assertEquals("C. trachomatis plasmid", entry.getDescription()); diff --git a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java index 6349164..a79bdb8 100644 --- a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java +++ b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java @@ -7,7 +7,7 @@ public class EmblTestHelper // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml // dna and translations truncated for convenience private static final String TESTDATA = "" - + "" + // + "" + "GGTATGTCCTCTAGTACAAAC\n" + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT" - + ""; + + ""; - static EmblFile getEmblFile() + static EmblEntry getEmblFile() { - return EmblFile.getEmblFile(new StringReader(TESTDATA)); + return EmblFile.getEntry(new StringReader(TESTDATA)); } }