see ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/ENA.embl.xsd
see http://www.ebi.ac.uk/ena/submit/data-formats
-->
- <!--
<class name="jalview.datamodel.xdb.embl.EmblFile">
<map-to xml="ROOT"/>
<field name="entries" type="jalview.datamodel.xdb.embl.EmblEntry" collection="vector">
<bind-xml name="Error"/>
</field>
</class>
- -->
<class name="jalview.datamodel.xdb.embl.EmblEntry">
- <map-to xml="entry"/>
<field name="accession" type="string">
<bind-xml name="accession" node="attribute"/>
</field>
*/
package jalview.datamodel.xdb.embl;
-import jalview.bin.Cache;
import jalview.datamodel.DBRefEntry;
import jalview.ws.dbsources.Uniprot;
import java.io.FileReader;
import java.io.PrintWriter;
import java.io.Reader;
-import java.net.URL;
import java.util.Vector;
import org.exolab.castor.mapping.Mapping;
}
/**
- * Parse an Embl XML file into an EmblEntry object
+ * Parse an EmblXML file into an EmblFile object
*
* @param file
* the EMBL XML file to read
* @return parsed EmblXML or null if exceptions were raised
*/
- public static EmblEntry getEmblEntry(File file)
+ public static EmblFile getEmblFile(File file)
{
if (file == null)
{
}
try
{
- return EmblFile.getEntry(new FileReader(file));
+ return EmblFile.getEmblFile(new FileReader(file));
} catch (Exception e)
{
System.err.println("Exception whilst reading EMBLfile from " + file);
return null;
}
- /**
- * Reads the XML response from file and unmarshals into a Java object
- *
- * @param fileReader
- * @return
- */
- public static EmblEntry getEntry(Reader fileReader)
+ public static EmblFile getEmblFile(Reader file)
{
- EmblEntry record = new EmblEntry();
+ EmblFile record = new EmblFile();
try
{
// 1. Load the mapping information from the file
Mapping map = new Mapping(record.getClass().getClassLoader());
- URL url = record.getClass().getResource("/embl_mapping.xml");
+ java.net.URL url = record.getClass().getResource("/embl_mapping.xml");
map.loadMapping(url);
// 2. Unmarshal the data
Unmarshaller unmar = new Unmarshaller(record);
try
{
- if (Cache.getDefault(Cache.CASTORLOGLEVEL,
+ // when the CASTORLOGLEVEL setting is DEBUG, let the unmarshaller's
+ // debug flag follow the debug state of the Jalview log
+ if (jalview.bin.Cache.getDefault(jalview.bin.Cache.CASTORLOGLEVEL,
"debug").equalsIgnoreCase("DEBUG"))
{
- unmar.setDebug(Cache.log.isDebugEnabled());
- // unmar.setDebug(true);// uncomment to debug unmarshalling
+ unmar.setDebug(jalview.bin.Cache.log.isDebugEnabled());
}
} catch (Exception e)
{
unmar.setIgnoreExtraAttributes(true);
unmar.setMapping(map);
unmar.setLogWriter(new PrintWriter(System.out));
- record = (EmblEntry) unmar.unmarshal(fileReader);
+ record = (EmblFile) unmar.unmarshal(file);
canonicaliseDbRefs(record);
} catch (Exception e)
* Change blank version to "0" in any DBRefEntry, to ensure consistent
* comparison with other DBRefEntry in Jalview
*
- * @param entry
+ * @param record
* @see Uniprot#getDbVersion
*/
- static void canonicaliseDbRefs(EmblEntry entry)
+ static void canonicaliseDbRefs(EmblFile record)
{
- if (entry == null)
+ for (EmblEntry entry : record.getEntries())
{
- return;
- }
-// for (EmblEntry entry : record.getEntries())
-// {
if (entry.getDbRefs() != null)
{
for (DBRefEntry dbref : entry.getDbRefs())
}
}
- if (entry.getFeatures() != null)
+ if (entry.getFeatures() != null)
{
for (EmblFeature feature : entry.getFeatures())
{
}
}
}
- // }
+ }
}
}
"exception.ebiembl_retrieval_failed_on", new String[] {
emprefx.toLowerCase(), query.trim() }), e);
}
- return getEmblSequenceRecords(emprefx, reply);
+ return getEmblSequenceRecords(emprefx, query, reply);
}
/**
* @param emprefx
* either EMBL or EMBLCDS strings are allowed - anything else will
* not retrieve emblxml
+ * @param query
+ * the query string for which the reply file was retrieved
* @param reply
* the EMBL XML file containing the results of a query
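* @return an alignment of the sequences built from the entries in the
* reply file, or null if no sequence could be obtained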
* @throws Exception
*/
- public AlignmentI getEmblSequenceRecords(String emprefx, File reply)
- throws Exception
+ public AlignmentI getEmblSequenceRecords(String emprefx, String query,
+ File reply) throws Exception
{
- EmblEntry entry = null;
+ EmblFile efile = null;
+ List<SequenceI> seqs = new ArrayList<SequenceI>();
if (reply != null && reply.exists())
{
file = reply.getAbsolutePath();
if (reply.length() > EMBL_NOT_FOUND_REPLY.length())
{
- entry = EmblFile.getEmblEntry(reply);
+ efile = EmblFile.getEmblFile(reply);
}
}
- // TODO don't need peptides any more?
List<SequenceI> peptides = new ArrayList<SequenceI>();
- AlignmentI al = null;
- if (entry != null)
+ if (efile != null)
{
- SequenceI seq = entry.getSequence(emprefx, peptides);
- if (seq != null)
+ for (EmblEntry entry : efile.getEntries())
{
- seq.deriveSequence();
- // place DBReferences on dataset and refer
- al = new Alignment(new SequenceI[] { seq });
+ SequenceI seq = entry.getSequence(emprefx, peptides);
+ if (seq != null)
+ {
+ seqs.add(seq.deriveSequence());
+ // place DBReferences on dataset and refer
+ }
}
}
+
+ AlignmentI al = null;
+ if (!seqs.isEmpty())
+ {
+ al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
+ }
stopQuery();
return al;
}
if (outFile != null)
{
FileOutputStream fio = new FileOutputStream(outFile);
- // fio.write("<?xml version='1.0' encoding='UTF-8'?>\n".getBytes());
byte[] bb = new byte[32 * 1024];
int l;
while ((l = is.read(bb)) > 0)
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
// not the whole sequence but enough for this test...
List<SequenceI> peptides = new ArrayList<SequenceI>();
SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
- EmblEntry ef = EmblTestHelper.getEmblFile();
- assertNotNull(ef);
- // assertEquals(1, ef.getEntries().size());
- // EmblEntry testee = ef.getEntries().get(0);
+ EmblFile ef = EmblTestHelper.getEmblFile();
+ assertEquals(1, ef.getEntries().size());
+ EmblEntry testee = ef.getEntries().get(0);
String sourceDb = "EMBL";
- SequenceI dna = ef.makeSequence(sourceDb);
+ SequenceI dna = testee.makeSequence(sourceDb);
/*
* parse three CDS features, with two/one/no Uniprot cross-refs
*/
- for (EmblFeature feature : ef.getFeatures())
+ for (EmblFeature feature : ef.getEntries().get(0).getFeatures())
{
if ("CDS".equals(feature.getName()))
{
- ef.parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
+ testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
}
}
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import jalview.datamodel.DBRefEntry;
+import java.util.Vector;
+
import org.testng.annotations.Test;
public class EmblFileTest
@Test(groups = { "Functional" })
public void testGetEmblFile()
{
- EmblEntry entry = EmblTestHelper.getEmblFile();
- assertNotNull(entry);
- // assertEquals(1, entries.size());
- // EmblEntry entry = entries.get(0);
+ Vector<EmblEntry> entries = EmblTestHelper.getEmblFile().getEntries();
+ assertEquals(1, entries.size());
+ EmblEntry entry = entries.get(0);
assertEquals("X07547", entry.getAccession());
assertEquals("C. trachomatis plasmid", entry.getDescription());
// adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml
// dna and translations truncated for convenience
private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
- // + "<ROOT>"
+ + "<ROOT>"
+ "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\""
+ " dataClass=\"STD\" taxonomicDivision=\"PRO\""
+ " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\""
*/
+ "<sequence>GGTATGTCCTCTAGTACAAAC\n"
+ "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT"
- + "</sequence></entry>";
+ + "</sequence></entry></ROOT>";
- static EmblEntry getEmblFile()
+ static EmblFile getEmblFile()
{
- return EmblFile.getEntry(new StringReader(TESTDATA));
+ return EmblFile.getEmblFile(new StringReader(TESTDATA));
}
}