From 0879ca19b6df4983f82521a479b848526b2d14d4 Mon Sep 17 00:00:00 2001 From: jprocter Date: Mon, 19 Mar 2007 13:44:18 +0000 Subject: [PATCH] initial support for EMBL file parsing. --- resources/embl_mapping.xml | 135 +++++++++ src/jalview/datamodel/xdb/embl/BasePosition.java | 30 ++ src/jalview/datamodel/xdb/embl/EmblEntry.java | 304 ++++++++++++++++++++ src/jalview/datamodel/xdb/embl/EmblError.java | 19 ++ src/jalview/datamodel/xdb/embl/EmblFeature.java | 58 ++++ .../datamodel/xdb/embl/EmblFeatureLocElement.java | 70 +++++ .../datamodel/xdb/embl/EmblFeatureLocations.java | 66 +++++ src/jalview/datamodel/xdb/embl/EmblFile.java | 90 ++++++ src/jalview/datamodel/xdb/embl/EmblSequence.java | 43 +++ src/jalview/datamodel/xdb/embl/Qualifier.java | 31 ++ 10 files changed, 846 insertions(+) create mode 100644 resources/embl_mapping.xml create mode 100644 src/jalview/datamodel/xdb/embl/BasePosition.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblEntry.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblError.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblFeature.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblFile.java create mode 100644 src/jalview/datamodel/xdb/embl/EmblSequence.java create mode 100644 src/jalview/datamodel/xdb/embl/Qualifier.java diff --git a/resources/embl_mapping.xml b/resources/embl_mapping.xml new file mode 100644 index 0000000..7238e89 --- /dev/null +++ b/resources/embl_mapping.xml @@ -0,0 +1,135 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/jalview/datamodel/xdb/embl/BasePosition.java b/src/jalview/datamodel/xdb/embl/BasePosition.java new file mode 100644 index 0000000..0b2dcbe --- /dev/null +++ b/src/jalview/datamodel/xdb/embl/BasePosition.java @@ -0,0 +1,30 @@ +package jalview.datamodel.xdb.embl; + +public class BasePosition { + String type; + String pos; + /** + * @return the pos + */ + public String getPos() { + return pos; + } + /** + * @param pos the pos to set + */ + public void setPos(String pos) { + this.pos = pos; + } + /** + * @return the type + */ + public String getType() { + return type; + } + /** + * @param type the type to set + */ + public void setType(String type) { + this.type = type; + } +} diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java new file mode 100644 index 0000000..ffe138b --- /dev/null +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -0,0 +1,304 @@ +package jalview.datamodel.xdb.embl; + +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; + +import java.util.Iterator; +import java.util.Vector; + +public class EmblEntry { + String accession; + String version; + String taxDivision; + String desc; + String rCreated; + String rLastUpdated; + String lastUpdated; + Vector keywords; + Vector refs; + Vector dbRefs; + Vector features; + EmblSequence sequence; + /** + * @return the accession + */ + public String getAccession() { + return accession; + } + /** + * @param accession the accession to set + */ + public void setAccession(String accession) { + this.accession = accession; + } + /** + * @return the dbRefs + */ + public Vector getDbRefs() { + return dbRefs; + } + /** + * @param dbRefs the dbRefs to set + */ + public void setDbRefs(Vector dbRefs) { + this.dbRefs = dbRefs; + } + /** + * @return the desc + */ + public String getDesc() { + return desc; + } + /** + * @param desc the desc to set + */ + public void setDesc(String desc) { + this.desc = desc; + } + /** + * @return the features + */ + public Vector getFeatures() { + return features; + } + /** + * @param features the features to set + */ + public void setFeatures(Vector features) { + this.features = features; + } + /** + * @return the keywords + */ + public Vector getKeywords() { + return keywords; + } + /** + * @param keywords the keywords to set + */ + public void setKeywords(Vector keywords) { + this.keywords = keywords; + } + /** + * @return the lastUpdated + */ + public String getLastUpdated() { + return lastUpdated; + } + /** + * @param lastUpdated the lastUpdated to set + */ + public void setLastUpdated(String lastUpdated) { + this.lastUpdated = lastUpdated; + } + /** + * @return the refs + */ + public Vector getRefs() { + return refs; + } + /** + * @param refs the refs to set + */ + public void setRefs(Vector refs) { + this.refs = refs; + } + /** + * @return the releaseCreated + */ + public String getRCreated() { + return rCreated; + } + /** + * @param releaseCreated the releaseCreated to set + */ + public void setRcreated(String releaseCreated) { + this.rCreated = releaseCreated; + } + /** + * @return the releaseLastUpdated + */ + public String getRLastUpdated() { + return rLastUpdated; + } + /** + * @param releaseLastUpdated the releaseLastUpdated to set + */ + public void setRLastUpdated(String releaseLastUpdated) { + this.rLastUpdated = releaseLastUpdated; + } + /** + * @return the sequence + */ + public EmblSequence getSequence() { + return sequence; + } + /** + * @param sequence the sequence to set + */ + public void setSequence(EmblSequence sequence) { + this.sequence = sequence; + } + /** + * @return the taxDivision + */ + public String getTaxDivision() { + return taxDivision; + } + /** + * @param taxDivision the taxDivision to set + */ + public void setTaxDivision(String taxDivision) { + this.taxDivision = taxDivision; + } + /** + * @return the version + */ + public String getVersion() { + return version; + } + /** + * @param version the version to set + */ + public void setVersion(String version) { + this.version = version; + } + + /** + * Recover annotated sequences from EMBL file + * @param noNa don't return nucleic acid sequences + * @param sourceDb TODO + * @param noProtein don't return any translated protein sequences marked in features + * @return dataset sequences with DBRefs and features - DNA always comes first + */ + public jalview.datamodel.SequenceI[] getSequences(boolean noNa, boolean noPeptide, String sourceDb) { + Vector seqs=new Vector(); + Sequence dna=null; + if (!noNa) { + dna = new Sequence(sourceDb+"|"+accession, sequence.getSequence()); + dna.setDescription(desc); + dna.addDBRef(new DBRefEntry(sourceDb, version, accession)); + // TODO: add mapping for parentAccession attribute + // TODO: transform EMBL Database refs to canonical form + if (dbRefs!=null) + for (Iterator i=dbRefs.iterator(); i.hasNext(); dna.addDBRef((DBRefEntry)i.next())); + } + for (Iterator i=features.iterator(); i.hasNext(); ) { + EmblFeature feature = (EmblFeature) i.next(); + if (!noNa) { + if (feature.dbRefs!=null && feature.dbRefs.size()>0) { + for (Iterator dbr=feature.dbRefs.iterator(); dbr.hasNext(); dna.addDBRef((DBRefEntry)dbr.next()) ) + ; + } + } + if (feature.getName().equalsIgnoreCase("CDS")) { + // extract coding region(s) + jalview.datamodel.Mapping map = null; + int[] exon=null; + if (feature.locations!=null && feature.locations.size()>0) { + for (Iterator locs=feature.locations.iterator(); + locs.hasNext(); ) { + EmblFeatureLocations loc = (EmblFeatureLocations) locs.next(); + int[] se = loc.getElementRanges(); + if (exon==null) { + exon=se; + } else { + int[] t=new int[exon.length+se.length]; + System.arraycopy(exon, 0, t, 0, exon.length); + System.arraycopy(se, 0, t, exon.length,se.length); + exon=t; + } + } + } + String prseq=null; + String prname=null; + String prid=null; + int prstart=1; + // get qualifiers + if (feature.getQualifiers()!=null && feature.getQualifiers().size()>0) { + for (Iterator quals=feature.getQualifiers().iterator(); quals.hasNext(); ) { + Qualifier q = (Qualifier) quals.next(); + if (q.getName().equals("translation")) + { + prseq=q.getValue(); + } + else + if (q.getName().equals("protein_id")) + { + prid=q.getValue(); + } + else + if (q.getName().equals("codon_start")) + { + prstart = Integer.parseInt(q.getValue()); + } + else + if (q.getName().equals("product")) { + prname = q.getValue(); + } + } + } + Sequence product=null; + if (prseq!=null && prname!=null && prid!=null) { + // extract proteins. + if (!noPeptide) { + product = new Sequence(sourceDb+"|"+"EMBLCDS|"+prid+"|"+prname, prseq, prstart, prstart+prseq.length()-1); + product.setDescription("Protein Product from "+sourceDb); + seqs.add(product); + } + // we have everything - create the mapping and perhaps the protein sequence + map = new jalview.datamodel.Mapping(product, exon, new int[] { prstart, prstart+prseq.length()-1}, 3, 1); + // add cds feature to dna seq - this may include the stop codon + for (int xint=0;xint0) + { + for (Iterator dbr=feature.dbRefs.iterator(); dbr.hasNext(); ) + { + DBRefEntry ref = (DBRefEntry)dbr.next(); + ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref.getSource())); + if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT)) + { + ref.setMap(map); + } + if (product!=null) { + DBRefEntry pref = new DBRefEntry(ref.getSource(), ref.getVersion(), ref.getAccessionId()); + pref.setMap(null); // reference is direct + } + dna.addDBRef(ref); + } + } + + } else { + // General feature type. + if (!noNa) { + if (feature.dbRefs!=null && feature.dbRefs.size()>0) { + for (Iterator dbr=feature.dbRefs.iterator(); dbr.hasNext(); dna.addDBRef((DBRefEntry)dbr.next()) ) + ; + } + } + } + + } + if (!noNa) { + seqs.add(dna); + } + SequenceI[] sqs = new SequenceI[seqs.size()]; + for (int i=0,j=seqs.size();i0) + System.out.println(myfile.entries.size()+" Records read."); + } +} diff --git a/src/jalview/datamodel/xdb/embl/EmblSequence.java b/src/jalview/datamodel/xdb/embl/EmblSequence.java new file mode 100644 index 0000000..ac0bfef --- /dev/null +++ b/src/jalview/datamodel/xdb/embl/EmblSequence.java @@ -0,0 +1,43 @@ +package jalview.datamodel.xdb.embl; + +public class EmblSequence { + String version; + String sequence; + String type; + /** + * @return the sequence + */ + public String getSequence() { + return sequence; + } + /** + * @param sequence the sequence to set + */ + public void setSequence(String sequence) { + this.sequence = sequence; + } + /** + * @return the type + */ + public String getType() { + return type; + } + /** + * @param type the type to set + */ + public void setType(String type) { + this.type = type; + } + /** + * @return the version + */ + public String getVersion() { + return version; + } + /** + * @param version the version to set + */ + public void setVersion(String version) { + this.version = version; + } +} diff --git a/src/jalview/datamodel/xdb/embl/Qualifier.java b/src/jalview/datamodel/xdb/embl/Qualifier.java new file mode 100644 index 0000000..1c2d66e --- /dev/null +++ b/src/jalview/datamodel/xdb/embl/Qualifier.java @@ -0,0 +1,31 @@ +package jalview.datamodel.xdb.embl; + +public class Qualifier { + String name; + String value; + /** + * @return the name + */ + public String getName() { + return name; + } + /** + * @param name the name to set + */ + public void setName(String name) { + this.name = name; + } + /** + * @return the value + */ + public String getValue() { + return value; + } + /** + * @param value the value to set + */ + public void setValue(String value) { + this.value = value; + } + +} -- 1.7.10.2