-package jalview.datamodel.xdb.embl;\r
-\r
-import jalview.datamodel.DBRefEntry;\r
-import jalview.datamodel.Sequence;\r
-import jalview.datamodel.SequenceFeature;\r
-import jalview.datamodel.SequenceI;\r
-\r
-import java.util.Enumeration;\r
-import java.util.Hashtable;\r
-import java.util.Iterator;\r
-import java.util.Vector;\r
-\r
-public class EmblEntry {\r
- String accession;\r
- String version;\r
- String taxDivision;\r
- String desc;\r
- String rCreated;\r
- String rLastUpdated;\r
- String lastUpdated;\r
- Vector keywords;\r
- Vector refs;\r
- Vector dbRefs;\r
- Vector features;\r
- EmblSequence sequence;\r
- /**\r
- * @return the accession\r
- */\r
- public String getAccession() {\r
- return accession;\r
- }\r
- /**\r
- * @param accession the accession to set\r
- */\r
- public void setAccession(String accession) {\r
- this.accession = accession;\r
- }\r
- /**\r
- * @return the dbRefs\r
- */\r
- public Vector getDbRefs() {\r
- return dbRefs;\r
- }\r
- /**\r
- * @param dbRefs the dbRefs to set\r
- */\r
- public void setDbRefs(Vector dbRefs) {\r
- this.dbRefs = dbRefs;\r
- }\r
- /**\r
- * @return the desc\r
- */\r
- public String getDesc() {\r
- return desc;\r
- }\r
- /**\r
- * @param desc the desc to set\r
- */\r
- public void setDesc(String desc) {\r
- this.desc = desc;\r
- }\r
- /**\r
- * @return the features\r
- */\r
- public Vector getFeatures() {\r
- return features;\r
- }\r
- /**\r
- * @param features the features to set\r
- */\r
- public void setFeatures(Vector features) {\r
- this.features = features;\r
- }\r
- /**\r
- * @return the keywords\r
- */\r
- public Vector getKeywords() {\r
- return keywords;\r
- }\r
- /**\r
- * @param keywords the keywords to set\r
- */\r
- public void setKeywords(Vector keywords) {\r
- this.keywords = keywords;\r
- }\r
- /**\r
- * @return the lastUpdated\r
- */\r
- public String getLastUpdated() {\r
- return lastUpdated;\r
- }\r
- /**\r
- * @param lastUpdated the lastUpdated to set\r
- */\r
- public void setLastUpdated(String lastUpdated) {\r
- this.lastUpdated = lastUpdated;\r
- }\r
- /**\r
- * @return the refs\r
- */\r
- public Vector getRefs() {\r
- return refs;\r
- }\r
- /**\r
- * @param refs the refs to set\r
- */\r
- public void setRefs(Vector refs) {\r
- this.refs = refs;\r
- }\r
- /**\r
- * @return the releaseCreated\r
- */\r
- public String getRCreated() {\r
- return rCreated;\r
- }\r
- /**\r
- * @param releaseCreated the releaseCreated to set\r
- */\r
- public void setRcreated(String releaseCreated) {\r
- this.rCreated = releaseCreated;\r
- }\r
- /**\r
- * @return the releaseLastUpdated\r
- */\r
- public String getRLastUpdated() {\r
- return rLastUpdated;\r
- }\r
- /**\r
- * @param releaseLastUpdated the releaseLastUpdated to set\r
- */\r
- public void setRLastUpdated(String releaseLastUpdated) {\r
- this.rLastUpdated = releaseLastUpdated;\r
- }\r
- /**\r
- * @return the sequence\r
- */\r
- public EmblSequence getSequence() {\r
- return sequence;\r
- }\r
- /**\r
- * @param sequence the sequence to set\r
- */\r
- public void setSequence(EmblSequence sequence) {\r
- this.sequence = sequence;\r
- }\r
- /**\r
- * @return the taxDivision\r
- */\r
- public String getTaxDivision() {\r
- return taxDivision;\r
- }\r
- /**\r
- * @param taxDivision the taxDivision to set\r
- */\r
- public void setTaxDivision(String taxDivision) {\r
- this.taxDivision = taxDivision;\r
- }\r
- /**\r
- * @return the version\r
- */\r
- public String getVersion() {\r
- return version;\r
- }\r
- /**\r
- * @param version the version to set\r
- */\r
- public void setVersion(String version) {\r
- this.version = version;\r
- }\r
-/*\r
- * EMBL Feature support is limited. The text below is included for the benefit of\r
- * any developer working on improving EMBL feature import in Jalview.\r
- * Extract from EMBL feature specification\r
- * see http://www.embl-ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html\r
-3.5 Location\r
-3.5.1 Purpose\r
-\r
-The location indicates the region of the presented sequence which corresponds \r
-to a feature. \r
-\r
-3.5.2 Format and conventions\r
-The location contains at least one sequence location descriptor and may \r
-contain one or more operators with one or more sequence location descriptors. \r
-Base numbers refer to the numbering in the entry. This numbering designates \r
-the first base (5' end) of the presented sequence as base 1. \r
-Base locations beyond the range of the presented sequence may not be used in \r
-location descriptors, the only exception being location in a remote entry (see \r
-3.5.2.1, e). \r
-\r
-Location operators and descriptors are discussed in more detail below. \r
-\r
-3.5.2.1 Location descriptors\r
-\r
-The location descriptor can be one of the following: \r
-(a) a single base number\r
-(b) a site between two indicated adjoining bases\r
-(c) a single base chosen from within a specified range of bases (not allowed for new\r
- entries)\r
-(d) the base numbers delimiting a sequence span\r
-(e) a remote entry identifier followed by a local location descriptor\r
- (i.e., a-d)\r
-\r
-A site between two adjoining nucleotides, such as endonucleolytic cleavage \r
-site, is indicated by listing the two points separated by a carat (^). The \r
-permitted formats for this descriptor are n^n+1 (for example 55^56), or, for \r
-circular molecules, n^1, where "n" is the full length of the molecule, ie \r
-1000^1 for circular molecule with length 1000.\r
-\r
-A single base chosen from a range of bases is indicated by the first base\r
-number and the last base number of the range separated by a single period\r
-(e.g., '12.21' indicates a single base taken from between the indicated\r
-points). From October 2006 the usage of this descriptor is restricted :\r
-it is illegal to use "a single base from a range" (c) either on its own or\r
-in combination with the "sequence span" (d) descriptor for newly created entries.\r
-The existing entries where such descriptors exist are going to be retrofitted.\r
-\r
-Sequence spans are indicated by the starting base number and the ending base \r
-number separated by two periods (e.g., '34..456'). The '<' and '>' symbols may \r
-be used with the starting and ending base numbers to indicate that an end \r
-point is beyond the specified base number. The starting and ending base \r
-positions can be represented as distinct base numbers ('34..456') or a site \r
-between two indicated adjoining bases. \r
-\r
-A location in a remote entry (not the entry to which the feature table \r
-belongs) can be specified by giving the accession-number and sequence version \r
-of the remote entry, followed by a colon ":", followed by a location \r
-descriptor which applies to that entry's sequence (i.e. J12345.1:1..15, see \r
-also examples below) \r
-\r
-3.5.2.2 Operators\r
-\r
-The location operator is a prefix that specifies what must be done to the \r
-indicated sequence to find or construct the location corresponding to the \r
-feature. A list of operators is given below with their definitions and most \r
-common format. \r
-\r
-complement(location) \r
-Find the complement of the presented sequence in the span specified by "\r
-location" (i.e., read the complement of the presented strand in its 5'-to-3' \r
-direction) \r
-\r
-join(location,location, ... location) \r
-The indicated elements should be joined (placed end-to-end) to form one \r
-contiguous sequence \r
-\r
-order(location,location, ... location) \r
-The elements can be found in the \r
-specified order (5' to 3' direction), but nothing is implied about the \r
-reasonableness about joining them \r
-\r
-Note : location operator "complement" can be used in combination with either "\r
-join" or "order" within the same location; combinations of "join" and "order" \r
-within the same location (nested operators) are illegal.\r
-\r
-\r
-\r
-3.5.3 Location examples \r
-\r
-The following is a list of common location descriptors with their meanings: \r
-\r
-Location Description \r
-\r
-467 Points to a single base in the presented sequence \r
-\r
-340..565 Points to a continuous range of bases bounded by and\r
- including the starting and ending bases\r
-\r
-<345..500 Indicates that the exact lower boundary point of a feature\r
- is unknown. The location begins at some base previous to\r
- the first base specified (which need not be contained in \r
- the presented sequence) and continues to and includes the \r
- ending base \r
-\r
-<1..888 The feature starts before the first sequenced base and \r
- continues to and includes base 888\r
-\r
-1..>888 The feature starts at the first sequenced base and \r
- continues beyond base 888\r
-\r
-102.110 Indicates that the exact location is unknown but that it is \r
- one of the bases between bases 102 and 110, inclusive\r
-\r
-123^124 Points to a site between bases 123 and 124\r
-\r
-join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to form \r
- one contiguous sequence\r
-\r
-\r
-complement(34..126) Start at the base complementary to 126 and finish at the \r
- base complementary to base 34 (the feature is on the strand \r
- complementary to the presented strand)\r
-\r
-\r
-complement(join(2691..4571,4918..5163))\r
- Joins regions 2691 to 4571 and 4918 to 5163, then \r
- complements the joined segments (the feature is on the \r
- strand complementary to the presented strand) \r
-\r
-join(complement(4918..5163),complement(2691..4571))\r
- Complements regions 4918 to 5163 and 2691 to 4571, then \r
- joins the complemented segments (the feature is on the \r
- strand complementary to the presented strand)\r
- \r
-J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in \r
- this database) with primary accession number 'J00194'\r
- \r
-join(1..100,J00194.1:100..202)\r
- Joins region 1..100 of the existing entry with the region\r
- 100..202 of remote entry J00194\r
-\r
- */\r
- /**\r
- * Recover annotated sequences from EMBL file\r
- * @param noNa don't return nucleic acid sequences \r
- * @param sourceDb TODO\r
- * @param noProtein don't return any translated protein sequences marked in features\r
- * @return dataset sequences with DBRefs and features - DNA always comes first\r
- */\r
- public jalview.datamodel.SequenceI[] getSequences(boolean noNa, boolean noPeptide, String sourceDb) {\r
- Vector seqs=new Vector();\r
- Sequence dna=null;\r
- if (!noNa) {\r
- dna = new Sequence(sourceDb+"|"+accession, sequence.getSequence());\r
- dna.setDescription(desc);\r
- dna.addDBRef(new DBRefEntry(sourceDb, version, accession));\r
- // TODO: add mapping for parentAccession attribute\r
- // TODO: transform EMBL Database refs to canonical form\r
- if (dbRefs!=null)\r
- for (Iterator i=dbRefs.iterator(); i.hasNext(); dna.addDBRef((DBRefEntry)i.next()));\r
- }\r
- for (Iterator i=features.iterator(); i.hasNext(); ) {\r
- EmblFeature feature = (EmblFeature) i.next();\r
- if (!noNa) {\r
- if (feature.dbRefs!=null && feature.dbRefs.size()>0) {\r
- for (Iterator dbr=feature.dbRefs.iterator(); dbr.hasNext(); dna.addDBRef((DBRefEntry)dbr.next()) )\r
- ;\r
- }\r
- }\r
- if (feature.getName().equalsIgnoreCase("CDS")) {\r
- // extract coding region(s)\r
- jalview.datamodel.Mapping map = null;\r
- int[] exon=null;\r
- if (feature.locations!=null && feature.locations.size()>0) {\r
- for (Iterator locs=feature.locations.iterator();\r
- locs.hasNext(); ) {\r
- EmblFeatureLocations loc = (EmblFeatureLocations) locs.next();\r
- int[] se = loc.getElementRanges();\r
- if (exon==null) {\r
- exon=se;\r
- } else {\r
- int[] t=new int[exon.length+se.length];\r
- System.arraycopy(exon, 0, t, 0, exon.length);\r
- System.arraycopy(se, 0, t, exon.length,se.length);\r
- exon=t;\r
- }\r
- }\r
- }\r
- String prseq=null;\r
- String prname=new String();\r
- String prid=null;\r
- Hashtable vals=new Hashtable();\r
- int prstart=1;\r
- // get qualifiers\r
- if (feature.getQualifiers()!=null && feature.getQualifiers().size()>0) {\r
- for (Iterator quals=feature.getQualifiers().iterator(); quals.hasNext(); ) {\r
- Qualifier q = (Qualifier) quals.next();\r
- if (q.getName().equals("translation")) \r
- {\r
- prseq=q.getValue();\r
- } \r
- else\r
- if (q.getName().equals("protein_id")) \r
- {\r
- prid=q.getValue();\r
- }\r
- else\r
- if (q.getName().equals("codon_start"))\r
- {\r
- prstart = Integer.parseInt(q.getValue());\r
- }\r
- else \r
- if (q.getName().equals("product")){\r
- prname = q.getValue();\r
- } else {\r
- // throw anything else into the additional properties hash\r
- vals.put(q.getName(), q.getValue());\r
- }\r
- }\r
- }\r
- Sequence product=null;\r
- if (prseq!=null && prname!=null && prid!=null) {\r
- // extract proteins.\r
- if (!noPeptide) {\r
- product = new Sequence(sourceDb+"|"+"EMBLCDS|"+prid+"|"+prname, prseq, prstart, prstart+prseq.length()-1);\r
- product.setDescription("Protein Product from "+sourceDb);\r
- seqs.add(product);\r
- }\r
- // we have everything - create the mapping and perhaps the protein sequence\r
- map = new jalview.datamodel.Mapping(product, exon, new int[] { prstart, prstart+prseq.length()-1}, 3, 1);\r
- // add cds feature to dna seq - this may include the stop codon\r
- for (int xint=0;xint<exon.length; xint+=2) {\r
- SequenceFeature sf = new SequenceFeature();\r
- sf.setBegin(exon[xint]);\r
- sf.setEnd(exon[xint+1]);\r
- sf.setType(feature.getName());\r
- sf.setFeatureGroup(jalview.datamodel.DBRefSource.EMBL);\r
- sf.setDescription("Exon "+(1+xint)+" for protein '"+prname+"' EMBLCDS:"+prid);\r
- if (vals!=null && vals.size()>0) {\r
- Enumeration kv = vals.elements();\r
- while (kv.hasMoreElements()) {\r
- Object key=kv.nextElement();\r
- if (key!=null)\r
- sf.setValue(key.toString(), vals.get(key));\r
- }\r
- }\r
- dna.addSequenceFeature(sf);\r
- }\r
- }\r
- // add dbRefs to sequence\r
- if (feature.dbRefs!=null && feature.dbRefs.size()>0) \r
- {\r
- for (Iterator dbr=feature.dbRefs.iterator(); dbr.hasNext(); ) \r
- {\r
- DBRefEntry ref = (DBRefEntry)dbr.next();\r
- ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref.getSource()));\r
- if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT)) \r
- {\r
- ref.setMap(map);\r
- }\r
- if (product!=null) {\r
- DBRefEntry pref = new DBRefEntry(ref.getSource(), ref.getVersion(), ref.getAccessionId());\r
- pref.setMap(null); // reference is direct\r
- }\r
- dna.addDBRef(ref);\r
- }\r
- }\r
- \r
- } else {\r
- // General feature type.\r
- if (!noNa) {\r
- if (feature.dbRefs!=null && feature.dbRefs.size()>0) {\r
- for (Iterator dbr=feature.dbRefs.iterator(); dbr.hasNext(); dna.addDBRef((DBRefEntry)dbr.next()) )\r
- ;\r
- }\r
- }\r
- }\r
-\r
- }\r
- if (!noNa) {\r
- seqs.add(dna);\r
- }\r
- SequenceI[] sqs = new SequenceI[seqs.size()];\r
- for (int i=0,j=seqs.size();i<j; i++) {\r
- sqs[i] = (SequenceI) seqs.elementAt(i);\r
- seqs.set(i, null);\r
- }\r
- return sqs;\r
- }\r
-}\r
+package jalview.datamodel.xdb.embl;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Vector;
+
+public class EmblEntry {
+ String accession;
+ String version;
+ String taxDivision;
+ String desc;
+ String rCreated;
+ String rLastUpdated;
+ String lastUpdated;
+ Vector keywords;
+ Vector refs;
+ Vector dbRefs;
+ Vector features;
+ EmblSequence sequence;
+ /**
+  * Returns the accession stored on this entry.
+  *
+  * @return the current value of the accession field
+  */
+ public String getAccession() {
+   return this.accession;
+ }
+ /**
+  * Replaces the accession stored on this entry.
+  *
+  * @param accession the new accession value
+  */
+ public void setAccession(String accession) {
+   this.accession = accession;
+ }
+ /**
+  * Returns the entry-level database references.
+  *
+  * @return the dbRefs vector held by this entry (the vector itself, not a copy)
+  */
+ public Vector getDbRefs() {
+   return this.dbRefs;
+ }
+ /**
+  * Replaces the entry-level database references.
+  *
+  * @param dbRefs the vector to store; it is kept by reference, not copied
+  */
+ public void setDbRefs(Vector dbRefs) {
+   this.dbRefs = dbRefs;
+ }
+ /**
+  * Returns the description stored on this entry.
+  *
+  * @return the current value of the desc field
+  */
+ public String getDesc() {
+   return this.desc;
+ }
+ /**
+  * Replaces the description stored on this entry.
+  *
+  * @param desc the new description
+  */
+ public void setDesc(String desc) {
+   this.desc = desc;
+ }
+ /**
+  * Returns the features attached to this entry.
+  *
+  * @return the features vector held by this entry (the vector itself, not a copy)
+  */
+ public Vector getFeatures() {
+   return this.features;
+ }
+ /**
+  * Replaces the features attached to this entry.
+  *
+  * @param features the vector to store; it is kept by reference, not copied
+  */
+ public void setFeatures(Vector features) {
+   this.features = features;
+ }
+ /**
+  * Returns the keywords stored on this entry.
+  *
+  * @return the keywords vector held by this entry (the vector itself, not a copy)
+  */
+ public Vector getKeywords() {
+   return this.keywords;
+ }
+ /**
+  * Replaces the keywords stored on this entry.
+  *
+  * @param keywords the vector to store; it is kept by reference, not copied
+  */
+ public void setKeywords(Vector keywords) {
+   this.keywords = keywords;
+ }
+ /**
+  * Returns the last-updated value stored on this entry.
+  *
+  * @return the current value of the lastUpdated field
+  */
+ public String getLastUpdated() {
+   return this.lastUpdated;
+ }
+ /**
+  * Replaces the last-updated value stored on this entry.
+  *
+  * @param lastUpdated the new last-updated value
+  */
+ public void setLastUpdated(String lastUpdated) {
+   this.lastUpdated = lastUpdated;
+ }
+ /**
+  * Returns the refs stored on this entry.
+  *
+  * @return the refs vector held by this entry (the vector itself, not a copy)
+  */
+ public Vector getRefs() {
+   return this.refs;
+ }
+ /**
+  * Replaces the refs stored on this entry.
+  *
+  * @param refs the vector to store; it is kept by reference, not copied
+  */
+ public void setRefs(Vector refs) {
+   this.refs = refs;
+ }
+ /**
+  * Returns the release-created value stored on this entry.
+  *
+  * @return the current value of the rCreated field
+  */
+ public String getRCreated() {
+   return this.rCreated;
+ }
+ /**
+  * Replaces the release-created value stored on this entry. Note that this
+  * method is spelled with a lower-case 'c', unlike the matching getter
+  * {@code getRCreated()}.
+  *
+  * @param releaseCreated the new value for the rCreated field
+  */
+ public void setRcreated(String releaseCreated) {
+   rCreated = releaseCreated;
+ }
+ /**
+  * Returns the release-last-updated value stored on this entry.
+  *
+  * @return the current value of the rLastUpdated field
+  */
+ public String getRLastUpdated() {
+   return this.rLastUpdated;
+ }
+ /**
+  * Replaces the release-last-updated value stored on this entry.
+  *
+  * @param releaseLastUpdated the new value for the rLastUpdated field
+  */
+ public void setRLastUpdated(String releaseLastUpdated) {
+   rLastUpdated = releaseLastUpdated;
+ }
+ /**
+  * Returns the sequence object stored on this entry.
+  *
+  * @return the current value of the sequence field
+  */
+ public EmblSequence getSequence() {
+   return this.sequence;
+ }
+ /**
+  * Replaces the sequence object stored on this entry.
+  *
+  * @param sequence the new sequence object; it is kept by reference
+  */
+ public void setSequence(EmblSequence sequence) {
+   this.sequence = sequence;
+ }
+ /**
+  * Returns the taxDivision value stored on this entry.
+  *
+  * @return the current value of the taxDivision field
+  */
+ public String getTaxDivision() {
+   return this.taxDivision;
+ }
+ /**
+  * Replaces the taxDivision value stored on this entry.
+  *
+  * @param taxDivision the new taxDivision value
+  */
+ public void setTaxDivision(String taxDivision) {
+   this.taxDivision = taxDivision;
+ }
+ /**
+  * Returns the version stored on this entry.
+  *
+  * @return the current value of the version field
+  */
+ public String getVersion() {
+   return this.version;
+ }
+ /**
+  * Replaces the version stored on this entry.
+  *
+  * @param version the new version value
+  */
+ public void setVersion(String version) {
+   this.version = version;
+ }
+/*
+ * EMBL Feature support is limited. The text below is included for the benefit of
+ * any developer working on improving EMBL feature import in Jalview.
+ * Extract from EMBL feature specification
+ * see http://www.embl-ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
+3.5 Location
+3.5.1 Purpose
+
+The location indicates the region of the presented sequence which corresponds
+to a feature.
+
+3.5.2 Format and conventions
+The location contains at least one sequence location descriptor and may
+contain one or more operators with one or more sequence location descriptors.
+Base numbers refer to the numbering in the entry. This numbering designates
+the first base (5' end) of the presented sequence as base 1.
+Base locations beyond the range of the presented sequence may not be used in
+location descriptors, the only exception being location in a remote entry (see
+3.5.2.1, e).
+
+Location operators and descriptors are discussed in more detail below.
+
+3.5.2.1 Location descriptors
+
+The location descriptor can be one of the following:
+(a) a single base number
+(b) a site between two indicated adjoining bases
+(c) a single base chosen from within a specified range of bases (not allowed for new
+ entries)
+(d) the base numbers delimiting a sequence span
+(e) a remote entry identifier followed by a local location descriptor
+ (i.e., a-d)
+
+A site between two adjoining nucleotides, such as endonucleolytic cleavage
+site, is indicated by listing the two points separated by a caret (^). The 
+permitted formats for this descriptor are n^n+1 (for example 55^56), or, for
+circular molecules, n^1, where "n" is the full length of the molecule, ie
+1000^1 for circular molecule with length 1000.
+
+A single base chosen from a range of bases is indicated by the first base
+number and the last base number of the range separated by a single period
+(e.g., '12.21' indicates a single base taken from between the indicated
+points). From October 2006 the usage of this descriptor is restricted :
+it is illegal to use "a single base from a range" (c) either on its own or
+in combination with the "sequence span" (d) descriptor for newly created entries.
+The existing entries where such descriptors exist are going to be retrofitted.
+
+Sequence spans are indicated by the starting base number and the ending base
+number separated by two periods (e.g., '34..456'). The '<' and '>' symbols may
+be used with the starting and ending base numbers to indicate that an end
+point is beyond the specified base number. The starting and ending base
+positions can be represented as distinct base numbers ('34..456') or a site
+between two indicated adjoining bases.
+
+A location in a remote entry (not the entry to which the feature table
+belongs) can be specified by giving the accession-number and sequence version
+of the remote entry, followed by a colon ":", followed by a location
+descriptor which applies to that entry's sequence (i.e. J12345.1:1..15, see
+also examples below)
+
+3.5.2.2 Operators
+
+The location operator is a prefix that specifies what must be done to the
+indicated sequence to find or construct the location corresponding to the
+feature. A list of operators is given below with their definitions and most
+common format.
+
+complement(location)
+Find the complement of the presented sequence in the span specified by "
+location" (i.e., read the complement of the presented strand in its 5'-to-3'
+direction)
+
+join(location,location, ... location)
+The indicated elements should be joined (placed end-to-end) to form one
+contiguous sequence
+
+order(location,location, ... location)
+The elements can be found in the
+specified order (5' to 3' direction), but nothing is implied about the
+reasonableness about joining them
+
+Note : location operator "complement" can be used in combination with either "
+join" or "order" within the same location; combinations of "join" and "order"
+within the same location (nested operators) are illegal.
+
+
+
+3.5.3 Location examples
+
+The following is a list of common location descriptors with their meanings:
+
+Location Description
+
+467 Points to a single base in the presented sequence
+
+340..565 Points to a continuous range of bases bounded by and
+ including the starting and ending bases
+
+<345..500 Indicates that the exact lower boundary point of a feature
+ is unknown. The location begins at some base previous to
+ the first base specified (which need not be contained in
+ the presented sequence) and continues to and includes the
+ ending base
+
+<1..888 The feature starts before the first sequenced base and
+ continues to and includes base 888
+
+1..>888 The feature starts at the first sequenced base and
+ continues beyond base 888
+
+102.110 Indicates that the exact location is unknown but that it is
+ one of the bases between bases 102 and 110, inclusive
+
+123^124 Points to a site between bases 123 and 124
+
+join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to form
+ one contiguous sequence
+
+
+complement(34..126) Start at the base complementary to 126 and finish at the
+ base complementary to base 34 (the feature is on the strand
+ complementary to the presented strand)
+
+
+complement(join(2691..4571,4918..5163))
+ Joins regions 2691 to 4571 and 4918 to 5163, then
+ complements the joined segments (the feature is on the
+ strand complementary to the presented strand)
+
+join(complement(4918..5163),complement(2691..4571))
+ Complements regions 4918 to 5163 and 2691 to 4571, then
+ joins the complemented segments (the feature is on the
+ strand complementary to the presented strand)
+
+J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in
+ this database) with primary accession number 'J00194'
+
+join(1..100,J00194.1:100..202)
+ Joins region 1..100 of the existing entry with the region
+ 100..202 of remote entry J00194
+
+ */
+ /**
+  * Builds Jalview dataset sequences from this EMBL entry.
+  * <p>
+  * Unless {@code noNa} is set, a nucleotide sequence named
+  * {@code sourceDb|accession} is created from the entry's sequence and given
+  * the entry description, a database reference to the entry itself, the
+  * entry-level database references and the database references of every
+  * feature. For each CDS feature that carries {@code translation} and
+  * {@code protein_id} qualifiers, the coding ranges are recorded on the
+  * nucleotide sequence as sequence features and, unless {@code noPeptide} is
+  * set, the translation is returned as a protein sequence.
+  * <p>
+  * As a side effect, the source of every database reference on a CDS feature
+  * is rewritten to its canonical name, and UNIPROT references on a CDS feature
+  * receive the DNA-to-protein mapping (or null when none could be built).
+  *
+  * @param noNa don't return nucleic acid sequences
+  * @param noPeptide don't return any translated protein sequences marked in features
+  * @param sourceDb database name used as the prefix of generated sequence names
+  *          and as the source of the entry's own database reference
+  * @return dataset sequences with DBRefs and features; protein products come
+  *         first, in feature order, and the nucleotide sequence (when
+  *         requested) is the last element
+  * @throws NumberFormatException if a {@code codon_start} qualifier is not an integer
+  * @throws NullPointerException if {@code noNa} is false and no sequence has been set
+  */
+ public jalview.datamodel.SequenceI[] getSequences(boolean noNa, boolean noPeptide, String sourceDb) {
+   Vector seqs = new Vector();
+   Sequence dna = null;
+   if (!noNa) {
+     dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence());
+     dna.setDescription(desc);
+     dna.addDBRef(new DBRefEntry(sourceDb, version, accession));
+     // TODO: add mapping for parentAccession attribute
+     // TODO: transform EMBL Database refs to canonical form
+     if (dbRefs != null) {
+       addAllDbRefs(dna, dbRefs.iterator());
+     }
+   }
+   // An entry without a feature table simply contributes no products.
+   if (features != null) {
+     for (Iterator i = features.iterator(); i.hasNext();) {
+       EmblFeature feature = (EmblFeature) i.next();
+       if (dna != null && feature.dbRefs != null) {
+         addAllDbRefs(dna, feature.dbRefs.iterator());
+       }
+       if (feature.getName().equalsIgnoreCase("CDS")) {
+         addCodingSequence(feature, dna, seqs, noPeptide, sourceDb);
+       } else {
+         // General feature type.
+         // NOTE(review): these refs were already offered to dna just above; the
+         // second pass is kept to preserve existing behaviour - confirm whether
+         // Sequence.addDBRef de-duplicates before removing it.
+         if (dna != null && feature.dbRefs != null) {
+           addAllDbRefs(dna, feature.dbRefs.iterator());
+         }
+       }
+     }
+   }
+   if (dna != null) {
+     seqs.add(dna);
+   }
+   SequenceI[] sqs = new SequenceI[seqs.size()];
+   seqs.copyInto(sqs);
+   return sqs;
+ }
+
+ /**
+  * Adds every DBRefEntry produced by the iterator to the target sequence.
+  */
+ private static void addAllDbRefs(Sequence target, Iterator refs) {
+   while (refs.hasNext()) {
+     target.addDBRef((DBRefEntry) refs.next());
+   }
+ }
+
+ /**
+  * Concatenates the element ranges of all locations of a feature.
+  *
+  * @return the ranges end to end in location order, or null when the feature
+  *         has no locations
+  */
+ private static int[] collectElementRanges(EmblFeature feature) {
+   int[] ranges = null;
+   if (feature.locations != null) {
+     for (Iterator locs = feature.locations.iterator(); locs.hasNext();) {
+       EmblFeatureLocations loc = (EmblFeatureLocations) locs.next();
+       int[] se = loc.getElementRanges();
+       if (ranges == null) {
+         ranges = se;
+       } else {
+         int[] joined = new int[ranges.length + se.length];
+         System.arraycopy(ranges, 0, joined, 0, ranges.length);
+         System.arraycopy(se, 0, joined, ranges.length, se.length);
+         ranges = joined;
+       }
+     }
+   }
+   return ranges;
+ }
+
+ /**
+  * Processes one CDS feature: optionally creates the protein product, records
+  * the coding ranges on the nucleotide sequence and canonicalises the
+  * feature's database references.
+  *
+  * @param feature the CDS feature
+  * @param dna nucleotide sequence to annotate, or null when none was requested
+  * @param seqs result list that receives the protein product
+  * @param noPeptide true to suppress creation of the protein product
+  * @param sourceDb database name used in generated names and descriptions
+  */
+ private void addCodingSequence(EmblFeature feature, Sequence dna, Vector seqs, boolean noPeptide, String sourceDb) {
+   // extract coding region(s)
+   int[] exon = collectElementRanges(feature);
+   String prseq = null;
+   String prname = "";
+   String prid = null;
+   Hashtable vals = new Hashtable();
+   int prstart = 1;
+   // get qualifiers
+   if (feature.getQualifiers() != null) {
+     for (Iterator quals = feature.getQualifiers().iterator(); quals.hasNext();) {
+       Qualifier q = (Qualifier) quals.next();
+       if (q.getName().equals("translation")) {
+         prseq = q.getValue();
+       } else if (q.getName().equals("protein_id")) {
+         prid = q.getValue();
+       } else if (q.getName().equals("codon_start")) {
+         prstart = Integer.parseInt(q.getValue());
+       } else if (q.getName().equals("product")) {
+         prname = q.getValue();
+       } else if (q.getValue() != null) {
+         // throw anything else into the additional properties hash
+         // (Hashtable rejects null values, so value-less qualifiers are skipped)
+         vals.put(q.getName(), q.getValue());
+       }
+     }
+   }
+   jalview.datamodel.Mapping map = null;
+   Sequence product = null;
+   if (prseq != null && prname != null && prid != null) {
+     int prend = prstart + prseq.length() - 1;
+     // extract proteins.
+     if (!noPeptide) {
+       product = new Sequence(sourceDb + "|" + "EMBLCDS|" + prid + "|" + prname, prseq, prstart, prend);
+       product.setDescription("Protein Product from " + sourceDb);
+       seqs.add(product);
+     }
+     // Without any location there is nothing to map or annotate.
+     if (exon != null) {
+       // we have everything - create the mapping and perhaps the protein sequence
+       map = new jalview.datamodel.Mapping(product, exon, new int[] { prstart, prend }, 3, 1);
+       // add cds feature to dna seq - this may include the stop codon
+       if (dna != null) {
+         // exon holds start/end pairs; a trailing unpaired value is ignored
+         for (int xint = 0; xint + 1 < exon.length; xint += 2) {
+           SequenceFeature sf = new SequenceFeature();
+           sf.setBegin(exon[xint]);
+           sf.setEnd(exon[xint + 1]);
+           sf.setType(feature.getName());
+           sf.setFeatureGroup(jalview.datamodel.DBRefSource.EMBL);
+           // xint advances by two per range, so halve it to number exons 1, 2, 3...
+           sf.setDescription("Exon " + (1 + xint / 2) + " for protein '" + prname + "' EMBLCDS:" + prid);
+           // copy the remaining qualifiers, keyed by qualifier name
+           for (Enumeration keys = vals.keys(); keys.hasMoreElements();) {
+             Object key = keys.nextElement();
+             sf.setValue(key.toString(), vals.get(key));
+           }
+           dna.addSequenceFeature(sf);
+         }
+       }
+     }
+   }
+   // add dbRefs to sequence
+   if (feature.dbRefs != null) {
+     for (Iterator dbr = feature.dbRefs.iterator(); dbr.hasNext();) {
+       DBRefEntry ref = (DBRefEntry) dbr.next();
+       ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref.getSource()));
+       if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT)) {
+         ref.setMap(map);
+       }
+       // TODO(review): the previous code built an unmapped copy of each ref when
+       // a product existed but never attached it to anything; it looks as if it
+       // was meant to be added to the product - confirm before wiring it in.
+       if (dna != null) {
+         dna.addDBRef(ref);
+       }
+     }
+   }
+ }
+}