package jalview.datamodel.xdb.embl;
import jalview.analysis.SequenceIdMatcher;
+import jalview.bin.Cache;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.FeatureProperties;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
+import jalview.util.DnaUtils;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;
+import java.text.ParseException;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.List;
String accession;
- String version;
+ String entryVersion;
- String taxDivision;
+ String sequenceVersion;
- String desc;
+ String dataClass;
- String rCreated;
+ String moleculeType;
- String rLastUpdated;
+ String topology;
- String lastUpdated;
+ String sequenceLength;
+
+ String taxonomicDivision;
+
+ String description;
+
+ String firstPublicDate;
+
+ String firstPublicRelease;
+
+ String lastUpdatedDate;
+
+ String lastUpdatedRelease;
Vector<String> keywords;
}
/**
- * @return the desc
- */
- public String getDesc()
- {
- return desc;
- }
-
- /**
- * @param desc
- * the desc to set
- */
- public void setDesc(String desc)
- {
- this.desc = desc;
- }
-
- /**
* @return the features
*/
public Vector<EmblFeature> getFeatures()
}
/**
- * @return the lastUpdated
- */
- public String getLastUpdated()
- {
- return lastUpdated;
- }
-
- /**
- * @param lastUpdated
- * the lastUpdated to set
- */
- public void setLastUpdated(String lastUpdated)
- {
- this.lastUpdated = lastUpdated;
- }
-
- /**
- * @return the releaseCreated
- */
- public String getRCreated()
- {
- return rCreated;
- }
-
- /**
- * @param releaseCreated
- * the releaseCreated to set
- */
- public void setRCreated(String releaseCreated)
- {
- this.rCreated = releaseCreated;
- }
-
- /**
- * @return the releaseLastUpdated
- */
- public String getRLastUpdated()
- {
- return rLastUpdated;
- }
-
- /**
- * @param releaseLastUpdated
- * the releaseLastUpdated to set
- */
- public void setRLastUpdated(String releaseLastUpdated)
- {
- this.rLastUpdated = releaseLastUpdated;
- }
-
- /**
* @return the sequence
*/
public EmblSequence getSequence()
}
/**
- * @return the taxDivision
- */
- public String getTaxDivision()
- {
- return taxDivision;
- }
-
- /**
- * @param taxDivision
- * the taxDivision to set
- */
- public void setTaxDivision(String taxDivision)
- {
- this.taxDivision = taxDivision;
- }
-
- /**
- * @return the version
- */
- public String getVersion()
- {
- return version;
- }
-
- /**
- * @param version
- * the version to set
- */
- public void setVersion(String version)
- {
- this.version = version;
- }
-
- /**
* Recover annotated sequences from EMBL file
*
* @param sourceDb
{
SequenceI dna = new Sequence(sourceDb + "|" + accession,
sequence.getSequence());
- dna.setDescription(desc);
- DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession);
+ dna.setDescription(description);
+ DBRefEntry retrievedref = new DBRefEntry(sourceDb,
+ getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
new int[] { 1, dna.getLength() }, 1, 1));
- // TODO: transform EMBL Database refs to canonical form
+
if (dbRefs != null)
{
for (DBRefEntry dbref : dbRefs)
}
}
+ SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
try
{
for (EmblFeature feature : features)
}
if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))
{
- parseCodingFeature(feature, sourceDb, dna, peptides);
+ parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
}
}
} catch (Exception e)
* list of protein product sequences for Embl entry
*/
void parseCodingFeature(EmblFeature feature, String sourceDb,
- SequenceI dna, List<SequenceI> peptides)
+ SequenceI dna, List<SequenceI> peptides, SequenceIdMatcher matcher)
{
boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
String prname = "";
String prid = null;
Map<String, String> vals = new Hashtable<String, String>();
- SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
/*
* codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS
}
else if (qname.equals("protein_id"))
{
- prid = q.getValues()[0];
+ prid = q.getValues()[0].trim();
}
else if (qname.equals("codon_start"))
{
try
{
- codonStart = Integer.parseInt(q.getValues()[0]);
+ codonStart = Integer.parseInt(q.getValues()[0].trim());
} catch (NumberFormatException e)
{
System.err.println("Invalid codon_start in XML for "
else if (qname.equals("product"))
{
// sometimes name is returned e.g. for V00488
- prname = q.getValues()[0];
+ prname = q.getValues()[0].trim();
}
else
{
DBRefEntry pcdnaref = new DBRefEntry();
pcdnaref.setAccessionId(prid);
pcdnaref.setSource(DBRefSource.EMBLCDS);
- pcdnaref.setVersion(getVersion()); // same as parent EMBL version.
+ pcdnaref.setVersion(getSequenceVersion()); // same as parent EMBL
+ // version.
MapList mp = new MapList(new int[] { 1, prseq.length() },
new int[] { 1 + (codonStart - 1),
(codonStart - 1) + 3 * prseq.length() }, 1, 3);
SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals,
codonStart);
sf.setType(feature.getName()); // "CDS"
+ sf.setEnaLocation(feature.getLocation());
sf.setFeatureGroup(sourceDb);
dna.addSequenceFeature(sf);
}
if (map != null)
{
Mapping pmap = new Mapping(dna, map.getMap().getInverse());
- pref = new DBRefEntry(sourceDb, getVersion(),
+ pref = new DBRefEntry(sourceDb, getSequenceVersion(),
this.getAccession());
pref.setMap(pmap);
if (map.getTo() != null)
protEMBLCDS = new DBRefEntry();
protEMBLCDS.setAccessionId(prid);
protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
- protEMBLCDS.setVersion(getVersion());
+ protEMBLCDS.setVersion(getSequenceVersion());
protEMBLCDS
.setMap(new Mapping(product, map.getMap().getInverse()));
}
}
/**
- * Returns the CDS positions as a list of [start, end, start, end...]
+ * Returns the CDS positions as a single array of [start, end, start, end...]
* positions. If on the reverse strand, these will be in descending order.
+ * <p>
+ * The feature's location string is parsed with DnaUtils.parseLocation and the
+ * resulting ranges are flattened with listToArray. An empty array is returned
+ * if the feature has no location, or if parsing throws a ParseException (in
+ * which case a warning naming the location and this entry's accession is
+ * logged).
*
* @param feature
+ * the feature whose location is converted to ranges
+ * @return the flattened [start, end, ...] positions, or an empty array if no
+ * ranges could be obtained
*/
protected int[] getCdsRanges(EmblFeature feature)
{
- if (feature.locations == null)
+ if (feature.location == null)
{
return new int[] {};
}
- int cdsBoundaryCount = 0; // count of all start/stop locations
- int[][] cdsLocations = new int[feature.locations.size()][];
- int locationNumber = 0;
- for (EmblFeatureLocations loc : feature.locations)
+
+ try
{
- int[] locationRanges = loc.getElementRanges(accession);
- cdsLocations[locationNumber++] = locationRanges;
- cdsBoundaryCount += locationRanges.length;
- }
- int[] cdsRanges = new int[cdsBoundaryCount];
- int copyTo = 0;
- for (int[] ranges : cdsLocations)
+ List<int[]> ranges = DnaUtils.parseLocation(feature.location);
+ return listToArray(ranges);
+ } catch (ParseException e)
{
- System.arraycopy(ranges, 0, cdsRanges, copyTo, ranges.length);
- copyTo += ranges.length;
+ Cache.log.warn(String.format(
+ "Not parsing inexact CDS location %s in ENA %s",
+ feature.location, this.accession));
+ return new int[] {};
}
- return cdsRanges;
+ }
+ /**
+ * Converts a list of [start, end] ranges to a single array of [start, end,
+ * start, end ...]. List order is preserved and only the first two elements
+ * of each range are copied; an empty list gives an empty array. A null list,
+ * or a range with fewer than two elements, is not guarded against here.
+ *
+ * @param ranges
+ * the ranges to flatten, each holding a start followed by an end
+ * @return a newly allocated array of length 2 * ranges.size()
+ */
+ int[] listToArray(List<int[]> ranges)
+ {
+ int[] result = new int[ranges.size() * 2];
+ int i = 0;
+ for (int[] range : ranges)
+ {
+ result[i++] = range[0];
+ result[i++] = range[1];
+ }
+ return result;
}
/**
}
return exon;
}
+
+ /**
+ * Returns the sequence version held for this entry. Within this class the
+ * value is used as the version of the database references built for the
+ * entry and for its coding features.
+ *
+ * @return the sequenceVersion
+ */
+ public String getSequenceVersion()
+ {
+ return sequenceVersion;
+ }
+
+ /**
+ * Stores the sequence version for this entry. The value is kept as given,
+ * without validation.
+ *
+ * @param sequenceVersion
+ * the sequenceVersion to set
+ */
+ public void setSequenceVersion(String sequenceVersion)
+ {
+ this.sequenceVersion = sequenceVersion;
+ }
+
+ /**
+ * Returns the stored sequence length. Note that the value is held as a
+ * String, not as a number.
+ *
+ * @return the sequenceLength
+ */
+ public String getSequenceLength()
+ {
+ return sequenceLength;
+ }
+
+ /**
+ * Stores the sequence length. The value is kept as the String given; it is
+ * not parsed or checked against the actual sequence.
+ *
+ * @param sequenceLength
+ * the sequenceLength to set
+ */
+ public void setSequenceLength(String sequenceLength)
+ {
+ this.sequenceLength = sequenceLength;
+ }
+
+ /**
+ * Returns the stored entry version. This is a separate field from the
+ * sequence version returned by getSequenceVersion().
+ *
+ * @return the entryVersion
+ */
+ public String getEntryVersion()
+ {
+ return entryVersion;
+ }
+
+ /**
+ * Stores the entry version. The value is kept as given, without validation.
+ *
+ * @param entryVersion
+ * the entryVersion to set
+ */
+ public void setEntryVersion(String entryVersion)
+ {
+ this.entryVersion = entryVersion;
+ }
+
+ /**
+ * Returns the stored molecule type.
+ *
+ * @return the moleculeType
+ */
+ public String getMoleculeType()
+ {
+ return moleculeType;
+ }
+
+ /**
+ * Stores the molecule type. The value is kept as given, without validation.
+ *
+ * @param moleculeType
+ * the moleculeType to set
+ */
+ public void setMoleculeType(String moleculeType)
+ {
+ this.moleculeType = moleculeType;
+ }
+
+ /**
+ * Returns the stored topology value.
+ *
+ * @return the topology
+ */
+ public String getTopology()
+ {
+ return topology;
+ }
+
+ /**
+ * Stores the topology value. The value is kept as given, without validation.
+ *
+ * @param topology
+ * the topology to set
+ */
+ public void setTopology(String topology)
+ {
+ this.topology = topology;
+ }
+
+ /**
+ * Returns the stored taxonomic division.
+ *
+ * @return the taxonomicDivision
+ */
+ public String getTaxonomicDivision()
+ {
+ return taxonomicDivision;
+ }
+
+ /**
+ * Stores the taxonomic division. The value is kept as given, without
+ * validation.
+ *
+ * @param taxonomicDivision
+ * the taxonomicDivision to set
+ */
+ public void setTaxonomicDivision(String taxonomicDivision)
+ {
+ this.taxonomicDivision = taxonomicDivision;
+ }
+
+ /**
+ * Returns the stored description. Within this class the same field is used
+ * as the description of the DNA sequence built from the entry.
+ *
+ * @return the description
+ */
+ public String getDescription()
+ {
+ return description;
+ }
+
+ /**
+ * Stores the description. The value is kept as given, without validation.
+ *
+ * @param description
+ * the description to set
+ */
+ public void setDescription(String description)
+ {
+ this.description = description;
+ }
+
+ /**
+ * Returns the stored first-public date. The value is held as a String; its
+ * date format is not defined or checked here.
+ *
+ * @return the firstPublicDate
+ */
+ public String getFirstPublicDate()
+ {
+ return firstPublicDate;
+ }
+
+ /**
+ * Stores the first-public date. The value is kept as the String given; it is
+ * not parsed as a date.
+ *
+ * @param firstPublicDate
+ * the firstPublicDate to set
+ */
+ public void setFirstPublicDate(String firstPublicDate)
+ {
+ this.firstPublicDate = firstPublicDate;
+ }
+
+ /**
+ * Returns the stored first-public release value, held as a String.
+ *
+ * @return the firstPublicRelease
+ */
+ public String getFirstPublicRelease()
+ {
+ return firstPublicRelease;
+ }
+
+ /**
+ * Stores the first-public release value. The value is kept as given, without
+ * validation.
+ *
+ * @param firstPublicRelease
+ * the firstPublicRelease to set
+ */
+ public void setFirstPublicRelease(String firstPublicRelease)
+ {
+ this.firstPublicRelease = firstPublicRelease;
+ }
+
+ /**
+ * Returns the stored last-updated date. The value is held as a String; its
+ * date format is not defined or checked here.
+ *
+ * @return the lastUpdatedDate
+ */
+ public String getLastUpdatedDate()
+ {
+ return lastUpdatedDate;
+ }
+
+ /**
+ * Stores the last-updated date. The value is kept as the String given; it is
+ * not parsed as a date.
+ *
+ * @param lastUpdatedDate
+ * the lastUpdatedDate to set
+ */
+ public void setLastUpdatedDate(String lastUpdatedDate)
+ {
+ this.lastUpdatedDate = lastUpdatedDate;
+ }
+
+ /**
+ * Returns the stored last-updated release value, held as a String.
+ *
+ * @return the lastUpdatedRelease
+ */
+ public String getLastUpdatedRelease()
+ {
+ return lastUpdatedRelease;
+ }
+
+ /**
+ * Stores the last-updated release value. The value is kept as given, without
+ * validation.
+ *
+ * @param lastUpdatedRelease
+ * the lastUpdatedRelease to set
+ */
+ public void setLastUpdatedRelease(String lastUpdatedRelease)
+ {
+ this.lastUpdatedRelease = lastUpdatedRelease;
+ }
+
+ /**
+ * Returns the stored data class value.
+ *
+ * @return the dataClass
+ */
+ public String getDataClass()
+ {
+ return dataClass;
+ }
+
+ /**
+ * Stores the data class value. The value is kept as given, without
+ * validation.
+ *
+ * @param dataClass
+ * the dataClass to set
+ */
+ public void setDataClass(String dataClass)
+ {
+ this.dataClass = dataClass;
+ }
}