package jalview.datamodel.xdb.embl;
import jalview.analysis.SequenceIdMatcher;
+import jalview.bin.Cache;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.FeatureProperties;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;
+import java.text.ParseException;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.List;
String sequenceVersion;
+ String dataClass;
+
String moleculeType;
String topology;
String sequenceLength;
- String taxDivision;
+ String taxonomicDivision;
+
+ String description;
- String desc;
+ String firstPublicDate;
- String rCreated;
+ String firstPublicRelease;
- String rLastUpdated;
+ String lastUpdatedDate;
- String lastUpdated;
+ String lastUpdatedRelease;
Vector<String> keywords;
}
/**
- * @return the desc
- */
- public String getDesc()
- {
- return desc;
- }
-
- /**
- * @param desc
- * the desc to set
- */
- public void setDesc(String desc)
- {
- this.desc = desc;
- }
-
- /**
* @return the features
*/
public Vector<EmblFeature> getFeatures()
}
/**
- * @return the lastUpdated
- */
- public String getLastUpdated()
- {
- return lastUpdated;
- }
-
- /**
- * @param lastUpdated
- * the lastUpdated to set
- */
- public void setLastUpdated(String lastUpdated)
- {
- this.lastUpdated = lastUpdated;
- }
-
- /**
- * @return the releaseCreated
- */
- public String getRCreated()
- {
- return rCreated;
- }
-
- /**
- * @param releaseCreated
- * the releaseCreated to set
- */
- public void setRCreated(String releaseCreated)
- {
- this.rCreated = releaseCreated;
- }
-
- /**
- * @return the releaseLastUpdated
- */
- public String getRLastUpdated()
- {
- return rLastUpdated;
- }
-
- /**
- * @param releaseLastUpdated
- * the releaseLastUpdated to set
- */
- public void setRLastUpdated(String releaseLastUpdated)
- {
- this.rLastUpdated = releaseLastUpdated;
- }
-
- /**
* @return the sequence
*/
public EmblSequence getSequence()
}
/**
- * @return the taxDivision
- */
- public String getTaxDivision()
- {
- return taxDivision;
- }
-
- /**
- * @param taxDivision
- * the taxDivision to set
- */
- public void setTaxDivision(String taxDivision)
- {
- this.taxDivision = taxDivision;
- }
-
- /**
- * @return the entry version
- */
- public String getEntryVersion()
- {
- return entryVersion;
- }
-
- /**
- * @param version
- * the version to set
- */
- public void setEntryVersion(String version)
- {
- this.entryVersion = version;
- }
-
- /**
* Recover annotated sequences from EMBL file
*
* @param sourceDb
{
SequenceI dna = new Sequence(sourceDb + "|" + accession,
sequence.getSequence());
- dna.setDescription(desc);
+ dna.setDescription(description);
DBRefEntry retrievedref = new DBRefEntry(sourceDb,
getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
new int[] { 1, dna.getLength() }, 1, 1));
- // TODO: transform EMBL Database refs to canonical form
+
+
+ /*
+ * transform EMBL Database refs to canonical form
+ */
if (dbRefs != null)
{
for (DBRefEntry dbref : dbRefs)
{
+ dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource()));
dna.addDBRef(dbref);
}
}
+ SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
try
{
for (EmblFeature feature : features)
{
- if (feature.dbRefs != null)
- {
- for (DBRefEntry dbref : feature.dbRefs)
- {
- dna.addDBRef(dbref);
- }
- }
if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))
{
- parseCodingFeature(feature, sourceDb, dna, peptides);
+ parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
}
}
} catch (Exception e)
* parent dna sequence for this record
* @param peptides
* list of protein product sequences for Embl entry
+ * @param matcher
+ * helper to match xrefs in already retrieved sequences
*/
void parseCodingFeature(EmblFeature feature, String sourceDb,
- SequenceI dna, List<SequenceI> peptides)
+ SequenceI dna, List<SequenceI> peptides, SequenceIdMatcher matcher)
{
boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
String prname = "";
String prid = null;
Map<String, String> vals = new Hashtable<String, String>();
- SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
/*
* codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS
}
else if (qname.equals("protein_id"))
{
- prid = q.getValues()[0];
+ prid = q.getValues()[0].trim();
}
else if (qname.equals("codon_start"))
{
try
{
- codonStart = Integer.parseInt(q.getValues()[0]);
+ codonStart = Integer.parseInt(q.getValues()[0].trim());
} catch (NumberFormatException e)
{
System.err.println("Invalid codon_start in XML for "
else if (qname.equals("product"))
{
// sometimes name is returned e.g. for V00488
- prname = q.getValues()[0];
+ prname = q.getValues()[0].trim();
}
else
{
SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals,
codonStart);
sf.setType(feature.getName()); // "CDS"
+ sf.setEnaLocation(feature.getLocation());
sf.setFeatureGroup(sourceDb);
dna.addSequenceFeature(sf);
}
boolean mappingUsed = false;
for (DBRefEntry ref : feature.dbRefs)
{
+ /*
+ * ensure UniProtKB/Swiss-Prot converted to UNIPROT
+ */
ref.setSource(DBRefUtils.getCanonicalName(ref.getSource()));
if (ref.getSource().equals(DBRefSource.UNIPROT))
{
}
/**
- * Returns the CDS positions as a list of [start, end, start, end...]
+ * Returns the CDS positions as a single array of [start, end, start, end...]
* positions. If on the reverse strand, these will be in descending order.
*
* @param feature
{
return new int[] {};
}
- List<int[]> ranges = DnaUtils.parseLocation(feature.location);
- return ranges == null ? new int[] {} : listToArray(ranges);
+
+ try
+ {
+ List<int[]> ranges = DnaUtils.parseLocation(feature.location);
+ return listToArray(ranges);
+ } catch (ParseException e)
+ {
+ Cache.log.warn(String.format(
+ "Not parsing inexact CDS location %s in ENA %s",
+ feature.location, this.accession));
+ return new int[] {};
+ }
}
/**
this.sequenceVersion = sequenceVersion;
}
+ /** @return the sequenceLength value, held as a String (no numeric parsing is done here) */ public String getSequenceLength()
+ {
+ return sequenceLength;
+ }
+
+ /** @param sequenceLength the value to store in the sequenceLength field, kept as given */ public void setSequenceLength(String sequenceLength)
+ {
+ this.sequenceLength = sequenceLength;
+ }
+
+ /** @return the entryVersion value as stored (a String) */ public String getEntryVersion()
+ {
+ return entryVersion;
+ }
+
+ /** @param entryVersion the value to store in the entryVersion field, kept as given */ public void setEntryVersion(String entryVersion)
+ {
+ this.entryVersion = entryVersion;
+ }
+
public String getMoleculeType()
{
return moleculeType;
this.topology = topology;
}
- public String getSequenceLength()
+ /** @return the taxonomicDivision value as stored (a String) */ public String getTaxonomicDivision()
{
- return sequenceLength;
+ return taxonomicDivision;
}
- public void setSequenceLength(String sequenceLength)
+ /** @param taxonomicDivision the value to store in the taxonomicDivision field, kept as given */ public void setTaxonomicDivision(String taxonomicDivision)
{
- this.sequenceLength = sequenceLength;
+ this.taxonomicDivision = taxonomicDivision;
+ }
+
+ /** @return the description value as stored; may be null if it was never set */ public String getDescription()
+ {
+ return description;
+ }
+
+ /** @param description the value to store in the description field, kept as given */ public void setDescription(String description)
+ {
+ this.description = description;
+ }
+
+ /** @return the firstPublicDate value as stored, an unparsed String */ public String getFirstPublicDate()
+ {
+ return firstPublicDate;
+ }
+
+ /** @param firstPublicDate the value to store in the firstPublicDate field; no date parsing is done here */ public void setFirstPublicDate(String firstPublicDate)
+ {
+ this.firstPublicDate = firstPublicDate;
+ }
+
+ /** @return the firstPublicRelease value as stored (a String) */ public String getFirstPublicRelease()
+ {
+ return firstPublicRelease;
+ }
+
+ /** @param firstPublicRelease the value to store in the firstPublicRelease field, kept as given */ public void setFirstPublicRelease(String firstPublicRelease)
+ {
+ this.firstPublicRelease = firstPublicRelease;
+ }
+
+ /** @return the lastUpdatedDate value as stored, an unparsed String */ public String getLastUpdatedDate()
+ {
+ return lastUpdatedDate;
+ }
+
+ /** @param lastUpdatedDate the value to store in the lastUpdatedDate field; no date parsing is done here */ public void setLastUpdatedDate(String lastUpdatedDate)
+ {
+ this.lastUpdatedDate = lastUpdatedDate;
}
- public String getrCreated()
+ /** @return the lastUpdatedRelease value as stored (a String) */ public String getLastUpdatedRelease()
{
- return rCreated;
+ return lastUpdatedRelease;
}
- public void setrCreated(String rCreated)
+ /** @param lastUpdatedRelease the value to store in the lastUpdatedRelease field, kept as given */ public void setLastUpdatedRelease(String lastUpdatedRelease)
{
- this.rCreated = rCreated;
+ this.lastUpdatedRelease = lastUpdatedRelease;
}
- public String getrLastUpdated()
+ /** @return the dataClass value as stored (a String) */ public String getDataClass()
{
- return rLastUpdated;
+ return dataClass;
}
- public void setrLastUpdated(String rLastUpdated)
+ /** @param dataClass the value to store in the dataClass field, kept as given */ public void setDataClass(String dataClass)
{
- this.rLastUpdated = rLastUpdated;
+ this.dataClass = dataClass;
}
}