return null;
}
dna.setDescription(description);
- DBRefEntry retrievedref = new DBRefEntry(sourceDb,
- getSequenceVersion(), accession);
+ DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(),
+ accession);
dna.addDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
- retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
- new int[] { 1, dna.getLength() }, 1, 1));
+ retrievedref
+ .setMap(new Mapping(null, new int[]
+ { 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1,
+ 1));
/*
* transform EMBL Database refs to canonical form
{
if (sequence == null)
{
- System.err.println("No sequence was returned for ENA accession "
- + accession);
+ System.err.println(
+ "No sequence was returned for ENA accession " + accession);
return null;
}
SequenceI dna = new Sequence(sourceDb + "|" + accession,
* helper to match xrefs in already retrieved sequences
*/
void parseCodingFeature(EmblFeature feature, String sourceDb,
- SequenceI dna, List<SequenceI> peptides, SequenceIdMatcher matcher)
+ SequenceI dna, List<SequenceI> peptides,
+ SequenceIdMatcher matcher)
{
boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
if (qname.equals("translation"))
{
// remove all spaces (precompiled String.replaceAll(" ", ""))
- translation = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll(
- "");
+ translation = SPACE_PATTERN.matcher(q.getValues()[0])
+ .replaceAll("");
}
else if (qname.equals("protein_id"))
{
codonStart = Integer.parseInt(q.getValues()[0].trim());
} catch (NumberFormatException e)
{
- System.err.println("Invalid codon_start in XML for "
- + accession + ": " + e.getMessage());
+ System.err.println("Invalid codon_start in XML for " + accession
+ + ": " + e.getMessage());
}
}
else if (qname.equals("product"))
product = matcher.findIdMatch(proteinId);
if (product == null)
{
- product = new Sequence(proteinId, translation, 1, translationLength);
- product.setDescription(((proteinName.length() == 0) ? "Protein Product from "
- + sourceDb
+ product = new Sequence(proteinId, translation, 1,
+ translationLength);
+ product.setDescription(((proteinName.length() == 0)
+ ? "Protein Product from " + sourceDb
: proteinName));
peptides.add(product);
matcher.add(product);
* workaround until we handle dna location for CDS sequence
* e.g. location="X53828.1:60..1058" correctly
*/
- System.err
- .println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("
+ System.err.println(
+ "Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("
+ sourceDb + ":" + getAccession() + ")");
- if (translationLength * 3 == (1 - codonStart + dna.getSequence().length))
+ int dnaLength = dna.getLength();
+ if (translationLength * 3 == (1 - codonStart + dnaLength))
{
- System.err
- .println("Not allowing for additional stop codon at end of cDNA fragment... !");
+ System.err.println(
+ "Not allowing for additional stop codon at end of cDNA fragment... !");
// this might occur for CDS sequences where no features are marked
exons = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() };
- dnaToProteinMapping = new Mapping(product, exons, new int[] { 1,
- translationLength }, 3, 1);
+ dnaToProteinMapping = new Mapping(product, exons,
+ new int[]
+ { 1, translationLength }, 3, 1);
}
- if ((translationLength + 1) * 3 == (1 - codonStart + dna
- .getSequence().length))
+ if ((translationLength + 1) * 3 == (1 - codonStart + dnaLength))
{
- System.err
- .println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");
+ System.err.println(
+ "Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");
exons = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() - 3 };
- dnaToProteinMapping = new Mapping(product, exons, new int[] { 1,
- translationLength }, 3, 1);
+ dnaToProteinMapping = new Mapping(product, exons,
+ new int[]
+ { 1, translationLength }, 3, 1);
}
}
else
else
{
// final product length truncation check
- int[] cdsRanges = adjustForProteinLength(translationLength, exons);
- dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] {
- 1, translationLength }, 3, 1);
+ int[] cdsRanges = adjustForProteinLength(translationLength,
+ exons);
+ dnaToProteinMapping = new Mapping(product, cdsRanges,
+ new int[]
+ { 1, translationLength }, 3, 1);
if (product != null)
{
/*
* make xref with mapping from protein to EMBL dna
*/
DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL,
- getSequenceVersion(), proteinId, new Mapping(
- dnaToProteinMapping.getMap().getInverse()));
+ getSequenceVersion(), proteinId,
+ new Mapping(dnaToProteinMapping.getMap().getInverse()));
product.addDBRef(proteinToEmblRef);
/*
* make xref from protein to EMBLCDS; we assume here that the
* CDS sequence version is same as dna sequence (?!)
*/
- MapList proteinToCdsMapList = new MapList(new int[] { 1,
- translationLength }, new int[] { 1 + (codonStart - 1),
- (codonStart - 1) + 3 * translationLength }, 1, 3);
+ MapList proteinToCdsMapList = new MapList(
+ new int[]
+ { 1, translationLength },
+ new int[]
+ { 1 + (codonStart - 1),
+ (codonStart - 1) + 3 * translationLength },
+ 1, 3);
DBRefEntry proteinToEmblCdsRef = new DBRefEntry(
DBRefSource.EMBLCDS, getSequenceVersion(), proteinId,
new Mapping(proteinToCdsMapList));
/*
* add cds features to dna sequence
*/
- for (int xint = 0; exons != null && xint < exons.length; xint += 2)
+ String cds = feature.getName(); // "CDS"
+ for (int xint = 0; exons != null && xint < exons.length - 1; xint += 2)
{
- SequenceFeature sf = makeCdsFeature(exons, xint, proteinName,
- proteinId, vals, codonStart);
- sf.setType(feature.getName()); // "CDS"
+ int exonStart = exons[xint];
+ int exonEnd = exons[xint + 1];
+ int begin = Math.min(exonStart, exonEnd);
+ int end = Math.max(exonStart, exonEnd);
+ int exonNumber = xint / 2 + 1;
+ String desc = String.format("Exon %d for protein '%s' EMBLCDS:%s",
+ exonNumber, proteinName, proteinId);
+
+ SequenceFeature sf = makeCdsFeature(cds, desc, begin, end,
+ sourceDb, vals);
+
sf.setEnaLocation(feature.getLocation());
- sf.setFeatureGroup(sourceDb);
+ boolean forwardStrand = exonStart <= exonEnd;
+ sf.setStrand(forwardStrand ? "+" : "-");
+ sf.setPhase(String.valueOf(codonStart - 1));
+ sf.setValue(FeatureProperties.EXONPOS, exonNumber);
+ sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
+
dna.addSequenceFeature(sf);
}
}
// Add converse mapping reference
if (dnaToProteinMapping != null)
{
- Mapping pmap = new Mapping(dna, dnaToProteinMapping.getMap()
- .getInverse());
+ Mapping pmap = new Mapping(dna,
+ dnaToProteinMapping.getMap().getInverse());
pref = new DBRefEntry(sourceDb, getSequenceVersion(),
this.getAccession());
pref.setMap(pmap);
if (proteinToEmblProteinRef == null)
{
// assuming CDSPROTEIN sequence version = dna version (?!)
- proteinToEmblProteinRef = new DBRefEntry(
- DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
+ proteinToEmblProteinRef = new DBRefEntry(DBRefSource.EMBLCDSProduct,
+ getSequenceVersion(), proteinId);
}
product.addDBRef(proteinToEmblProteinRef);
&& dnaToProteinMapping.getTo() != null)
{
DBRefEntry dnaToEmblProteinRef = new DBRefEntry(
- DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
+ DBRefSource.EMBLCDSProduct, getSequenceVersion(),
+ proteinId);
dnaToEmblProteinRef.setMap(dnaToProteinMapping);
dnaToProteinMapping.setMappedFromId(proteinId);
dna.addDBRef(dnaToEmblProteinRef);
/**
* Helper method to construct a SequenceFeature for one cds range
*
- * @param exons
- * array of cds [start, end, ...] positions
- * @param exonStartIndex
- * offset into the exons array
- * @param proteinName
- * @param proteinAccessionId
+ * @param type
+ * feature type ("CDS")
+ * @param desc
+ * description
+ * @param begin
+ * start position
+ * @param end
+ * end position
+ * @param group
+ * feature group
* @param vals
* map of 'miscellaneous values' for feature
- * @param codonStart
- * codon start position for CDS (1/2/3, normally 1)
* @return
*/
- protected SequenceFeature makeCdsFeature(int[] exons, int exonStartIndex,
- String proteinName, String proteinAccessionId,
- Map<String, String> vals, int codonStart)
- {
- int exonNumber = exonStartIndex / 2 + 1;
- SequenceFeature sf = new SequenceFeature();
- sf.setBegin(Math.min(exons[exonStartIndex], exons[exonStartIndex + 1]));
- sf.setEnd(Math.max(exons[exonStartIndex], exons[exonStartIndex + 1]));
- sf.setDescription(String.format("Exon %d for protein '%s' EMBLCDS:%s",
- exonNumber, proteinName, proteinAccessionId));
- sf.setPhase(String.valueOf(codonStart - 1));
- sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? "+"
- : "-");
- sf.setValue(FeatureProperties.EXONPOS, exonNumber);
- sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
+ protected SequenceFeature makeCdsFeature(String type, String desc,
+ int begin, int end, String group, Map<String, String> vals)
+ {
+ SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
if (!vals.isEmpty())
{
StringBuilder sb = new StringBuilder();
return listToArray(ranges);
} catch (ParseException e)
{
- Cache.log.warn(String.format(
- "Not parsing inexact CDS location %s in ENA %s",
- feature.location, this.accession));
+ Cache.log.warn(
+ String.format("Not parsing inexact CDS location %s in ENA %s",
+ feature.location, this.accession));
return new int[] {};
}
}