mappedCds = makeCdsSequences(ds, acf,
newMapping);
if (!mappedCds.isEmpty())
{
cdsSequences.addAll(mappedCds);
newMappings.add(newMapping);
}
}
}
AlignmentI al = new Alignment(
cdsSequences.toArray(new SequenceI[cdsSequences.size()]));
al.setDataset(null);
/*
* Replace the old mappings with the new ones
*/
mappings.clear();
mappings.addAll(newMappings);
return al;
}
/**
* Helper method to make cds-only sequences and populate their mappings to
* protein products
*
* For example, if ggCCaTTcGAg has mappings [3, 4, 6, 7, 9, 10] to protein
* then generate a sequence CCTTGA with mapping [1, 6] to the same protein
* residues
*
* Typically eukaryotic dna will include cds encoding for a single peptide
* sequence i.e. return a single result. Bacterial dna may have overlapping
* cds mappings coding for multiple peptides so return multiple results
* (example EMBL KF591215).
*
* @param dnaSeq
* a dna dataset sequence
* @param mapping
* containing one or more mappings of the sequence to protein
* @param newMappings
* the new mapping to populate, from the cds-only sequences to their
* mapped protein sequences
* @return
*/
protected static List makeCdsSequences(SequenceI dnaSeq,
AlignedCodonFrame mapping, AlignedCodonFrame newMappings)
{
List cdsSequences = new ArrayList();
List seqMappings = mapping.getMappingsForSequence(dnaSeq);
for (Mapping seqMapping : seqMappings)
{
SequenceI cds = makeCdsSequence(dnaSeq, seqMapping);
cdsSequences.add(cds);
/*
* add new mappings, from dna to cds, and from cds to peptide
*/
MapList dnaToCds = addCdsMappings(dnaSeq, cds, seqMapping,
newMappings);
/*
* transfer any features on dna that overlap the CDS
*/
transferFeatures(dnaSeq, cds, dnaToCds, null, "CDS" /* SequenceOntology.CDS */);
}
return cdsSequences;
}
/**
* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
* feature start/end ranges, optionally omitting specified feature types.
* Returns the number of features copied.
*
* @param fromSeq
* @param toSeq
* @param select
* if not null, only features of this type are copied (including
* subtypes in the Sequence Ontology)
* @param mapping
* the mapping from 'fromSeq' to 'toSeq'
* @param omitting
*/
public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
MapList mapping, String select, String... omitting)
{
SequenceI copyTo = toSeq;
while (copyTo.getDatasetSequence() != null)
{
copyTo = copyTo.getDatasetSequence();
}
SequenceOntologyI so = SequenceOntologyFactory.getInstance();
int count = 0;
SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
if (sfs != null)
{
for (SequenceFeature sf : sfs)
{
String type = sf.getType();
if (select != null && !so.isA(type, select))
{
continue;
}
boolean omit = false;
for (String toOmit : omitting)
{
if (type.equals(toOmit))
{
omit = true;
}
}
if (omit)
{
continue;
}
/*
* locate the mapped range - null if either start or end is
* not mapped (no partial overlaps are calculated)
*/
int start = sf.getBegin();
int end = sf.getEnd();
int[] mappedTo = mapping.locateInTo(start, end);
/*
* if whole exon range doesn't map, try interpreting it
* as 5' or 3' exon overlapping the CDS range
*/
if (mappedTo == null)
{
mappedTo = mapping.locateInTo(end, end);
if (mappedTo != null)
{
/*
* end of exon is in CDS range - 5' overlap
* to a range from the start of the peptide
*/
mappedTo[0] = 1;
}
}
if (mappedTo == null)
{
mappedTo = mapping.locateInTo(start, start);
if (mappedTo != null)
{
/*
* start of exon is in CDS range - 3' overlap
* to a range up to the end of the peptide
*/
mappedTo[1] = toSeq.getLength();
}
}
if (mappedTo != null)
{
SequenceFeature copy = new SequenceFeature(sf);
copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
copyTo.addSequenceFeature(copy);
count++;
}
}
}
return count;
}
/**
* Creates and adds mappings
*
* - from cds to peptide
* - from dna to cds
*
* and returns the dna-to-cds mapping
*
* @param dnaSeq
* @param cdsSeq
* @param dnaMapping
* @param newMappings
* @return
*/
protected static MapList addCdsMappings(SequenceI dnaSeq,
SequenceI cdsSeq,
Mapping dnaMapping, AlignedCodonFrame newMappings)
{
cdsSeq.createDatasetSequence();
/*
* CDS to peptide is just a contiguous 3:1 mapping, with
* the peptide ranges taken unchanged from the dna mapping
*/
List cdsRanges = new ArrayList();
cdsRanges.add(new int[] { 1, cdsSeq.getLength() });
MapList cdsToPeptide = new MapList(cdsRanges, dnaMapping.getMap()
.getToRanges(), 3, 1);
newMappings.addMap(cdsSeq.getDatasetSequence(), dnaMapping.getTo(),
cdsToPeptide);
/*
* dna 'from' ranges map 1:1 to the contiguous extracted CDS
*/
MapList dnaToCds = new MapList(
dnaMapping.getMap().getFromRanges(), cdsRanges, 1, 1);
newMappings.addMap(dnaSeq, cdsSeq.getDatasetSequence(), dnaToCds);
return dnaToCds;
}
/**
* Makes and returns a CDS-only sequence, where the CDS regions are identified
* as the 'from' ranges of the mapping on the dna.
*
* @param dnaSeq
* nucleotide sequence
* @param seqMapping
* mappings from CDS regions of nucleotide
* @return
*/
protected static SequenceI makeCdsSequence(SequenceI dnaSeq,
Mapping seqMapping)
{
StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
final char[] dna = dnaSeq.getSequence();
int offset = dnaSeq.getStart() - 1;
/*
* Get the codon regions as { [2, 5], [7, 12], [14, 14] etc }
*/
final List dnaCdsRanges = seqMapping.getMap().getFromRanges();
for (int[] range : dnaCdsRanges)
{
// TODO handle reverse mapping as well (range[1] < range[0])
for (int pos = range[0]; pos <= range[1]; pos++)
{
newSequence.append(dna[pos - offset - 1]);
}
}
SequenceI cds = new Sequence(dnaSeq.getName(),
newSequence.toString());
transferDbRefs(seqMapping.getTo(), cds);
return cds;
}
/**
* Locate any xrefs to CDS databases on the protein product and attach to the
* CDS sequence. Also add as a sub-token of the sequence name.
*
* @param from
* @param to
*/
protected static void transferDbRefs(SequenceI from, SequenceI to)
{
String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL);
DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRefs(),
DBRefSource.CODINGDBS);
if (cdsRefs != null)
{
for (DBRefEntry cdsRef : cdsRefs)
{
to.addDBRef(new DBRefEntry(cdsRef));
cdsAccId = cdsRef.getAccessionId();
}
}
if (!to.getName().contains(cdsAccId))
{
to.setName(to.getName() + "|" + cdsAccId);
}
}
}