X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=90d91970afeb520c60cdd342e6c68285695eecaa;hb=refs%2Fheads%2Fbug%2FJAL-2842;hp=2b9b9f97c097afca3afffece8d8a1587e433a09d;hpb=be762d8d9c71a7aa3121e845c45911c7192b7827;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 2b9b9f9..90d9197 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -2164,7 +2164,10 @@ public class AlignmentUtils /** * Returns a mapping from dna to protein by inspecting sequence features of - * type "CDS" on the dna. + * type "CDS" on the dna. A mapping is constructed if the total CDS feature + * length is 3 times the peptide length (optionally after dropping a trailing + * stop codon). This method does not check whether the CDS nucleotide sequence + * translates to the peptide sequence. * * @param dnaSeq * @param proteinSeq @@ -2176,6 +2179,15 @@ public class AlignmentUtils List ranges = findCdsPositions(dnaSeq); int mappedDnaLength = MappingUtils.getLength(ranges); + /* + * if not a whole number of codons, something is wrong, + * abort mapping + */ + if (mappedDnaLength % CODON_LENGTH > 0) + { + return null; + } + int proteinLength = proteinSeq.getLength(); int proteinStart = proteinSeq.getStart(); int proteinEnd = proteinSeq.getEnd(); @@ -2199,8 +2211,12 @@ public class AlignmentUtils if (codesForResidues == (proteinLength + 1)) { // assuming extra codon is for STOP and not in peptide + // todo: check trailing codon is indeed a STOP codon codesForResidues--; + mappedDnaLength -= CODON_LENGTH; + MappingUtils.removeEndPositions(CODON_LENGTH, ranges); } + if (codesForResidues == proteinLength) { proteinRange.add(new int[] { proteinStart, proteinEnd }); @@ -2211,7 +2227,7 @@ public class AlignmentUtils /** * Returns a list of CDS ranges found (as sequence positions base 1), i.e. of - * start/end positions of sequence features of type "CDS" (or a sub-type of + * [start, end] positions of sequence features of type "CDS" (or a sub-type of * CDS in the Sequence Ontology). The ranges are sorted into ascending start * position order, so this method is only valid for linear CDS in the same * sense as the protein product. @@ -2230,7 +2246,6 @@ public class AlignmentUtils return result; } SequenceFeatures.sortFeatures(sfs, true); - int startPhase = 0; for (SequenceFeature sf : sfs) { @@ -2248,7 +2263,7 @@ public class AlignmentUtils */ int begin = sf.getBegin(); int end = sf.getEnd(); - if (result.isEmpty()) + if (result.isEmpty() && phase > 0) { begin += phase; if (begin > end) @@ -2263,16 +2278,6 @@ public class AlignmentUtils } /* - * remove 'startPhase' positions (usually 0) from the first range - * so we begin at the start of a complete codon - */ - if (!result.isEmpty()) - { - // TODO JAL-2022 correctly model start phase > 0 - result.get(0)[0] += startPhase; - } - - /* * Finally sort ranges by start position. This avoids a dependency on * keeping features in order on the sequence (if they are in order anyway, * the sort will have almost no work to do). The implicit assumption is CDS