import jalview.util.MappingUtils;
import jalview.util.StringUtils;
+import java.util.Arrays;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
}
}
- // SequenceI product = null;
DBRefEntry protEMBLCDS = null;
exon = MappingUtils.removeStartPositions(codonStart - 1, exon);
boolean noProteinDbref = true;
.println("Not allowing for additional stop codon at end of cDNA fragment... !");
// this might occur for CDS sequences where no features are
// marked.
- exon = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() };
+ exon = new int[] { dna.getStart() + (codonStart - 1),
+ dna.getEnd() };
map = new Mapping(product, exon, new int[] { 1, prseq.length() },
3, 1);
}
// TODO should from range include stop codon even if not in protein
// in order to include stop codon in CDS sequence (as done for
// Ensembl)?
- int[] cdsRanges = adjustForProteinLength(prseq.length(),
- exon);
- map = new Mapping(product, cdsRanges, new int[] { 1, prseq.length() }, 3, 1);
+ int[] cdsRanges = adjustForProteinLength(prseq.length(), exon);
+ map = new Mapping(product, cdsRanges, new int[] { 1,
+ prseq.length() }, 3, 1);
// reconstruct the EMBLCDS entry
// TODO: this is only necessary when there codon annotation is
// complete (I think JBPNote)
if (proteinSeq == null)
{
proteinSeq = new Sequence(proteinSeqName,
- product
- .getSequenceAsString());
+ product.getSequenceAsString());
matcher.add(proteinSeq);
peptides.add(proteinSeq);
}
SequenceFeature sf = new SequenceFeature();
sf.setBegin(Math.min(exons[exonStartIndex], exons[exonStartIndex + 1]));
sf.setEnd(Math.max(exons[exonStartIndex], exons[exonStartIndex + 1]));
- sf.setDescription(String.format(
- "Exon %d for protein '%s' EMBLCDS:%s", exonNumber, proteinName,
- proteinAccessionId));
+ sf.setDescription(String.format("Exon %d for protein '%s' EMBLCDS:%s",
+ exonNumber, proteinName, proteinAccessionId));
sf.setPhase(String.valueOf(codonStart - 1));
- sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? "+" : "-");
+ sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? "+"
+ : "-");
sf.setValue(FeatureProperties.EXONPOS, exonNumber);
sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
if (!vals.isEmpty())
* @param exon
* @return new exon
*/
- private int[] adjustForProteinLength(int prlength, int[] exon)
+ static int[] adjustForProteinLength(int prlength, int[] exon)
{
+ if (prlength <= 0 || exon == null)
+ {
+ return exon;
+ }
+ int desiredCdsLength = prlength * 3;
+ int exonLength = MappingUtils.getLength(Arrays.asList(exon));
+
+ /*
+ * assuming here exon might include stop codon in addition to protein codons
+ */
+ if (desiredCdsLength == exonLength
+ || desiredCdsLength == exonLength - 3)
+ {
+ return exon;
+ }
- int origxon[], sxpos = -1, endxon = 0, cdslength = prlength * 3;
- // first adjust range for codon start attribute
- if (prlength >= 1 && exon != null)
+ int origxon[];
+ int sxpos = -1;
+ int endxon = 0;
+ origxon = new int[exon.length];
+ System.arraycopy(exon, 0, origxon, 0, exon.length);
+ int cdspos = 0;
+ for (int x = 0; x < exon.length; x += 2)
{
- origxon = new int[exon.length];
- System.arraycopy(exon, 0, origxon, 0, exon.length);
- int cdspos = 0;
- for (int x = 0; x < exon.length && sxpos == -1; x += 2)
+ cdspos += Math.abs(exon[x + 1] - exon[x]) + 1;
+ if (desiredCdsLength <= cdspos)
{
- cdspos += Math.abs(exon[x + 1] - exon[x]) + 1;
- if (cdslength <= cdspos)
+ // advanced beyond last codon.
+ sxpos = x;
+ if (desiredCdsLength != cdspos)
{
- // advanced beyond last codon.
- sxpos = x;
- if (cdslength != cdspos)
- {
- // System.err
- // .println("Truncating final exon interval on region by "
- // + (cdspos - cdslength));
- }
- // locate the new end boundary of final exon as endxon
- endxon = exon[x + 1] - cdspos + cdslength;
- break;
+ // System.err
+ // .println("Truncating final exon interval on region by "
+ // + (cdspos - cdslength));
}
- }
- if (sxpos != -1)
- {
- // and trim the exon interval set if necessary
- int[] nxon = new int[sxpos + 2];
- System.arraycopy(exon, 0, nxon, 0, sxpos + 2);
- nxon[sxpos + 1] = endxon; // update the end boundary for the new exon
- // set
- exon = nxon;
+ /*
+ * shrink the final exon - reduce end position if forward
+ * strand, increase it if reverse
+ */
+ if (exon[x + 1] >= exon[x])
+ {
+ endxon = exon[x + 1] - cdspos + desiredCdsLength;
+ }
+ else
+ {
+ endxon = exon[x + 1] + cdspos - desiredCdsLength;
+ }
+ break;
}
}
+
+ if (sxpos != -1)
+ {
+ // and trim the exon interval set if necessary
+ int[] nxon = new int[sxpos + 2];
+ System.arraycopy(exon, 0, nxon, 0, sxpos + 2);
+ nxon[sxpos + 1] = endxon; // update the end boundary for the new exon
+ // set
+ exon = nxon;
+ }
return exon;
}
}