X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=2ee1fb2f6f2906cb44d3f170b22979e4b9ac2a66;hb=c16c1633e1a5acc9f44e4de40b9abbb8a59b99b4;hp=763e64ae563913eb029ceb9637947572f59a4aa1;hpb=20c3a3bb8feb78f4b4ccd02f3e7906b3775eb870;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 763e64a..2ee1fb2 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -580,35 +580,8 @@ public class EmblEntry } } Sequence product = null; - int origxon[], sxpos = -1, sxstart, sxstop; - // first adjust range for codon start attribute - if (prstart > 1) - { - origxon = new int[exon.length]; - System.arraycopy(exon, 0, origxon, 0, exon.length); - int cdspos = 0; - for (int x = 0; x < exon.length && sxpos == -1; x += 2) - { - cdspos += exon[x + 1] - exon[x] + 1; - if (prstart <= cdspos) - { - sxpos = x; - sxstart = exon[x]; - sxstop = exon[x + 1]; - // and adjust start boundary of first exon. - exon[x] = exon[x + 1] - cdspos + prstart; - break; - } - } - - if (sxpos > 0) - { - int[] nxon = new int[exon.length - sxpos]; - System.arraycopy(exon, sxpos, nxon, 0, exon.length - sxpos); - exon = nxon; - } - } - + exon = adjustForPrStart(prstart, exon); + if (prseq != null && prname != null && prid != null) { // extract proteins. @@ -617,7 +590,6 @@ public class EmblEntry .setDescription(((prname.length() == 0) ? "Protein Product from " + sourceDb : prname)); - if (!noPeptide) { // Protein is also added to vector of sequences returned @@ -653,6 +625,9 @@ public class EmblEntry } else { + // Trim the exon mapping if necessary - the given product may only be a fragment of a larger protein. (EMBL:AY043181 is an example) + + if (isEmblCdna) { // TODO: Add a DbRef back to the parent EMBL sequence with the exon @@ -663,9 +638,12 @@ public class EmblEntry } else { - map = new jalview.datamodel.Mapping(product, exon, new int[] + // final product length trunctation check + + map = new jalview.datamodel.Mapping(product, adjustForProteinLength(prseq.length(),exon), new int[] { 1, prseq.length() }, 3, 1); // reconstruct the EMBLCDS entry + // TODO: this is only necessary when there codon annotation is complete (I think JBPNote) DBRefEntry pcdnaref = new DBRefEntry(); pcdnaref.setAccessionId(prid); pcdnaref.setSource(DBRefSource.EMBLCDS); @@ -757,4 +735,85 @@ public class EmblEntry } } } + + private int[] adjustForPrStart(int prstart, int[] exon) + { + + int origxon[], sxpos = -1; + int sxstart, sxstop; // unnecessary variables used for debugging + // first adjust range for codon start attribute + if (prstart > 1) + { + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length && sxpos == -1; x += 2) + { + cdspos += exon[x + 1] - exon[x] + 1; + if (prstart <= cdspos) + { + sxpos = x; + sxstart = exon[x]; + sxstop = exon[x + 1]; + // and adjust start boundary of first exon. + exon[x] = exon[x + 1] - cdspos + prstart; + break; + } + } + + if (sxpos > 0) + { + int[] nxon = new int[exon.length - sxpos]; + System.arraycopy(exon, sxpos, nxon, 0, exon.length - sxpos); + exon = nxon; + } + } + return exon; + } + /** + * truncate the last exon interval to the prlength'th codon + * @param prlength + * @param exon + * @return new exon + */ + private int[] adjustForProteinLength(int prlength, int[] exon) + { + + int origxon[], sxpos = -1,endxon=0,cdslength=prlength*3; + int sxstart, sxstop; // unnecessary variables used for debugging + // first adjust range for codon start attribute + if (prlength >= 1 && exon!=null) + { + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length && sxpos==-1; x += 2) + { + cdspos += exon[x + 1] - exon[x] + 1; + if (cdslength <= cdspos) + { + // advanced beyond last codon. + sxpos = x; + sxstart = exon[x]; + sxstop = exon[x + 1]; + if (cdslength!=cdspos) { + System.err.println("Truncating final exon interval on region by "+(cdspos-cdslength)); + } + // locate the new end boundary of final exon as endxon + endxon = exon[x+1] - cdspos + cdslength; + break; + } + } + + if (sxpos !=-1) + { + // and trim the exon interval set if necessary + int[] nxon = new int[sxpos+2]; + System.arraycopy(exon, 0, nxon, 0, sxpos+2); + nxon[sxpos+1] = endxon; // update the end boundary for the new exon set + exon = nxon; + } + } + return exon; + } }