X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=3c88083b7e190445e7ba54925c8583eca4c86a00;hb=abdc3a7c9a4b7dbf7a3bb6dd26ba9d76881933d6;hp=763e64ae563913eb029ceb9637947572f59a4aa1;hpb=20c3a3bb8feb78f4b4ccd02f3e7906b3775eb870;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 763e64a..3c88083 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -1,20 +1,22 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) - * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) + * Copyright (C) 2014 The Jalview Authors * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel.xdb.embl; @@ -580,44 +582,15 @@ public class EmblEntry } } Sequence product = null; - int origxon[], sxpos = -1, sxstart, sxstop; - // first adjust range for codon start attribute - if (prstart > 1) - { - origxon = new int[exon.length]; - System.arraycopy(exon, 0, origxon, 0, exon.length); - int cdspos = 0; - for (int x = 0; x < exon.length && sxpos == -1; x += 2) - { - cdspos += exon[x + 1] - exon[x] + 1; - if (prstart <= cdspos) - { - sxpos = x; - sxstart = exon[x]; - sxstop = exon[x + 1]; - // and adjust start boundary of first exon. - exon[x] = exon[x + 1] - cdspos + prstart; - break; - } - } - - if (sxpos > 0) - { - int[] nxon = new int[exon.length - sxpos]; - System.arraycopy(exon, sxpos, nxon, 0, exon.length - sxpos); - exon = nxon; - } - } + exon = adjustForPrStart(prstart, exon); if (prseq != null && prname != null && prid != null) { // extract proteins. product = new Sequence(prid, prseq, 1, prseq.length()); - product - .setDescription(((prname.length() == 0) ? "Protein Product from " - + sourceDb - : prname)); - + product.setDescription(((prname.length() == 0) ? "Protein Product from " + + sourceDb + : prname)); if (!noPeptide) { // Protein is also added to vector of sequences returned @@ -630,7 +603,7 @@ public class EmblEntry System.err .println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect (" + sourceDb + ":" + getAccession() + ")"); - if (prseq.length() * 3 == (1-prstart + dna.getSequence().length)) + if (prseq.length() * 3 == (1 - prstart + dna.getSequence().length)) { System.err .println("Not allowing for additional stop codon at end of cDNA fragment... !"); @@ -641,7 +614,7 @@ public class EmblEntry map = new jalview.datamodel.Mapping(product, exon, new int[] { 1, prseq.length() }, 3, 1); } - if ((prseq.length() + 1) * 3 == (1-prstart + dna.getSequence().length)) + if ((prseq.length() + 1) * 3 == (1 - prstart + dna.getSequence().length)) { System.err .println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!"); @@ -653,6 +626,9 @@ public class EmblEntry } else { + // Trim the exon mapping if necessary - the given product may only be a + // fragment of a larger protein. (EMBL:AY043181 is an example) + if (isEmblCdna) { // TODO: Add a DbRef back to the parent EMBL sequence with the exon @@ -663,18 +639,21 @@ public class EmblEntry } else { - map = new jalview.datamodel.Mapping(product, exon, new int[] - { 1, prseq.length() }, 3, 1); + // final product length trunctation check + + map = new jalview.datamodel.Mapping(product, + adjustForProteinLength(prseq.length(), exon), new int[] + { 1, prseq.length() }, 3, 1); // reconstruct the EMBLCDS entry + // TODO: this is only necessary when there codon annotation is + // complete (I think JBPNote) DBRefEntry pcdnaref = new DBRefEntry(); pcdnaref.setAccessionId(prid); pcdnaref.setSource(DBRefSource.EMBLCDS); pcdnaref.setVersion(getVersion()); // same as parent EMBL version. jalview.util.MapList mp = new jalview.util.MapList(new int[] - { 1, prseq.length() }, - new int[] - { 1 + (prstart - 1), - (prstart - 1) + 3 * prseq.length() }, 1, 3); + { 1, prseq.length() }, new int[] + { 1 + (prstart - 1), (prstart - 1) + 3 * prseq.length() }, 1, 3); // { 1 + (prstart - 1) * 3, // 1 + (prstart - 1) * 3 + prseq.length() * 3 - 1 }, new int[] // { 1prstart, prstart + prseq.length() - 1 }, 3, 1); @@ -736,16 +715,16 @@ public class EmblEntry } if (product != null) { - DBRefEntry pref = new DBRefEntry(ref.getSource(), ref - .getVersion(), ref.getAccessionId()); + DBRefEntry pref = new DBRefEntry(ref.getSource(), + ref.getVersion(), ref.getAccessionId()); pref.setMap(null); // reference is direct product.addDBRef(pref); // Add converse mapping reference if (map != null) { Mapping pmap = new Mapping(dna, map.getMap().getInverse()); - pref = new DBRefEntry(sourceDb, getVersion(), this - .getAccession()); + pref = new DBRefEntry(sourceDb, getVersion(), + this.getAccession()); pref.setMap(pmap); if (map.getTo() != null) { @@ -757,4 +736,91 @@ public class EmblEntry } } } + + private int[] adjustForPrStart(int prstart, int[] exon) + { + + int origxon[], sxpos = -1; + int sxstart, sxstop; // unnecessary variables used for debugging + // first adjust range for codon start attribute + if (prstart > 1) + { + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length && sxpos == -1; x += 2) + { + cdspos += exon[x + 1] - exon[x] + 1; + if (prstart <= cdspos) + { + sxpos = x; + sxstart = exon[x]; + sxstop = exon[x + 1]; + // and adjust start boundary of first exon. + exon[x] = exon[x + 1] - cdspos + prstart; + break; + } + } + + if (sxpos > 0) + { + int[] nxon = new int[exon.length - sxpos]; + System.arraycopy(exon, sxpos, nxon, 0, exon.length - sxpos); + exon = nxon; + } + } + return exon; + } + + /** + * truncate the last exon interval to the prlength'th codon + * + * @param prlength + * @param exon + * @return new exon + */ + private int[] adjustForProteinLength(int prlength, int[] exon) + { + + int origxon[], sxpos = -1, endxon = 0, cdslength = prlength * 3; + int sxstart, sxstop; // unnecessary variables used for debugging + // first adjust range for codon start attribute + if (prlength >= 1 && exon != null) + { + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length && sxpos == -1; x += 2) + { + cdspos += exon[x + 1] - exon[x] + 1; + if (cdslength <= cdspos) + { + // advanced beyond last codon. + sxpos = x; + sxstart = exon[x]; + sxstop = exon[x + 1]; + if (cdslength != cdspos) + { + System.err + .println("Truncating final exon interval on region by " + + (cdspos - cdslength)); + } + // locate the new end boundary of final exon as endxon + endxon = exon[x + 1] - cdspos + cdslength; + break; + } + } + + if (sxpos != -1) + { + // and trim the exon interval set if necessary + int[] nxon = new int[sxpos + 2]; + System.arraycopy(exon, 0, nxon, 0, sxpos + 2); + nxon[sxpos + 1] = endxon; // update the end boundary for the new exon + // set + exon = nxon; + } + } + return exon; + } }