X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=2ee1fb2f6f2906cb44d3f170b22979e4b9ac2a66;hb=4e892da7333869ad8b26d75b25f2c87352ca8c7e;hp=59320f76e088e29412deb9c0dcd2dc1774362082;hpb=3dbf3b87899897f71f5d10f103fe5cd59cc329f7;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 59320f7..2ee1fb2 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -1,3 +1,21 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) + * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.datamodel.xdb.embl; import jalview.datamodel.DBRefEntry; @@ -247,8 +265,8 @@ public class EmblEntry * EMBL Feature support is limited. The text below is included for the benefit * of any developer working on improving EMBL feature import in Jalview. * Extract from EMBL feature specification see - * http://www.embl-ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html - * 3.5 Location 3.5.1 Purpose + * http://www.embl-ebi.ac.uk/embl/Documentation + * /FT_definitions/feature_table.html 3.5 Location 3.5.1 Purpose * * The location indicates the region of the presented sequence which * corresponds to a feature. @@ -372,7 +390,6 @@ public class EmblEntry * * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry * with the region 100..202 of remote entry J00194 - * */ /** * Recover annotated sequences from EMBL file @@ -387,21 +404,28 @@ public class EmblEntry */ public jalview.datamodel.SequenceI[] getSequences(boolean noNa, boolean noPeptide, String sourceDb) - { //TODO: ensure emblEntry.getSequences behaves correctly for returning all cases of noNa and noPeptide + { // TODO: ensure emblEntry.getSequences behaves correctly for returning all + // cases of noNa and noPeptide Vector seqs = new Vector(); Sequence dna = null; if (!noNa) { - // In theory we still need to create this if noNa is set to avoid a null pointer exception + // In theory we still need to create this if noNa is set to avoid a null + // pointer exception dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence()); dna.setDescription(desc); - dna.addDBRef(new DBRefEntry(sourceDb, version, accession)); - // TODO: add mapping for parentAccession attribute + DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession); + dna.addDBRef(retrievedref); + // add map to indicate the sequence is a valid coordinate frame for the + // dbref + retrievedref.setMap(new Mapping(null, new int[] + { 1, dna.getLength() }, new int[] + { 1, dna.getLength() }, 1, 1)); // TODO: transform EMBL Database refs to canonical form if (dbRefs != null) for (Iterator i = dbRefs.iterator(); i.hasNext(); dna - .addDBRef((DBRefEntry) i.next())) - ; + .addDBRef((DBRefEntry) i.next())) + ; } try { @@ -435,8 +459,7 @@ public class EmblEntry } } } - } - catch (Exception e) + } catch (Exception e) { System.err.println("EMBL Record Features parsing error!"); System.err @@ -459,14 +482,22 @@ public class EmblEntry } /** - * attempt to extract coding region and product from a feature and properly decorate it with annotations. - * @param feature coding feature - * @param sourceDb source database for the EMBLXML - * @param seqs place where sequences go - * @param dna parent dna sequence for this record - * @param noPeptide flag for generation of Peptide sequence objects + * attempt to extract coding region and product from a feature and properly + * decorate it with annotations. + * + * @param feature + * coding feature + * @param sourceDb + * source database for the EMBLXML + * @param seqs + * place where sequences go + * @param dna + * parent dna sequence for this record + * @param noPeptide + * flag for generation of Peptide sequence objects */ - private void parseCodingFeature(EmblFeature feature, String sourceDb, Vector seqs, Sequence dna, boolean noPeptide) + private void parseCodingFeature(EmblFeature feature, String sourceDb, + Vector seqs, Sequence dna, boolean noPeptide) { boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); // extract coding region(s) @@ -534,11 +565,11 @@ public class EmblEntry else { // throw anything else into the additional properties hash - String[] s= q.getValues(); + String[] s = q.getValues(); StringBuffer sb = new StringBuffer(); - if (s!=null) + if (s != null) { - for (int i=0; i 0) @@ -649,23 +696,26 @@ public class EmblEntry ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref .getSource())); // Hard code the kind of protein product accessions that EMBL cite - if (ref.getSource().equals( - jalview.datamodel.DBRefSource.UNIPROT)) + if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT)) { ref.setMap(map); - if (map!=null && map.getTo()!=null) + if (map != null && map.getTo() != null) { - map.getTo().addDBRef(new DBRefEntry(ref.getSource(), ref.getVersion(), ref.getAccessionId())); // don't copy map over. - if (map.getTo().getName().indexOf(prid)==0) + map.getTo().addDBRef( + new DBRefEntry(ref.getSource(), ref.getVersion(), ref + .getAccessionId())); // don't copy map over. + if (map.getTo().getName().indexOf(prid) == 0) { - map.getTo().setName(jalview.datamodel.DBRefSource.UNIPROT+"|"+ref.getAccessionId()); + map.getTo().setName( + jalview.datamodel.DBRefSource.UNIPROT + "|" + + ref.getAccessionId()); } } } if (product != null) { DBRefEntry pref = new DBRefEntry(ref.getSource(), ref - .getVersion(), ref.getAccessionId()); + .getVersion(), ref.getAccessionId()); pref.setMap(null); // reference is direct product.addDBRef(pref); // Add converse mapping reference @@ -675,7 +725,7 @@ public class EmblEntry pref = new DBRefEntry(sourceDb, getVersion(), this .getAccession()); pref.setMap(pmap); - if (map.getTo()!=null) + if (map.getTo() != null) { map.getTo().addDBRef(pref); } @@ -685,4 +735,85 @@ public class EmblEntry } } } + + private int[] adjustForPrStart(int prstart, int[] exon) + { + + int origxon[], sxpos = -1; + int sxstart, sxstop; // unnecessary variables used for debugging + // first adjust range for codon start attribute + if (prstart > 1) + { + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length && sxpos == -1; x += 2) + { + cdspos += exon[x + 1] - exon[x] + 1; + if (prstart <= cdspos) + { + sxpos = x; + sxstart = exon[x]; + sxstop = exon[x + 1]; + // and adjust start boundary of first exon. + exon[x] = exon[x + 1] - cdspos + prstart; + break; + } + } + + if (sxpos > 0) + { + int[] nxon = new int[exon.length - sxpos]; + System.arraycopy(exon, sxpos, nxon, 0, exon.length - sxpos); + exon = nxon; + } + } + return exon; + } + /** + * truncate the last exon interval to the prlength'th codon + * @param prlength + * @param exon + * @return new exon + */ + private int[] adjustForProteinLength(int prlength, int[] exon) + { + + int origxon[], sxpos = -1,endxon=0,cdslength=prlength*3; + int sxstart, sxstop; // unnecessary variables used for debugging + // first adjust range for codon start attribute + if (prlength >= 1 && exon!=null) + { + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length && sxpos==-1; x += 2) + { + cdspos += exon[x + 1] - exon[x] + 1; + if (cdslength <= cdspos) + { + // advanced beyond last codon. + sxpos = x; + sxstart = exon[x]; + sxstop = exon[x + 1]; + if (cdslength!=cdspos) { + System.err.println("Truncating final exon interval on region by "+(cdspos-cdslength)); + } + // locate the new end boundary of final exon as endxon + endxon = exon[x+1] - cdspos + cdslength; + break; + } + } + + if (sxpos !=-1) + { + // and trim the exon interval set if necessary + int[] nxon = new int[sxpos+2]; + System.arraycopy(exon, 0, nxon, 0, sxpos+2); + nxon[sxpos+1] = endxon; // update the end boundary for the new exon set + exon = nxon; + } + } + return exon; + } }