X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Fxdb%2Fembl%2FEmblEntry.java;h=0cee0b410a2bfc81cd4373ee406d05afc1085fee;hb=506d60f0e188723ddc91c26824b41ac7034df3fe;hp=350c44e97a977e3ab94378c5cec12f410a8cdae5;hpb=d68c7e07403bd581700e4a79a269b221e0018d36;p=jalview.git diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 350c44e..0cee0b4 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -1,3 +1,21 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) + * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.datamodel.xdb.embl; import jalview.datamodel.DBRefEntry; @@ -49,7 +67,7 @@ public class EmblEntry /** * @param accession - * the accession to set + * the accession to set */ public void setAccession(String accession) { @@ -66,7 +84,7 @@ public class EmblEntry /** * @param dbRefs - * the dbRefs to set + * the dbRefs to set */ public void setDbRefs(Vector dbRefs) { @@ -83,7 +101,7 @@ public class EmblEntry /** * @param desc - * the desc to set + * the desc to set */ public void setDesc(String desc) { @@ -100,7 +118,7 @@ public class EmblEntry /** * @param features - * the features to set + * the features to set */ public void setFeatures(Vector features) { @@ -117,7 +135,7 @@ public class EmblEntry /** * @param keywords - * the keywords to set + * the keywords to set */ public void setKeywords(Vector keywords) { @@ -134,7 +152,7 @@ public class EmblEntry /** * @param lastUpdated - * the lastUpdated to set + * the lastUpdated to set */ public void setLastUpdated(String lastUpdated) { @@ -151,7 +169,7 @@ public class EmblEntry /** * @param refs - * the refs to set + * the refs to set */ public void setRefs(Vector refs) { @@ -168,7 +186,7 @@ public class EmblEntry /** * @param releaseCreated - * the releaseCreated to set + * the releaseCreated to set */ public void setRcreated(String releaseCreated) { @@ -185,7 +203,7 @@ public class EmblEntry /** * @param releaseLastUpdated - * the releaseLastUpdated to set + * the releaseLastUpdated to set */ public void setRLastUpdated(String releaseLastUpdated) { @@ -202,7 +220,7 @@ public class EmblEntry /** * @param sequence - * the sequence to set + * the sequence to set */ public void setSequence(EmblSequence sequence) { @@ -219,7 +237,7 @@ public class EmblEntry /** * @param taxDivision - * the taxDivision to set + * the taxDivision to set */ public void setTaxDivision(String taxDivision) { @@ -236,7 +254,7 @@ public class EmblEntry /** * @param version - * the version to set + * the version to set */ public void setVersion(String version) { @@ -378,21 +396,24 @@ public class EmblEntry * Recover annotated sequences from EMBL file * * @param noNa - * don't return nucleic acid sequences + * don't return nucleic acid sequences * @param sourceDb - * TODO + * TODO * @param noProtein - * don't return any translated protein sequences marked in features + * don't return any translated protein sequences marked in + * features * @return dataset sequences with DBRefs and features - DNA always comes first */ public jalview.datamodel.SequenceI[] getSequences(boolean noNa, boolean noPeptide, String sourceDb) - { //TODO: ensure emblEntry.getSequences behaves correctly for returning all cases of noNa and noPeptide + { // TODO: ensure emblEntry.getSequences behaves correctly for returning all + // cases of noNa and noPeptide Vector seqs = new Vector(); Sequence dna = null; if (!noNa) { - // In theory we still need to create this if noNa is set to avoid a null pointer exception + // In theory we still need to create this if noNa is set to avoid a null + // pointer exception dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence()); dna.setDescription(desc); dna.addDBRef(new DBRefEntry(sourceDb, version, accession)); @@ -400,8 +421,8 @@ public class EmblEntry // TODO: transform EMBL Database refs to canonical form if (dbRefs != null) for (Iterator i = dbRefs.iterator(); i.hasNext(); dna - .addDBRef((DBRefEntry) i.next())) - ; + .addDBRef((DBRefEntry) i.next())) + ; } try { @@ -435,8 +456,7 @@ public class EmblEntry } } } - } - catch (Exception e) + } catch (Exception e) { System.err.println("EMBL Record Features parsing error!"); System.err @@ -459,14 +479,22 @@ public class EmblEntry } /** - * attempt to extract coding region and product from a feature and properly decorate it with annotations. - * @param feature coding feature - * @param sourceDb source database for the EMBLXML - * @param seqs place where sequences go - * @param dna parent dna sequence for this record - * @param noPeptide flag for generation of Peptide sequence objects + * attempt to extract coding region and product from a feature and properly + * decorate it with annotations. + * + * @param feature + * coding feature + * @param sourceDb + * source database for the EMBLXML + * @param seqs + * place where sequences go + * @param dna + * parent dna sequence for this record + * @param noPeptide + * flag for generation of Peptide sequence objects */ - private void parseCodingFeature(EmblFeature feature, String sourceDb, Vector seqs, Sequence dna, boolean noPeptide) + private void parseCodingFeature(EmblFeature feature, String sourceDb, + Vector seqs, Sequence dna, boolean noPeptide) { boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); // extract coding region(s) @@ -534,7 +562,17 @@ public class EmblEntry else { // throw anything else into the additional properties hash - vals.put(q.getName(), q.getValues().toString()); + String[] s = q.getValues(); + StringBuffer sb = new StringBuffer(); + if (s != null) + { + for (int i = 0; i < s.length; i++) + { + sb.append(s[i]); + sb.append("\n"); + } + } + vals.put(q.getName(), sb.toString()); } } } @@ -542,11 +580,13 @@ public class EmblEntry if (prseq != null && prname != null && prid != null) { // extract proteins. - product = new Sequence(sourceDb + "|" + "EMBLCDS|" + prid - +((prname.length()==0) ? "" : " " + prname), prseq, prstart, prstart - + prseq.length() - 1); - product.setDescription("Protein Product from " + sourceDb); - + product = new Sequence(prid, prseq, prstart, prstart + prseq.length() + - 1); + product + .setDescription(((prname.length() == 0) ? "Protein Product from " + + sourceDb + : prname)); + if (!noPeptide) { // Protein is also added to vector of sequences returned @@ -565,17 +605,15 @@ public class EmblEntry // marked. exon = new int[] { dna.getStart(), dna.getEnd() }; - map = new jalview.datamodel.Mapping(product, exon, - new int[] - { prstart, prstart + prseq.length() - 1 }, 3, 1); + map = new jalview.datamodel.Mapping(product, exon, new int[] + { prstart, prstart + prseq.length() - 1 }, 3, 1); } if ((prseq.length() + 1) * 3 == dna.getSequence().length) { exon = new int[] { dna.getStart(), dna.getEnd() - 3 }; - map = new jalview.datamodel.Mapping(product, exon, - new int[] - { prstart, prstart + prseq.length() - 1 }, 3, 1); + map = new jalview.datamodel.Mapping(product, exon, new int[] + { prstart, prstart + prseq.length() - 1 }, 3, 1); } } else @@ -584,25 +622,27 @@ public class EmblEntry { // TODO: Add a DbRef back to the parent EMBL sequence with the exon // map - + // if given a dataset reference, search dataset for parent EMBL + // sequence if it exists and set its map // make a new feature annotating the coding contig } else { - map = new jalview.datamodel.Mapping(product, exon, - new int[] - { prstart, prstart + prseq.length() - 1 }, 3, 1); + map = new jalview.datamodel.Mapping(product, exon, new int[] + { prstart, prstart + prseq.length() - 1 }, 3, 1); // reconstruct the EMBLCDS entry DBRefEntry pcdnaref = new DBRefEntry(); pcdnaref.setAccessionId(prid); pcdnaref.setSource(DBRefSource.EMBLCDS); pcdnaref.setVersion(getVersion()); // same as parent EMBL version. jalview.util.MapList mp = new jalview.util.MapList(new int[] - { 1+(prstart-1)*3, 1+(prstart-1)*3 + (prseq.length()-1)*3 }, new int[] { prstart, prstart+prseq.length() - 1 }, 3, 1); + { 1 + (prstart - 1) * 3, + 1 + (prstart - 1) * 3 + (prseq.length() - 1) * 3 }, new int[] + { prstart, prstart + prseq.length() - 1 }, 3, 1); pcdnaref.setMap(new Mapping(mp)); - if (product!=null) + if (product != null) product.addDBRef(pcdnaref); - + } } // add cds feature to dna seq - this may include the stop codon @@ -613,8 +653,8 @@ public class EmblEntry sf.setEnd(exon[xint + 1]); sf.setType(feature.getName()); sf.setFeatureGroup(sourceDb); - sf.setDescription("Exon " + (1 + xint) + " for protein '" - + prname + "' EMBLCDS:" + prid); + sf.setDescription("Exon " + (1 + (int) (xint / 2)) + + " for protein '" + prname + "' EMBLCDS:" + prid); sf.setValue(FeatureProperties.EXONPOS, new Integer(1 + xint)); sf.setValue(FeatureProperties.EXONPRODUCT, prname); if (vals != null && vals.size() > 0) @@ -639,19 +679,26 @@ public class EmblEntry ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref .getSource())); // Hard code the kind of protein product accessions that EMBL cite - if (ref.getSource().equals( - jalview.datamodel.DBRefSource.UNIPROT)) + if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT)) { ref.setMap(map); - if (map!=null && map.getTo()!=null) + if (map != null && map.getTo() != null) { - map.getTo().addDBRef(new DBRefEntry(ref.getSource(), ref.getVersion(), ref.getAccessionId())); // don't copy map over. + map.getTo().addDBRef( + new DBRefEntry(ref.getSource(), ref.getVersion(), ref + .getAccessionId())); // don't copy map over. + if (map.getTo().getName().indexOf(prid) == 0) + { + map.getTo().setName( + jalview.datamodel.DBRefSource.UNIPROT + "|" + + ref.getAccessionId()); + } } } if (product != null) { DBRefEntry pref = new DBRefEntry(ref.getSource(), ref - .getVersion(), ref.getAccessionId()); + .getVersion(), ref.getAccessionId()); pref.setMap(null); // reference is direct product.addDBRef(pref); // Add converse mapping reference @@ -661,7 +708,7 @@ public class EmblEntry pref = new DBRefEntry(sourceDb, getVersion(), this .getAccession()); pref.setMap(pmap); - if (map.getTo()!=null) + if (map.getTo() != null) { map.getTo().addDBRef(pref); }