X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FUniprot.java;h=c2efee70050d09890637fe8a5be150575e0a8a43;hb=838e4f91d4a53dd315640dbc9ff6ef7a815ee576;hp=e7a6e2c22619adba266b38098e2fe0af265f20f5;hpb=d423f22792e47dbc800ae220a58677f988971d06;p=jalview.git diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index e7a6e2c..c2efee7 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -1,34 +1,25 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5) - * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b1) + * Copyright (C) 2015 The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.ws.dbsources; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.Enumeration; -import java.util.Hashtable; -import java.util.Vector; - -import org.exolab.castor.xml.Unmarshaller; - -import com.stevesoft.pat.Regex; - -import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; @@ -37,19 +28,35 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.datamodel.UniprotEntry; import jalview.datamodel.UniprotFile; -import jalview.io.FormatAdapter; -import jalview.io.IdentifyFile; -import jalview.ws.DBRefFetcher; import jalview.ws.ebi.EBIFetchClient; import jalview.ws.seqfetcher.DbSourceProxy; import jalview.ws.seqfetcher.DbSourceProxyImpl; +import java.io.File; +import java.io.FileReader; +import java.io.Reader; +import java.util.Vector; + +import org.exolab.castor.xml.Unmarshaller; + +import com.stevesoft.pat.Regex; + /** * @author JimP * */ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy { + + private static final String BAR_DELIMITER = "|"; + + private static final String NEWLINE = "\n"; + + private static org.exolab.castor.mapping.Mapping map; + + /** + * Constructor + */ public Uniprot() { super(); @@ -98,26 +105,35 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy return "0"; // we really don't know what version we're on. } - private EBIFetchClient ebi = null; - - public Vector getUniprotEntries(File file) + /** + * Reads a file containing the reply to the EBI Fetch Uniprot data query, + * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry + * data models (mapped from <entry> elements) + * + * @param fileReader + * @return + */ + public Vector getUniprotEntries(Reader fileReader) { UniprotFile uni = new UniprotFile(); try { - // 1. Load the mapping information from the file - org.exolab.castor.mapping.Mapping map = new org.exolab.castor.mapping.Mapping( - uni.getClass().getClassLoader()); - java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); - map.loadMapping(url); + if (map == null) + { + // 1. Load the mapping information from the file + map = new org.exolab.castor.mapping.Mapping(uni.getClass() + .getClassLoader()); + java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); + map.loadMapping(url); + } // 2. Unmarshal the data Unmarshaller unmar = new Unmarshaller(uni); unmar.setIgnoreExtraElements(true); unmar.setMapping(map); - if (file != null) + if (fileReader != null) { - uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); + uni = (UniprotFile) unmar.unmarshal(fileReader); } } catch (Exception e) { @@ -139,49 +155,29 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy { queries = queries.toUpperCase().replaceAll( "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", ""); - Alignment al = null; - ebi = new EBIFetchClient(); - StringBuffer result = new StringBuffer(); + AlignmentI al = null; + EBIFetchClient ebi = new EBIFetchClient(); // uniprotxml parameter required since december 2007 // uniprotkb dbname changed introduced december 2008 File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml", null); - Vector entries = getUniprotEntries(file); + Vector entries = getUniprotEntries(new FileReader(file)); if (entries != null) { + /* + * If Castor binding included sequence@length, we could guesstimate the + * size of buffer to hold the alignment + */ + StringBuffer result = new StringBuffer(128); // First, make the new sequences - Enumeration en = entries.elements(); - while (en.hasMoreElements()) + for (UniprotEntry entry : entries) { - UniprotEntry entry = (UniprotEntry) en.nextElement(); - - StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot"); - Enumeration en2 = entry.getAccession().elements(); - while (en2.hasMoreElements()) - { - name.append("|"); - name.append(en2.nextElement()); - } - en2 = entry.getName().elements(); - while (en2.hasMoreElements()) - { - name.append("|"); - name.append(en2.nextElement()); - } - - if (entry.getProtein() != null - && entry.getProtein().getName() != null) - { - for (int nm = 0, nmSize = entry.getProtein().getName().size(); nm < nmSize; nm++) - { - name.append(" " + entry.getProtein().getName().elementAt(nm)); - } - } - - result.append(name + "\n" - + entry.getUniprotSequence().getContent() + "\n"); + StringBuilder name = constructSequenceFastaHeader(entry); + result.append(name).append(NEWLINE) + .append(entry.getUniprotSequence().getContent()) + .append(NEWLINE); } // Then read in the features and apply them to the dataset @@ -206,63 +202,94 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy } /** + * Construct a Fasta-format sequence header by concatenating the source, + * accession id(s) and name(s), delimited by '|', plus any protein names, now + * with space rather than bar delimiter + * + * @param entry + * @return + */ + public static StringBuilder constructSequenceFastaHeader( + UniprotEntry entry) + { + StringBuilder name = new StringBuilder(32); + name.append(">UniProt/Swiss-Prot"); + for (String accessionId : entry.getAccession()) + { + name.append(BAR_DELIMITER); + name.append(accessionId); + } + for (String n : entry.getName()) + { + name.append(BAR_DELIMITER); + name.append(n); + } + + if (entry.getProtein() != null && entry.getProtein().getName() != null) + { + for (String nm : entry.getProtein().getName()) + { + name.append(" ").append(nm); + } + } + return name; + } + + /** * add an ordered set of UniprotEntry objects to an ordered set of seuqences. * * @param al * - a sequence of n sequences * @param entries - * a seuqence of n uniprot entries to be analysed. + * a list of n uniprot entries to be analysed. */ - public void addUniprotXrefs(Alignment al, Vector entries) + public void addUniprotXrefs(AlignmentI al, Vector entries) { + final String dbVersion = getDbVersion(); + for (int i = 0; i < entries.size(); i++) { - UniprotEntry entry = (UniprotEntry) entries.elementAt(i); - Enumeration e = entry.getDbReference().elements(); - Vector onlyPdbEntries = new Vector(); - Vector dbxrefs = new Vector(); - while (e.hasMoreElements()) + UniprotEntry entry = entries.elementAt(i); + Vector onlyPdbEntries = new Vector(); + Vector dbxrefs = new Vector(); + + for (PDBEntry pdb : entry.getDbReference()) { - PDBEntry pdb = (PDBEntry) e.nextElement(); DBRefEntry dbr = new DBRefEntry(); dbr.setSource(pdb.getType()); dbr.setAccessionId(pdb.getId()); - dbr.setVersion(DBRefSource.UNIPROT + ":" + getDbVersion()); + dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion); dbxrefs.addElement(dbr); - if (!pdb.getType().equals("PDB")) + if ("PDB".equals(pdb.getType())) { - continue; + onlyPdbEntries.addElement(pdb); } - - onlyPdbEntries.addElement(pdb); } + SequenceI sq = al.getSequenceAt(i); while (sq.getDatasetSequence() != null) { sq = sq.getDatasetSequence(); } - Enumeration en2 = entry.getAccession().elements(); - while (en2.hasMoreElements()) + for (String accessionId : entry.getAccession()) { - // we always add as uniprot if we retrieved from uniprot or uniprot name - sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2 - .nextElement().toString())); + /* + * add as uniprot whether retrieved from uniprot or uniprot_name + */ + sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, dbVersion, + accessionId)); } - en2 = dbxrefs.elements(); - while (en2.hasMoreElements()) - { - // we always add as uniprot if we retrieved from uniprot or uniprot name - sq.addDBRef((DBRefEntry) en2.nextElement()); + for (DBRefEntry dbRef : dbxrefs) + { + sq.addDBRef(dbRef); } sq.setPDBId(onlyPdbEntries); if (entry.getFeature() != null) { - e = entry.getFeature().elements(); - while (e.hasMoreElements()) + for (SequenceFeature sf : entry.getFeature()) { - SequenceFeature sf = (SequenceFeature) e.nextElement(); sf.setFeatureGroup("Uniprot"); sq.addSequenceFeature(sf); } @@ -294,4 +321,10 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy { return "Uniprot"; // getDbSource(); } + + @Override + public int getTier() + { + return 0; + } }