From: Jim Procter Date: Tue, 27 Jul 2021 12:52:16 +0000 (+0100) Subject: JAL-3855 ALPHAFOLD database source based on the PDB retriever X-Git-Tag: Release_2_11_2_0~39^2~36^2~2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=9cfb16c8953b9e08d0fbc015c591a136ec34231b;p=jalview.git JAL-3855 ALPHAFOLD database source based on the PDB retriever --- diff --git a/src/jalview/ws/dbsources/EBIAlfaFold.java b/src/jalview/ws/dbsources/EBIAlfaFold.java new file mode 100644 index 0000000..5edcafa --- /dev/null +++ b/src/jalview/ws/dbsources/EBIAlfaFold.java @@ -0,0 +1,281 @@ + +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.ws.dbsources; + +import jalview.api.FeatureSettingsModelI; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.PDBEntry; +import jalview.datamodel.PDBEntry.Type; +import jalview.datamodel.SequenceI; +import jalview.io.DataSourceType; +import jalview.io.FileFormat; +import jalview.io.FileFormatI; +import jalview.io.FormatAdapter; +import jalview.io.PDBFeatureSettings; +import jalview.structure.StructureImportSettings; +import jalview.util.HttpUtils; +import jalview.util.MessageManager; +import jalview.ws.ebi.EBIFetchClient; +import jalview.ws.utils.UrlDownloadClient; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import com.stevesoft.pat.Regex; + +/** + * @author JimP + * + */ +public class EBIAlfaFold extends EbiFileRetrievedProxy +{ + private static final String SEPARATOR = "|"; + + private static final String COLON = ":"; + + private static final int PDB_ID_LENGTH = 4; + + public EBIAlfaFold() + { + super(); + } + + /* + * (non-Javadoc) + * + * @see jalview.ws.DbSourceProxy#getAccessionSeparator() + */ + @Override + public String getAccessionSeparator() + { + return null; + } + + /* + * (non-Javadoc) + * + * @see jalview.ws.DbSourceProxy#getAccessionValidator() + */ + @Override + public Regex getAccessionValidator() + { + return new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)"); + } + + /* + * (non-Javadoc) + * + * @see jalview.ws.DbSourceProxy#getDbSource() + */ + @Override + public String getDbSource() + { + return "ALPHAFOLD"; + } + + /* + * (non-Javadoc) + * + * @see jalview.ws.DbSourceProxy#getDbVersion() + */ + @Override + public String getDbVersion() + { + return "1"; + } + public static String getAlphaFoldCifDownloadUrl(String id) + { + return "https://alphafold.ebi.ac.uk/files/"+id+"-model_v1.cif"; + } + /* + * (non-Javadoc) + * + * @see 
jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) + */ + @Override + public AlignmentI getSequenceRecords(String queries) throws Exception + { + AlignmentI pdbAlignment = null; + String chain = null; + String id = null; + if (queries.indexOf(COLON) > -1) + { + chain = queries.substring(queries.indexOf(COLON) + 1); + id = queries.substring(0, queries.indexOf(COLON)); + } + else + { + id = queries; + } + + if (!isValidReference(id)) + { + System.err.println("(AFClient) Ignoring invalid pdb query: '" + id + "'"); + stopQuery(); + return null; + } + String alphaFoldCif = getAlphaFoldCifDownloadUrl(id); + + try { + File tmpFile = File.createTempFile(id,"cif"); + UrlDownloadClient.download(alphaFoldCif, tmpFile); + file = tmpFile.getAbsolutePath(); + if (file == null) + { + return null; + } + // todo get rid of Type and use FileFormatI instead? + FileFormatI fileFormat = FileFormat.MMCif; + pdbAlignment = new FormatAdapter().readFile(tmpFile, DataSourceType.FILE, + fileFormat); + if (pdbAlignment != null) + { + List toremove = new ArrayList(); + for (SequenceI pdbcs : pdbAlignment.getSequences()) + { + String chid = null; + // Mapping map=null; + for (PDBEntry pid : pdbcs.getAllPDBEntries()) + { + if (pid.getFile() == file) + { + chid = pid.getChainCode(); + + } + } + if (chain == null || (chid != null && (chid.equals(chain) + || chid.trim().equals(chain.trim()) + || (chain.trim().length() == 0 && chid.equals("_"))))) + { + // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant. 
+ // TODO: suggest simplify naming to 1qip|A as default name defined + pdbcs.setName(id + + SEPARATOR + pdbcs.getName()); + // Might need to add more metadata to the PDBEntry object + // like below + /* + * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry + * entry.setId(id); if (entry.getProperty() == null) + * entry.setProperty(new Hashtable()); + * entry.getProperty().put("chains", pdbchain.id + "=" + + * sq.getStart() + "-" + sq.getEnd()); + * sq.getDatasetSequence().addPDBId(entry); + */ + // Add PDB DB Refs + // We make a DBRefEtntry because we have obtained the PDB file from + // a + // verifiable source + // JBPNote - PDB DBRefEntry should also carry the chain and mapping + // information + DBRefEntry dbentry = new DBRefEntry(getDbSource(), + getDbVersion(), (chid == null ? id : id + chid)); + // dbentry.setMap() + pdbcs.addDBRef(dbentry); + } + else + { + // mark this sequence to be removed from the alignment + // - since it's not from the right chain + toremove.add(pdbcs); + } + } + // now remove marked sequences + for (SequenceI pdbcs : toremove) + { + pdbAlignment.deleteSequence(pdbcs); + if (pdbcs.getAnnotation() != null) + { + for (AlignmentAnnotation aa : pdbcs.getAnnotation()) + { + pdbAlignment.deleteAnnotation(aa); + } + } + } + } + + if (pdbAlignment == null || pdbAlignment.getHeight() < 1) + { + throw new Exception(MessageManager.formatMessage( + "exception.no_pdb_records_for_chain", new String[] + { id, ((chain == null) ? 
"' '" : chain) })); + } + + } catch (Exception ex) // Problem parsing PDB file + { + stopQuery(); + throw (ex); + } + return pdbAlignment; + } + + /* + * (non-Javadoc) + * + * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) + */ + @Override + public boolean isValidReference(String accession) + { + Regex r = getAccessionValidator(); + return r.search(accession.trim()); + } + + /** + * human glyoxalase + */ + @Override + public String getTestQuery() + { + return "1QIP"; + } + + @Override + public String getDbName() + { + return "PDB"; // getDbSource(); + } + + @Override + public int getTier() + { + return 0; + } + + /** + * Returns a descriptor for suitable feature display settings with + *
    + *
  • ResNums or insertions features visible
  • + *
  • insertions features coloured red
  • + *
  • ResNum features coloured by label
  • + *
  • Insertions displayed above (on top of) ResNums
  • + *
+ */ + @Override + public FeatureSettingsModelI getFeatureColourScheme() + { + return new PDBFeatureSettings(); + } +}