3 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
4 * Copyright (C) $$Year-Rel$$ The Jalview Authors
6 * This file is part of Jalview.
8 * Jalview is free software: you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation, either version 3
11 * of the License, or (at your option) any later version.
13 * Jalview is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty
15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 * PURPOSE. See the GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
20 * The Jalview Authors are detailed in the 'AUTHORS' file.
22 package jalview.ws.dbsources;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.ParseException;

import com.stevesoft.pat.Regex;

import jalview.api.FeatureSettingsModelI;
import jalview.bin.Console;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.ContactMatrixI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.Desktop;
import jalview.io.DataSourceType;
import jalview.io.FileFormat;
import jalview.io.FileFormatI;
import jalview.io.FormatAdapter;
import jalview.io.PDBFeatureSettings;
import jalview.structure.StructureMapping;
import jalview.structure.StructureSelectionManager;
import jalview.util.MessageManager;
import jalview.util.Platform;
import jalview.ws.datamodel.alphafold.PAEContactMatrix;
import jalview.ws.utils.UrlDownloadClient;
66 public class EBIAlfaFold extends EbiFileRetrievedProxy
68 private static final String SEPARATOR = "|";
70 private static final String COLON = ":";
72 private static final int PDB_ID_LENGTH = 4;
74 private static String AF_VERSION = "3";
84 * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
87 public String getAccessionSeparator()
95 * @see jalview.ws.DbSourceProxy#getAccessionValidator()
98 public Regex getAccessionValidator()
100 Regex validator = new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)");
101 validator.setIgnoreCase(true);
108 * @see jalview.ws.DbSourceProxy#getDbSource()
111 public String getDbSource()
119 * @see jalview.ws.DbSourceProxy#getDbVersion()
122 public String getDbVersion()
127 public static String getAlphaFoldCifDownloadUrl(String id, String vnum)
129 if (vnum == null || vnum.length() == 0)
133 return "https://alphafold.ebi.ac.uk/files/" + id + "-model_v" + vnum
137 public static String getAlphaFoldPaeDownloadUrl(String id, String vnum)
139 if (vnum == null || vnum.length() == 0)
143 return "https://alphafold.ebi.ac.uk/files/" + id
144 + "-predicted_aligned_error_v" + vnum + ".json";
150 * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
153 public AlignmentI getSequenceRecords(String queries) throws Exception
155 return getSequenceRecords(queries, null);
// Retrieves the AlphaFold model named by 'queries' and imports it as an
// alignment. 'retrievalUrl', when non-null, replaces the default model URL.
158 public AlignmentI getSequenceRecords(String queries, String retrievalUrl)
161 AlignmentI pdbAlignment = null;
// A query may carry a chain code after a colon: the text after the first
// colon is the chain, the text before it is the accession.
164 if (queries.indexOf(COLON) > -1)
166 chain = queries.substring(queries.indexOf(COLON) + 1);
167 id = queries.substring(0, queries.indexOf(COLON));
// Accessions rejected by isValidReference are reported with this message.
174 if (!isValidReference(id))
177 "(AFClient) Ignoring invalid alphafold query: '" + id + "'");
// Default location of the model; a caller-supplied URL takes precedence.
181 String alphaFoldCif = getAlphaFoldCifDownloadUrl(id, AF_VERSION);
182 if (retrievalUrl != null)
184 alphaFoldCif = retrievalUrl;
// The model is downloaded into a temporary ".cif" file and parsed from there.
// NOTE(review): no deletion of the temporary file is visible here - confirm
// it is cleaned up elsewhere.
189 File tmpFile = File.createTempFile(id, ".cif");
190 Console.debug("Retrieving structure file for " + id + " from "
192 UrlDownloadClient.download(alphaFoldCif, tmpFile);
194 // may not need this check ?
195 file = tmpFile.getAbsolutePath();
201 pdbAlignment = importDownloadedStructureFromUrl(alphaFoldCif, tmpFile,
202 id, chain, getDbSource(), getDbVersion());
// A missing or empty alignment is reported as an exception that names the
// accession and the chain (shown as ' ' when no chain was requested).
204 if (pdbAlignment == null || pdbAlignment.getHeight() < 1)
206 throw new Exception(MessageManager.formatMessage(
207 "exception.no_pdb_records_for_chain", new String[]
208 { id, ((chain == null) ? "' '" : chain) }));
210 // done during structure retrieval
211 // retrieve_AlphaFold_pAE(id, pdbAlignment, retrievalUrl);
213 } catch (Exception ex) // Problem parsing PDB file
222 * get an alphafold pAE for the given id, and add it to sequence 0 in
223 * pdbAlignment (assuming it came from structurefile parser).
226 * @param pdbAlignment
227 * @param retrievalUrl
228 * - URL of .mmcif from EBI-AlphaFold - will be used to generate the
229 * pAE URL automatically
230 * @throws IOException
// Downloads the pAE JSON for 'id' into a temporary file and hands it to
// addAlphaFoldPAEToSequence for sequence index 0 of pdbAlignment.
233 public static void retrieve_AlphaFold_pAE(String id,
234 AlignmentI pdbAlignment, String retrievalUrl) throws IOException
236 // import PAE as contact matrix - assume this will work if there was a
238 String paeURL = getAlphaFoldPaeDownloadUrl(id, AF_VERSION);
240 if (retrievalUrl != null)
242 // manufacture the PAE url from a url like ...-model-vN.cif
// NOTE(review): String.replace substitutes every occurrence, so "model" or
// ".cif" appearing elsewhere in the URL (host, directory) is rewritten too.
243 paeURL = retrievalUrl.replace("model", "predicted_aligned_error")
244 .replace(".cif", ".json");
// The second argument of createTempFile is appended verbatim as the file
// name suffix; "pae_json" carries no leading dot.
250 pae = File.createTempFile(id == null ? "af_pae" : id, "pae_json");
251 } catch (IOException e)
255 Console.debug("Downloading pae from " + paeURL + " to " + pae.toString()
257 UrlDownloadClient.download(paeURL, pae);
258 addAlphaFoldPAEToSequence(pdbAlignment, pae, 0, null);
261 public static void addAlphaFoldPAEToSequence(AlignmentI pdbAlignment,
262 File pae, int index, String seqId)
264 addAlphaFoldPAE(pdbAlignment, pae, index, seqId, false, false);
267 public static void addAlphaFoldPAEToStructure(AlignmentI pdbAlignment,
268 File pae, int index, String structIdOrFile, boolean isStructId)
270 addAlphaFoldPAE(pdbAlignment, pae, index, structIdOrFile, true,
// Opens the pAE file and imports its matrix either onto the structure
// mappings registered for 'id' or onto a sequence of pdbAlignment
// (presumably selected by isStruct - confirm). Import failures are logged,
// not thrown.
// NOTE(review): paeInput is opened below but no close() call is visible in
// this method - confirm the stream is released.
274 public static void addAlphaFoldPAE(AlignmentI pdbAlignment, File pae,
275 int index, String id, boolean isStruct, boolean isStructId)
277 FileInputStream paeInput = null;
280 paeInput = new FileInputStream(pae);
281 } catch (FileNotFoundException e)
284 "Could not find pAE file '" + pae.getAbsolutePath() + "'", e);
290 StructureSelectionManager ssm = StructureSelectionManager
291 .getStructureSelectionManager(Desktop.instance);
// isStructId decides whether 'id' is resolved to a file through the
// selection manager or is used as the file directly.
294 String structFile = isStructId ? ssm.findFileForPDBId(id) : id;
295 Console.debug("##### AHA! structFile = " + structFile);
296 Console.debug("##### structFile "
297 + (ssm.isPDBFileRegistered(structFile) ? "IS " : "is NOT ")
// NOTE(review): smArray.length is dereferenced without a null check -
// confirm getMapping never returns null.
300 StructureMapping[] smArray = ssm.getMapping(structFile);
301 Console.debug("##### AHA! smArray obtained with " + smArray.length
306 if (!importPaeJSONAsContactMatrixToStructure(smArray, paeInput))
308 Console.warn("Could not import contact matrix from '"
309 + pae.getAbsolutePath() + "' to structure.");
311 } catch (IOException e1)
313 Console.error("Error when importing pAE file '"
314 + pae.getAbsolutePath() + "'", e1);
315 } catch (ParseException e2)
317 Console.error("Error when parsing pAE file '"
318 + pae.getAbsolutePath() + "'", e2);
325 // attach to sequence?!
328 if (!importPaeJSONAsContactMatrixToSequence(pdbAlignment, paeInput,
331 Console.warn("Could not import contact matrix from '"
332 + pae.getAbsolutePath() + "' to sequence.");
334 } catch (IOException e1)
336 Console.error("Error when importing pAE file '"
337 + pae.getAbsolutePath() + "'", e1);
338 } catch (ParseException e2)
340 Console.error("Error when parsing pAE file '"
341 + pae.getAbsolutePath() + "'", e2);
348 * parses the given pAE matrix and adds it to sequence 0 in the given
351 * @param pdbAlignment
353 * @return true if there was a pAE matrix added
354 * @throws ParseException
355 * @throws IOException
358 public static boolean importPaeJSONAsContactMatrixToSequence(
359 AlignmentI pdbAlignment, InputStream pae_input)
360 throws IOException, ParseException
362 return importPaeJSONAsContactMatrixToSequence(pdbAlignment, pae_input,
// Parses the pAE JSON from pae_input, wraps it in a PAEContactMatrix for the
// chosen sequence and adds the resulting annotation to that sequence and to
// pdbAlignment.
366 public static boolean importPaeJSONAsContactMatrixToSequence(
367 AlignmentI pdbAlignment, InputStream pae_input, int index,
368 String seqId) throws IOException, ParseException
370 SequenceI sequence = null;
372 SequenceI[] seqs = pdbAlignment.getSequencesArray();
374 Console.debug("******* sequences is null");
377 for (int i = 0; i < seqs.length; i++)
379 SequenceI s = seqs[i];
// A non-positive index selects the sequence at position 0.
385 int seqToGet = index > 0 ? index : 0;
386 sequence = pdbAlignment.getSequenceAt(seqToGet);
387 Console.debug("***** Got sequence at index " + seqToGet + ": "
388 + (sequence == null ? null : sequence.getName()));
// No sequence yet: look one up by seqId instead.
390 if (sequence == null)
392 SequenceI[] sequences = pdbAlignment.findSequenceMatch(seqId);
393 if (sequences == null || sequences.length < 1)
395 Console.warn("Could not find sequence with id '" + seqId
396 + "' to attach pAE matrix to. Ignoring matrix.");
401 sequence = sequences[0]; // just use the first sequence with this seqId
405 JSONObject paeDict = parseJSONtoPAEContactMatrix(pae_input);
408 Console.debug("JSON file did not parse properly.");
// The parsed dictionary is cast unchecked to Map<String, Object> for the
// matrix constructor.
411 ContactMatrixI matrix = new PAEContactMatrix(sequence,
412 (Map<String, Object>) paeDict);
413 ((PAEContactMatrix) matrix).makeGroups(5f, true);
415 AlignmentAnnotation cmannot = sequence.addContactList(matrix);
416 pdbAlignment.addAnnotation(cmannot);
421 public static JSONObject parseJSONtoPAEContactMatrix(
422 InputStream pae_input) throws IOException,ParseException
424 Object paeJson = Platform.parseJSON(pae_input);
425 JSONObject paeDict=null;
426 if (paeJson instanceof JSONObject)
428 Console.debug("***** paeJson is a JSONObject");
429 paeDict = (JSONObject) paeJson;
431 else if (paeJson instanceof JSONArray)
433 JSONArray jsonArray = (JSONArray) paeJson;
434 if (jsonArray.size() > 0)
435 paeDict = (JSONObject) jsonArray.get(0);
441 public static boolean importPaeJSONAsContactMatrixToStructure(
442 StructureMapping[] smArray, InputStream paeInput)
443 throws IOException, ParseException
445 boolean someDone = false;
446 Console.debug("##### smArray.length=" + smArray.length);
447 for (StructureMapping sm : smArray)
449 Console.debug("##### sm[n]=" + sm.getPdbId());
450 boolean thisDone = importPaeJSONAsContactMatrixToStructure(sm,
452 Console.debug("##### thisDone = " + thisDone);
453 someDone |= thisDone;
// Parses the pAE JSON from paeInput, builds a PAEContactMatrix for the
// mapping's sequence, adds it to that sequence as a contact list and passes
// the resulting annotation to sm.transfer.
458 public static boolean importPaeJSONAsContactMatrixToStructure(
459 StructureMapping sm, InputStream paeInput)
460 throws IOException, ParseException
463 JSONObject pae_obj = parseJSONtoPAEContactMatrix(paeInput);
466 Console.debug("JSON file did not parse properly.");
// The parsed dictionary is cast unchecked to Map<String, Object> for the
// matrix constructor.
470 ContactMatrixI matrix = new PAEContactMatrix(sm.getSequence(),
471 (Map<String, Object>) pae_obj);
472 ((PAEContactMatrix) matrix).makeGroups(5f, true);
473 AlignmentAnnotation cmannot = sm.getSequence().addContactList(matrix);
474 // sm.getSequence().addAlignmentAnnotation(cmannot);
475 sm.transfer(cmannot);
// NOTE(review): acfList is not used by any visible line of this method -
// confirm whether this lookup is still needed.
478 StructureSelectionManager ssm = StructureSelectionManager
479 .getStructureSelectionManager(Desktop.instance);
480 List<AlignedCodonFrame> acfList = ssm.getSequenceMappings();
486 * general purpose structure importer - designed to yield alignment useful for
487 * transfer of annotation to associated sequences
489 * @param alphaFoldCif
// Reads the downloaded mmCIF file as an alignment, renames and
// cross-references the sequences that belong to the requested chain, and
// removes the remaining sequences together with their annotations.
// NOTE(review): alphaFoldCif is not read by any visible line of this method.
498 public static AlignmentI importDownloadedStructureFromUrl(
499 String alphaFoldCif, File tmpFile, String id, String chain,
500 String dbSource, String dbVersion) throws Exception
502 String file = tmpFile.getAbsolutePath();
503 // todo get rid of Type and use FileFormatI instead?
504 FileFormatI fileFormat = FileFormat.MMCif;
505 AlignmentI pdbAlignment = new FormatAdapter().readFile(tmpFile,
506 DataSourceType.FILE, fileFormat);
507 if (pdbAlignment != null)
509 List<SequenceI> toremove = new ArrayList<SequenceI>();
510 for (SequenceI pdbcs : pdbAlignment.getSequences())
514 for (PDBEntry pid : pdbcs.getAllPDBEntries())
// NOTE(review): '==' compares String identity, not content. 'file' is the
// String returned by tmpFile.getAbsolutePath() above, so this matches only
// if the entry holds that very instance - confirm, or compare with equals as
// the feature-group check further down does.
516 if (pid.getFile() == file)
518 chid = pid.getChainCode();
// Keep the sequence when no chain was requested, or when its chain code
// equals the requested one (also after trimming), or when a blank chain was
// requested and the chain code is "_".
522 if (chain == null || (chid != null && (chid.equals(chain)
523 || chid.trim().equals(chain.trim())
524 || (chain.trim().length() == 0 && chid.equals("_")))))
526 // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant.
527 // TODO: suggest simplify naming to 1qip|A as default name defined
528 pdbcs.setName(id + SEPARATOR + pdbcs.getName());
529 // Might need to add more metadata to the PDBEntry object
532 * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry
533 * entry.setId(id); if (entry.getProperty() == null)
534 * entry.setProperty(new Hashtable());
535 * entry.getProperty().put("chains", pdbchain.id + "=" +
536 * sq.getStart() + "-" + sq.getEnd());
537 * sq.getDatasetSequence().addPDBId(entry);
540 // We make a DBRefEntry because we have obtained the PDB file from
543 // JBPNote - PDB DBRefEntry should also carry the chain and mapping
545 if (dbSource != null)
// The accession of the reference is the id, with the chain code appended
// when one was found.
547 DBRefEntry dbentry = new DBRefEntry(dbSource,
549 dbVersion, (chid == null ? id : id + chid));
551 pdbcs.addDBRef(dbentry);
552 // update any feature groups
553 List<SequenceFeature> allsf = pdbcs.getFeatures()
555 List<SequenceFeature> newsf = new ArrayList<SequenceFeature>();
556 if (allsf != null && allsf.size() > 0)
558 for (SequenceFeature f : allsf)
// Features whose group is the temporary file path are re-created with the
// id as their group.
560 if (file.equals(f.getFeatureGroup()))
562 f = new SequenceFeature(f, f.type, f.begin, f.end, id,
567 pdbcs.setSequenceFeatures(newsf);
573 // mark this sequence to be removed from the alignment
574 // - since it's not from the right chain
578 // now remove marked sequences
579 for (SequenceI pdbcs : toremove)
581 pdbAlignment.deleteSequence(pdbcs);
582 if (pdbcs.getAnnotation() != null)
584 for (AlignmentAnnotation aa : pdbcs.getAnnotation())
586 pdbAlignment.deleteAnnotation(aa);
597 * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
600 public boolean isValidReference(String accession)
602 Regex r = getAccessionValidator();
603 return r.search(accession.trim());
610 public String getTestQuery()
612 return "AF-O15552-F1";
616 public String getDbName()
618 return "ALPHAFOLD"; // getDbSource();
628 * Returns a descriptor for suitable feature display settings with
630 * <li>ResNums or insertions features visible</li>
631 * <li>insertions features coloured red</li>
632 * <li>ResNum features coloured by label</li>
633 * <li>Insertions displayed above (on top of) ResNums</li>
637 public FeatureSettingsModelI getFeatureColourScheme()
639 return new PDBFeatureSettings();