3 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
4 * Copyright (C) $$Year-Rel$$ The Jalview Authors
6 * This file is part of Jalview.
8 * Jalview is free software: you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation, either version 3
11 * of the License, or (at your option) any later version.
13 * Jalview is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty
15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16 * PURPOSE. See the GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
20 * The Jalview Authors are detailed in the 'AUTHORS' file.
22 package jalview.ws.dbsources;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.util.ArrayList;
30 import java.util.List;
33 import org.json.simple.parser.ParseException;
35 import com.stevesoft.pat.Regex;
37 import jalview.api.FeatureSettingsModelI;
38 import jalview.bin.Console;
39 import jalview.datamodel.AlignedCodonFrame;
40 import jalview.datamodel.AlignmentAnnotation;
41 import jalview.datamodel.AlignmentI;
42 import jalview.datamodel.ContactMatrixI;
43 import jalview.datamodel.DBRefEntry;
44 import jalview.datamodel.PDBEntry;
45 import jalview.datamodel.SequenceFeature;
46 import jalview.datamodel.SequenceI;
47 import jalview.gui.Desktop;
48 import jalview.io.DataSourceType;
49 import jalview.io.FileFormat;
50 import jalview.io.FileFormatI;
51 import jalview.io.FormatAdapter;
52 import jalview.io.PDBFeatureSettings;
53 import jalview.structure.StructureMapping;
54 import jalview.structure.StructureSelectionManager;
55 import jalview.util.MessageManager;
56 import jalview.util.Platform;
57 import jalview.ws.datamodel.alphafold.PAEContactMatrix;
58 import jalview.ws.utils.UrlDownloadClient;
64 public class EBIAlfaFold extends EbiFileRetrievedProxy
// Placed between the retrieved id and the parsed sequence name when imported
// sequences are renamed.
66 private static final String SEPARATOR = "|";
// Delimiter splitting a query of the form id:chain into its id and chain parts.
68 private static final String COLON = ":";
// NOTE(review): not referenced in the visible code - confirm whether it is still needed.
70 private static final int PDB_ID_LENGTH = 4;
// Model version handed to the CIF and pAE download-URL builders.
// NOTE(review): never reassigned in the visible code; could probably be final - confirm.
72 private static String AF_VERSION = "3";
82 * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
85 public String getAccessionSeparator()
93 * @see jalview.ws.DbSourceProxy#getAccessionValidator()
96 public Regex getAccessionValidator()
98 Regex validator = new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)");
99 validator.setIgnoreCase(true);
106 * @see jalview.ws.DbSourceProxy#getDbSource()
109 public String getDbSource()
117 * @see jalview.ws.DbSourceProxy#getDbVersion()
120 public String getDbVersion()
// Builds the alphafold.ebi.ac.uk download URL of the model file for the given
// id and model version number.
125 public static String getAlphaFoldCifDownloadUrl(String id, String vnum)
// A null or empty version number is special-cased.
// NOTE(review): presumably substitutes the default AF_VERSION - confirm.
127 if (vnum == null || vnum.length() == 0)
// NOTE(review): id and vnum are concatenated into the URL without any escaping or validation.
131 return "https://alphafold.ebi.ac.uk/files/" + id + "-model_v" + vnum
// Builds the alphafold.ebi.ac.uk download URL of the predicted aligned error
// (pAE) JSON file for the given id and model version number.
135 public static String getAlphaFoldPaeDownloadUrl(String id, String vnum)
// A null or empty version number is special-cased.
// NOTE(review): presumably substitutes the default AF_VERSION - confirm.
137 if (vnum == null || vnum.length() == 0)
// NOTE(review): id and vnum are concatenated into the URL without any escaping or validation.
141 return "https://alphafold.ebi.ac.uk/files/" + id
142 + "-predicted_aligned_error_v" + vnum + ".json";
148 * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
151 public AlignmentI getSequenceRecords(String queries) throws Exception
153 return getSequenceRecords(queries, null);
// Downloads the structure file for a query to a temporary .cif file and imports
// it as an alignment via importDownloadedStructureFromUrl.
// queries: an accession, optionally followed by ':' and a chain.
// retrievalUrl: when non-null, used instead of the URL built from the accession.
156 public AlignmentI getSequenceRecords(String queries, String retrievalUrl)
159 AlignmentI pdbAlignment = null;
// Everything after the first ':' is the chain, everything before it the id.
162 if (queries.indexOf(COLON) > -1)
164 chain = queries.substring(queries.indexOf(COLON) + 1);
165 id = queries.substring(0, queries.indexOf(COLON));
// Ids rejected by isValidReference are reported and not retrieved.
172 if (!isValidReference(id))
175 "(AFClient) Ignoring invalid alphafold query: '" + id + "'");
// Default location is derived from the id and AF_VERSION; an explicit
// retrievalUrl takes precedence.
179 String alphaFoldCif = getAlphaFoldCifDownloadUrl(id, AF_VERSION);
180 if (retrievalUrl != null)
182 alphaFoldCif = retrievalUrl;
// NOTE(review): no deletion of this temporary file is visible here - confirm
// whether it is cleaned up elsewhere.
187 File tmpFile = File.createTempFile(id, ".cif");
188 Console.debug("Retrieving structure file for " + id + " from "
190 UrlDownloadClient.download(alphaFoldCif, tmpFile);
192 // may not need this check ?
193 file = tmpFile.getAbsolutePath();
199 pdbAlignment = importDownloadedStructureFromUrl(alphaFoldCif, tmpFile,
200 id, chain, getDbSource(), getDbVersion());
// A missing or empty alignment is reported as "no PDB records for chain";
// a null chain is shown as ' ' in the message.
202 if (pdbAlignment == null || pdbAlignment.getHeight() < 1)
204 throw new Exception(MessageManager.formatMessage(
205 "exception.no_pdb_records_for_chain", new String[]
206 { id, ((chain == null) ? "' '" : chain) }));
208 // done during structure retrieval
209 // retrieve_AlphaFold_pAE(id, pdbAlignment, retrievalUrl);
211 } catch (Exception ex) // Problem parsing PDB file
220 * get an alphafold pAE for the given id, and add it to sequence 0 in
221 * pdbAlignment (assuming it came from structurefile parser).
224 * @param pdbAlignment
225 * @param retrievalUrl
226 * - URL of .mmcif from EBI-AlphaFold - will be used to generate the
227 * pAE URL automatically
228 * @throws IOException
// Downloads the pAE JSON for the given id to a temporary file and adds it to
// the sequence at index 0 of pdbAlignment via addAlphaFoldPAEToSequence.
231 public static void retrieve_AlphaFold_pAE(String id,
232 AlignmentI pdbAlignment, String retrievalUrl) throws IOException
234 // import PAE as contact matrix - assume this will work if there was a
236 String paeURL = getAlphaFoldPaeDownloadUrl(id, AF_VERSION);
238 if (retrievalUrl != null)
240 // manufacture the PAE url from a url like ...-model-vN.cif
// NOTE(review): String.replace substitutes every occurrence, so "model" or
// ".cif" appearing elsewhere in the URL (host, directory) is rewritten too.
241 paeURL = retrievalUrl.replace("model", "predicted_aligned_error")
242 .replace(".cif", ".json");
// NOTE(review): the suffix "pae_json" has no leading '.', so the temp file name
// ends in "pae_json" without an extension separator - confirm this is intended.
248 pae = File.createTempFile(id == null ? "af_pae" : id, "pae_json");
249 } catch (IOException e)
// NOTE(review): pae is dereferenced below; if temp file creation failed and the
// handler above does not leave the method, this would be a null dereference -
// confirm. The method already declares IOException, so it could simply propagate.
253 Console.debug("Downloading pae from " + paeURL + " to " + pae.toString()
255 UrlDownloadClient.download(paeURL, pae);
// index 0, no sequence id
256 addAlphaFoldPAEToSequence(pdbAlignment, pae, 0, null);
259 public static void addAlphaFoldPAEToSequence(AlignmentI pdbAlignment,
260 File pae, int index, String seqId)
262 addAlphaFoldPAE(pdbAlignment, pae, index, seqId, false, false);
265 public static void addAlphaFoldPAEToStructure(AlignmentI pdbAlignment,
266 File pae, int index, String structIdOrFile, boolean isStructId)
268 addAlphaFoldPAE(pdbAlignment, pae, index, structIdOrFile, true,
// Opens the given pAE file and imports it as a contact matrix, attached either
// to the mappings of a structure or to a sequence of pdbAlignment.
// Import and parse failures are logged rather than thrown.
// NOTE(review): no close() of paeInput is visible in this method - confirm the
// stream is released (try-with-resources would make that explicit).
272 public static void addAlphaFoldPAE(AlignmentI pdbAlignment, File pae,
273 int index, String id, boolean isStruct, boolean isStructId)
275 FileInputStream paeInput = null;
278 paeInput = new FileInputStream(pae);
279 } catch (FileNotFoundException e)
282 "Could not find pAE file '" + pae.getAbsolutePath() + "'", e);
288 StructureSelectionManager ssm = StructureSelectionManager
289 .getStructureSelectionManager(Desktop.instance);
// id is either a structure id to be resolved to a file, or the file itself.
292 String structFile = isStructId ? ssm.findFileForPDBId(id) : id;
293 Console.debug("##### AHA! structFile = " + structFile);
294 Console.debug("##### structFile "
295 + (ssm.isPDBFileRegistered(structFile) ? "IS " : "is NOT ")
298 StructureMapping[] smArray = ssm.getMapping(structFile);
// NOTE(review): smArray is dereferenced without a visible null check - confirm
// getMapping never returns null.
299 Console.debug("##### AHA! smArray obtained with " + smArray.length
304 if (!importPaeJSONAsContactMatrixToStructure(smArray, paeInput))
306 Console.warn("Could not import contact matrix from '"
307 + pae.getAbsolutePath() + "' to structure.");
309 } catch (IOException e1)
311 Console.error("Error when importing pAE file '"
312 + pae.getAbsolutePath() + "'", e1);
313 } catch (ParseException e2)
315 Console.error("Error when parsing pAE file '"
316 + pae.getAbsolutePath() + "'", e2);
323 // attach to sequence?!
326 if (!importPaeJSONAsContactMatrixToSequence(pdbAlignment, paeInput,
329 Console.warn("Could not import contact matrix from '"
330 + pae.getAbsolutePath() + "' to sequence.");
332 } catch (IOException e1)
334 Console.error("Error when importing pAE file '"
335 + pae.getAbsolutePath() + "'", e1);
336 } catch (ParseException e2)
338 Console.error("Error when parsing pAE file '"
339 + pae.getAbsolutePath() + "'", e2);
346 * parses the given pAE matrix and adds it to sequence 0 in the given
349 * @param pdbAlignment
351 * @return true if there was a pAE matrix added
352 * @throws ParseException
353 * @throws IOException
// Convenience overload: forwards the alignment and stream to the overload that
// also takes a sequence index and a sequence id.
356 public static boolean importPaeJSONAsContactMatrixToSequence(
357 AlignmentI pdbAlignment, InputStream pae_input)
358 throws IOException, ParseException
// NOTE(review): presumably passes index 0 and no sequence id - confirm.
360 return importPaeJSONAsContactMatrixToSequence(pdbAlignment, pae_input,
// Parses a pAE JSON stream into a PAEContactMatrix, attaches it to one sequence
// of pdbAlignment and adds the resulting annotation to the alignment.
// The sequence is taken by index, with a lookup by seqId when that yields nothing.
364 public static boolean importPaeJSONAsContactMatrixToSequence(
365 AlignmentI pdbAlignment, InputStream pae_input, int index,
366 String seqId) throws IOException, ParseException
368 SequenceI sequence = null;
// Debug-only listing of the sequences currently in the alignment.
370 SequenceI[] seqs = pdbAlignment.getSequencesArray();
372 Console.debug("******* sequences is null");
375 for (int i = 0; i < seqs.length; i++)
377 SequenceI s = seqs[i];
378 Console.debug("******* sequences[" + i + "]='" + s.getName() + "'");
// Negative indices are treated as 0.
384 int seqToGet = index > 0 ? index : 0;
385 sequence = pdbAlignment.getSequenceAt(seqToGet);
386 Console.debug("***** Got sequence at index " + seqToGet + ": "
387 + (sequence == null ? null : sequence.getName()));
// Nothing at that index: look the sequence up by id instead.
389 if (sequence == null)
391 Console.debug("***** Looking for sequence with id '" + seqId + "'");
393 SequenceI[] sequences = pdbAlignment.findSequenceMatch(seqId);
394 if (sequences == null || sequences.length < 1)
396 Console.warn("Could not find sequence with id '" + seqId
397 + "' to attach pAE matrix to. Ignoring matrix.");
402 sequence = sequences[0]; // just use the first sequence with this seqId
// The parsed JSON is treated as a list whose first element is the map that
// describes the matrix (unchecked casts).
// NOTE(review): get(0) on an empty list would throw - confirm the input can
// never be an empty JSON array.
406 List<Object> pae_obj = (List<Object>) Platform.parseJSON(pae_input);
409 Console.debug("JSON file did not parse properly.");
412 ContactMatrixI matrix = new PAEContactMatrix(sequence,
413 (Map<String, Object>) pae_obj.get(0));
415 AlignmentAnnotation cmannot = sequence.addContactList(matrix);
416 pdbAlignment.addAnnotation(cmannot);
420 public static boolean importPaeJSONAsContactMatrixToStructure(
421 StructureMapping[] smArray, InputStream paeInput)
422 throws IOException, ParseException
424 boolean someDone = false;
425 Console.debug("##### smArray.length=" + smArray.length);
426 for (StructureMapping sm : smArray)
428 Console.debug("##### sm[n]=" + sm.getPdbId());
429 boolean thisDone = importPaeJSONAsContactMatrixToStructure(sm,
431 Console.debug("##### thisDone = " + thisDone);
432 someDone |= thisDone;
437 public static boolean importPaeJSONAsContactMatrixToStructure(
438 StructureMapping sm, InputStream paeInput)
439 throws IOException, ParseException
442 List<Object> pae_obj = (List<Object>) Platform.parseJSON(paeInput);
445 Console.debug("JSON file did not parse properly.");
449 ContactMatrixI matrix = new PAEContactMatrix(sm.getSequence(),
450 (Map<String, Object>) pae_obj.get(0));
452 AlignmentAnnotation cmannot = sm.getSequence().addContactList(matrix);
453 // sm.getSequence().addAlignmentAnnotation(cmannot);
454 sm.transfer(cmannot);
457 StructureSelectionManager ssm = StructureSelectionManager
458 .getStructureSelectionManager(Desktop.instance);
459 List<AlignedCodonFrame> acfList = ssm.getSequenceMappings();
465 * general purpose structure importer - designed to yield alignment useful for
466 * transfer of annotation to associated sequences
468 * @param alphaFoldCif
// Reads the downloaded mmCIF file into an alignment, renames and cross-references
// the sequences that belong to the requested chain, and removes the others
// together with their annotations.
// chain == null keeps every sequence; dbSource == null skips the DBRef and
// feature-group update.
// NOTE(review): the alphaFoldCif parameter is not used in the visible code - confirm.
477 public static AlignmentI importDownloadedStructureFromUrl(
478 String alphaFoldCif, File tmpFile, String id, String chain,
479 String dbSource, String dbVersion) throws Exception
481 String file = tmpFile.getAbsolutePath();
482 // todo get rid of Type and use FileFormatI instead?
483 FileFormatI fileFormat = FileFormat.MMCif;
484 AlignmentI pdbAlignment = new FormatAdapter().readFile(tmpFile,
485 DataSourceType.FILE, fileFormat);
486 if (pdbAlignment != null)
488 List<SequenceI> toremove = new ArrayList<SequenceI>();
489 for (SequenceI pdbcs : pdbAlignment.getSequences())
// Find the chain code of the PDB entry that refers to the downloaded file.
493 for (PDBEntry pid : pdbcs.getAllPDBEntries())
// NOTE(review): '==' compares String references, not contents, so this only
// matches when both are the very same String object. file.equals(pid.getFile())
// is probably what is meant - the feature-group check further down already
// uses equals(). Confirm and fix.
495 if (pid.getFile() == file)
497 chid = pid.getChainCode();
// Keep the sequence when no chain was requested, or when its chain code matches
// the requested chain (exactly, after trimming, or "_" for a blank request).
501 if (chain == null || (chid != null && (chid.equals(chain)
502 || chid.trim().equals(chain.trim())
503 || (chain.trim().length() == 0 && chid.equals("_")))))
505 // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant.
506 // TODO: suggest simplify naming to 1qip|A as default name defined
507 pdbcs.setName(id + SEPARATOR + pdbcs.getName());
508 // Might need to add more metadata to the PDBEntry object
511 * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry
512 * entry.setId(id); if (entry.getProperty() == null)
513 * entry.setProperty(new Hashtable());
514 * entry.getProperty().put("chains", pdbchain.id + "=" +
515 * sq.getStart() + "-" + sq.getEnd());
516 * sq.getDatasetSequence().addPDBId(entry);
519 // We make a DBRefEntry because we have obtained the PDB file from
522 // JBPNote - PDB DBRefEntry should also carry the chain and mapping
524 if (dbSource != null)
// The accession is the id, with the chain code appended when one was found.
526 DBRefEntry dbentry = new DBRefEntry(dbSource,
528 dbVersion, (chid == null ? id : id + chid));
530 pdbcs.addDBRef(dbentry);
531 // update any feature groups
532 List<SequenceFeature> allsf = pdbcs.getFeatures()
534 List<SequenceFeature> newsf = new ArrayList<SequenceFeature>();
535 if (allsf != null && allsf.size() > 0)
537 for (SequenceFeature f : allsf)
// Features whose group is the temporary file path are rebuilt with the id in
// place of that path.
539 if (file.equals(f.getFeatureGroup()))
541 f = new SequenceFeature(f, f.type, f.begin, f.end, id,
546 pdbcs.setSequenceFeatures(newsf);
552 // mark this sequence to be removed from the alignment
553 // - since it's not from the right chain
557 // now remove marked sequences
558 for (SequenceI pdbcs : toremove)
560 pdbAlignment.deleteSequence(pdbcs);
561 if (pdbcs.getAnnotation() != null)
563 for (AlignmentAnnotation aa : pdbcs.getAnnotation())
565 pdbAlignment.deleteAnnotation(aa);
576 * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
579 public boolean isValidReference(String accession)
581 Regex r = getAccessionValidator();
582 return r.search(accession.trim());
589 public String getTestQuery()
591 return "AF-O15552-F1";
595 public String getDbName()
597 return "ALPHAFOLD"; // getDbSource();
607 * Returns a descriptor for suitable feature display settings with
609 * <li>ResNums or insertions features visible</li>
610 * <li>insertions features coloured red</li>
611 * <li>ResNum features coloured by label</li>
612 * <li>Insertions displayed above (on top of) ResNums</li>
616 public FeatureSettingsModelI getFeatureColourScheme()
618 return new PDBFeatureSettings();