X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FEBIAlfaFold.java;h=7c72f4b168e0dce5cb3127b61d06fde40c1d79dc;hb=fbfbbe26bee37143d5279fe4d254a5a89c96b021;hp=5edcafa8dc5ca8f079c10d1beb11d96f80280eb4;hpb=9cfb16c8953b9e08d0fbc015c591a136ec34231b;p=jalview.git diff --git a/src/jalview/ws/dbsources/EBIAlfaFold.java b/src/jalview/ws/dbsources/EBIAlfaFold.java index 5edcafa..7c72f4b 100644 --- a/src/jalview/ws/dbsources/EBIAlfaFold.java +++ b/src/jalview/ws/dbsources/EBIAlfaFold.java @@ -22,27 +22,41 @@ package jalview.ws.dbsources; import jalview.api.FeatureSettingsModelI; +import jalview.bin.Cache; +import jalview.bin.Console; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; +import jalview.datamodel.ContactMatrix; +import jalview.datamodel.ContactMatrixI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; import jalview.datamodel.PDBEntry.Type; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeaturesI; import jalview.io.DataSourceType; import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FormatAdapter; import jalview.io.PDBFeatureSettings; +import jalview.javascript.json.JSON; import jalview.structure.StructureImportSettings; import jalview.util.HttpUtils; import jalview.util.MessageManager; +import jalview.util.Platform; +import jalview.ws.datamodel.alphafold.PAEContactMatrix; import jalview.ws.ebi.EBIFetchClient; import jalview.ws.utils.UrlDownloadClient; +import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.util.ArrayList; import java.util.List; +import java.util.Map; + +import org.jmol.adapter.readers.simple.JSONReader; import com.stevesoft.pat.Regex; @@ -82,7 +96,9 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy @Override public Regex getAccessionValidator() { - return new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)"); + Regex validator = new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)"); + validator.setIgnoreCase(true); + return validator; } /* @@ -106,10 +122,18 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy { return "1"; } + public static String getAlphaFoldCifDownloadUrl(String id) { - return "https://alphafold.ebi.ac.uk/files/"+id+"-model_v1.cif"; + return "https://alphafold.ebi.ac.uk/files/" + id + "-model_v1.cif"; + } + + public static String getAlphaFoldPaeDownloadUrl(String id) + { + return "https://alphafold.ebi.ac.uk/files/" + id + + "-predicted_aligned_error_v1.json"; } + /* * (non-Javadoc) * @@ -118,6 +142,12 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy @Override public AlignmentI getSequenceRecords(String queries) throws Exception { + return getSequenceRecords(queries, null); + } + + public AlignmentI getSequenceRecords(String queries, String retrievalUrl) + throws Exception + { AlignmentI pdbAlignment = null; String chain = null; String id = null; @@ -133,100 +163,194 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy if (!isValidReference(id)) { - System.err.println("(AFClient) Ignoring invalid pdb query: '" + id + "'"); + System.err.println( + "(AFClient) Ignoring invalid alphafold query: '" + id + "'"); stopQuery(); return null; } String alphaFoldCif = getAlphaFoldCifDownloadUrl(id); - - try { - File tmpFile = File.createTempFile(id,"cif"); + if (retrievalUrl != null) + { + alphaFoldCif = retrievalUrl; + } + + try + { + File tmpFile = File.createTempFile(id, ".cif"); + Console.debug("Retrieving structure file for "+id+" from "+alphaFoldCif); UrlDownloadClient.download(alphaFoldCif, tmpFile); + + // may not need this check ? file = tmpFile.getAbsolutePath(); if (file == null) { - return null; + return null; + } + + pdbAlignment = importDownloadedStructureFromUrl(alphaFoldCif, tmpFile, + id, chain, getDbSource(), getDbVersion()); + + if (pdbAlignment == null || pdbAlignment.getHeight() < 1) + { + throw new Exception(MessageManager.formatMessage( + "exception.no_pdb_records_for_chain", new String[] + { id, ((chain == null) ? "' '" : chain) })); + } + + // import PAE as contact matrix - assume this will work if there was a + // model + File pae = File.createTempFile(id, "pae_json"); + String paeURL = getAlphaFoldPaeDownloadUrl(id); + + if (retrievalUrl!=null) { + // manufacture the PAE url from a url like ...-model-vN.cif + paeURL = retrievalUrl.replace("model","predicted_aligned_error").replace(".cif",".json"); + } + Console.debug("Downloading pae from " + paeURL + + " to " + pae.toString() + ""); + + try { + UrlDownloadClient.download(paeURL, pae); + if (!importPaeJSONAsContactMatrix(pdbAlignment, pae)) + { + Console.warn("Couln't import contact matrix from " + paeURL + + " (stored in " + pae.toString() + ")"); + } + } catch (Exception pae_ex) { + Console.debug("Couldn't download PAE",pae_ex); + } + + } catch (Exception ex) // Problem parsing PDB file + { + stopQuery(); + throw (ex); + } + return pdbAlignment; + } + + private boolean importPaeJSONAsContactMatrix(AlignmentI pdbAlignment, + File pae) throws Exception + { + FileInputStream pae_input = new FileInputStream(pae); + + List pae_obj = (List) Platform + .parseJSON(pae_input); + if (pae_obj == null) + { + return false; } - // todo get rid of Type and use FileFormatI instead? - FileFormatI fileFormat = FileFormat.MMCif; - pdbAlignment = new FormatAdapter().readFile(tmpFile, DataSourceType.FILE, - fileFormat); - if (pdbAlignment != null) + ContactMatrixI matrix = new PAEContactMatrix( + pdbAlignment.getSequenceAt(0), (Map)pae_obj.get(0)); + + pdbAlignment.getSequenceAt(0).addAlignmentAnnotation(pdbAlignment.addContactList(matrix)); + return true; + } + + /** + * general purpose structure importer - designed to yield alignment useful for + * transfer of annotation to associated sequences + * + * @param alphaFoldCif + * @param tmpFile + * @param id + * @param chain + * @param dbSource + * @param dbVersion + * @return + * @throws Exception + */ + public static AlignmentI importDownloadedStructureFromUrl( + String alphaFoldCif, File tmpFile, String id, String chain, + String dbSource, String dbVersion) throws Exception + { + String file = tmpFile.getAbsolutePath(); + // todo get rid of Type and use FileFormatI instead? + FileFormatI fileFormat = FileFormat.MMCif; + AlignmentI pdbAlignment = new FormatAdapter().readFile(tmpFile, + DataSourceType.FILE, fileFormat); + if (pdbAlignment != null) + { + List toremove = new ArrayList(); + for (SequenceI pdbcs : pdbAlignment.getSequences()) { - List toremove = new ArrayList(); - for (SequenceI pdbcs : pdbAlignment.getSequences()) + String chid = null; + // Mapping map=null; + for (PDBEntry pid : pdbcs.getAllPDBEntries()) { - String chid = null; - // Mapping map=null; - for (PDBEntry pid : pdbcs.getAllPDBEntries()) + if (pid.getFile() == file) { - if (pid.getFile() == file) - { - chid = pid.getChainCode(); + chid = pid.getChainCode(); - } - } - if (chain == null || (chid != null && (chid.equals(chain) - || chid.trim().equals(chain.trim()) - || (chain.trim().length() == 0 && chid.equals("_"))))) - { - // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant. - // TODO: suggest simplify naming to 1qip|A as default name defined - pdbcs.setName(id - + SEPARATOR + pdbcs.getName()); - // Might need to add more metadata to the PDBEntry object - // like below - /* - * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry - * entry.setId(id); if (entry.getProperty() == null) - * entry.setProperty(new Hashtable()); - * entry.getProperty().put("chains", pdbchain.id + "=" + - * sq.getStart() + "-" + sq.getEnd()); - * sq.getDatasetSequence().addPDBId(entry); - */ - // Add PDB DB Refs - // We make a DBRefEtntry because we have obtained the PDB file from - // a - // verifiable source - // JBPNote - PDB DBRefEntry should also carry the chain and mapping - // information - DBRefEntry dbentry = new DBRefEntry(getDbSource(), - getDbVersion(), (chid == null ? id : id + chid)); - // dbentry.setMap() - pdbcs.addDBRef(dbentry); - } - else - { - // mark this sequence to be removed from the alignment - // - since it's not from the right chain - toremove.add(pdbcs); } } - // now remove marked sequences - for (SequenceI pdbcs : toremove) + if (chain == null || (chid != null && (chid.equals(chain) + || chid.trim().equals(chain.trim()) + || (chain.trim().length() == 0 && chid.equals("_"))))) { - pdbAlignment.deleteSequence(pdbcs); - if (pdbcs.getAnnotation() != null) + // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant. + // TODO: suggest simplify naming to 1qip|A as default name defined + pdbcs.setName(id + SEPARATOR + pdbcs.getName()); + // Might need to add more metadata to the PDBEntry object + // like below + /* + * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry + * entry.setId(id); if (entry.getProperty() == null) + * entry.setProperty(new Hashtable()); + * entry.getProperty().put("chains", pdbchain.id + "=" + + * sq.getStart() + "-" + sq.getEnd()); + * sq.getDatasetSequence().addPDBId(entry); + */ + // Add PDB DB Refs + // We make a DBRefEtntry because we have obtained the PDB file from + // a + // verifiable source + // JBPNote - PDB DBRefEntry should also carry the chain and mapping + // information + if (dbSource != null) { - for (AlignmentAnnotation aa : pdbcs.getAnnotation()) + DBRefEntry dbentry = new DBRefEntry(dbSource, + + dbVersion, (chid == null ? id : id + chid)); + // dbentry.setMap() + pdbcs.addDBRef(dbentry); + // update any feature groups + List allsf = pdbcs.getFeatures() + .getAllFeatures(); + List newsf = new ArrayList(); + if (allsf != null && allsf.size() > 0) { - pdbAlignment.deleteAnnotation(aa); + for (SequenceFeature f : allsf) + { + if (file.equals(f.getFeatureGroup())) + { + f = new SequenceFeature(f, f.type, f.begin, f.end, id, + f.score); + } + newsf.add(f); + } + pdbcs.setSequenceFeatures(newsf); } } } + else + { + // mark this sequence to be removed from the alignment + // - since it's not from the right chain + toremove.add(pdbcs); + } } - - if (pdbAlignment == null || pdbAlignment.getHeight() < 1) + // now remove marked sequences + for (SequenceI pdbcs : toremove) { - throw new Exception(MessageManager.formatMessage( - "exception.no_pdb_records_for_chain", new String[] - { id, ((chain == null) ? "' '" : chain) })); + pdbAlignment.deleteSequence(pdbcs); + if (pdbcs.getAnnotation() != null) + { + for (AlignmentAnnotation aa : pdbcs.getAnnotation()) + { + pdbAlignment.deleteAnnotation(aa); + } + } } - - } catch (Exception ex) // Problem parsing PDB file - { - stopQuery(); - throw (ex); } return pdbAlignment; } @@ -249,13 +373,13 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy @Override public String getTestQuery() { - return "1QIP"; + return "AF-O15552-F1"; } @Override public String getDbName() { - return "PDB"; // getDbSource(); + return "ALPHAFOLD"; // getDbSource(); } @Override @@ -278,4 +402,5 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy { return new PDBFeatureSettings(); } + }