X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FEBIAlfaFold.java;h=672f0ac8ed25d5822c3653c29f3aad86eee30d68;hb=2ee5edd84da7c509efc15d241527fa62093b41c7;hp=6c7818b3a6b7f1b6c5f9e97b798c3befbb016c5f;hpb=7e519850d1a3d263fd453ddaed1977754e5262b0;p=jalview.git diff --git a/src/jalview/ws/dbsources/EBIAlfaFold.java b/src/jalview/ws/dbsources/EBIAlfaFold.java index 6c7818b..672f0ac 100644 --- a/src/jalview/ws/dbsources/EBIAlfaFold.java +++ b/src/jalview/ws/dbsources/EBIAlfaFold.java @@ -21,42 +21,44 @@ */ package jalview.ws.dbsources; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.ParseException; + +import com.stevesoft.pat.Regex; + import jalview.api.FeatureSettingsModelI; -import jalview.bin.Cache; +import jalview.bin.Console; +import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; -import jalview.datamodel.ContactMatrix; import jalview.datamodel.ContactMatrixI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; -import jalview.datamodel.PDBEntry.Type; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.gui.Desktop; import jalview.io.DataSourceType; import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FormatAdapter; import jalview.io.PDBFeatureSettings; -import jalview.javascript.json.JSON; -import jalview.structure.StructureImportSettings; -import jalview.util.HttpUtils; +import jalview.structure.StructureMapping; +import jalview.structure.StructureSelectionManager; import jalview.util.MessageManager; import jalview.util.Platform; import jalview.ws.datamodel.alphafold.PAEContactMatrix; -import jalview.ws.ebi.EBIFetchClient; import jalview.ws.utils.UrlDownloadClient; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.jmol.adapter.readers.simple.JSONReader; - -import com.stevesoft.pat.Regex; - /** * @author JimP * @@ -69,6 +71,8 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy private static final int PDB_ID_LENGTH = 4; + private static String AF_VERSION = "3"; + public EBIAlfaFold() { super(); @@ -93,7 +97,9 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy @Override public Regex getAccessionValidator() { - return new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)"); + Regex validator = new Regex("(AF-[A-Z]+[0-9]+[A-Z0-9]+-F1)"); + validator.setIgnoreCase(true); + return validator; } /* @@ -118,15 +124,24 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy return "1"; } - public static String getAlphaFoldCifDownloadUrl(String id) + public static String getAlphaFoldCifDownloadUrl(String id, String vnum) { - return "https://alphafold.ebi.ac.uk/files/" + id + "-model_v1.cif"; + if (vnum == null || vnum.length() == 0) + { + vnum = AF_VERSION; + } + return "https://alphafold.ebi.ac.uk/files/" + id + "-model_v" + vnum + + ".cif"; } - public static String getAlphaFoldPaeDownloadUrl(String id) + public static String getAlphaFoldPaeDownloadUrl(String id, String vnum) { + if (vnum == null || vnum.length() == 0) + { + vnum = AF_VERSION; + } return "https://alphafold.ebi.ac.uk/files/" + id - + "-predicted_aligned_error_v1.json"; + + "-predicted_aligned_error_v" + vnum + ".json"; } /* @@ -137,6 +152,12 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy @Override public AlignmentI getSequenceRecords(String queries) throws Exception { + return getSequenceRecords(queries, null); + } + + public AlignmentI getSequenceRecords(String queries, String retrievalUrl) + throws Exception + { AlignmentI pdbAlignment = null; String chain = null; String id = null; @@ -157,130 +178,409 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy stopQuery(); return null; } - String alphaFoldCif = getAlphaFoldCifDownloadUrl(id); + String alphaFoldCif = getAlphaFoldCifDownloadUrl(id, AF_VERSION); + if (retrievalUrl != null) + { + alphaFoldCif = retrievalUrl; + } try { - File tmpFile = File.createTempFile(id, "cif"); + File tmpFile = File.createTempFile(id, ".cif"); + Console.debug("Retrieving structure file for " + id + " from " + + alphaFoldCif); UrlDownloadClient.download(alphaFoldCif, tmpFile); + + // may not need this check ? file = tmpFile.getAbsolutePath(); if (file == null) { return null; } - // todo get rid of Type and use FileFormatI instead? - FileFormatI fileFormat = FileFormat.MMCif; - pdbAlignment = new FormatAdapter().readFile(tmpFile, - DataSourceType.FILE, fileFormat); - if (pdbAlignment != null) + pdbAlignment = importDownloadedStructureFromUrl(alphaFoldCif, tmpFile, + id, chain, getDbSource(), getDbVersion()); + + if (pdbAlignment == null || pdbAlignment.getHeight() < 1) { - List toremove = new ArrayList(); - for (SequenceI pdbcs : pdbAlignment.getSequences()) - { - String chid = null; - // Mapping map=null; - for (PDBEntry pid : pdbcs.getAllPDBEntries()) - { - if (pid.getFile() == file) - { - chid = pid.getChainCode(); + throw new Exception(MessageManager.formatMessage( + "exception.no_pdb_records_for_chain", new String[] + { id, ((chain == null) ? "' '" : chain) })); + } + // done during structure retrieval + // retrieve_AlphaFold_pAE(id, pdbAlignment, retrievalUrl); - } - } - if (chain == null || (chid != null && (chid.equals(chain) - || chid.trim().equals(chain.trim()) - || (chain.trim().length() == 0 && chid.equals("_"))))) - { - // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant. - // TODO: suggest simplify naming to 1qip|A as default name defined - pdbcs.setName(id + SEPARATOR + pdbcs.getName()); - // Might need to add more metadata to the PDBEntry object - // like below - /* - * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry - * entry.setId(id); if (entry.getProperty() == null) - * entry.setProperty(new Hashtable()); - * entry.getProperty().put("chains", pdbchain.id + "=" + - * sq.getStart() + "-" + sq.getEnd()); - * sq.getDatasetSequence().addPDBId(entry); - */ - // Add PDB DB Refs - // We make a DBRefEtntry because we have obtained the PDB file from - // a - // verifiable source - // JBPNote - PDB DBRefEntry should also carry the chain and mapping - // information - DBRefEntry dbentry = new DBRefEntry(getDbSource(), - getDbVersion(), (chid == null ? id : id + chid)); - // dbentry.setMap() - pdbcs.addDBRef(dbentry); - } - else - { - // mark this sequence to be removed from the alignment - // - since it's not from the right chain - toremove.add(pdbcs); - } - } - // now remove marked sequences - for (SequenceI pdbcs : toremove) + } catch (Exception ex) // Problem parsing PDB file + { + stopQuery(); + throw (ex); + } + return pdbAlignment; + } + + /** + * get an alphafold pAE for the given id, and add it to sequence 0 in + * pdbAlignment (assuming it came from structurefile parser). + * + * @param id + * @param pdbAlignment + * @param retrievalUrl + * - URL of .mmcif from EBI-AlphaFold - will be used to generate the + * pAE URL automatically + * @throws IOException + * @throws Exception + */ + public static void retrieve_AlphaFold_pAE(String id, + AlignmentI pdbAlignment, String retrievalUrl) throws IOException + { + // import PAE as contact matrix - assume this will work if there was a + // model + String paeURL = getAlphaFoldPaeDownloadUrl(id, AF_VERSION); + + if (retrievalUrl != null) + { + // manufacture the PAE url from a url like ...-model-vN.cif + paeURL = retrievalUrl.replace("model", "predicted_aligned_error") + .replace(".cif", ".json"); + } + + File pae = null; + try + { + pae = File.createTempFile(id == null ? "af_pae" : id, "pae_json"); + } catch (IOException e) + { + e.printStackTrace(); + } + Console.debug("Downloading pae from " + paeURL + " to " + pae.toString() + + ""); + UrlDownloadClient.download(paeURL, pae); + addAlphaFoldPAEToSequence(pdbAlignment, pae, 0, null); + } + + public static void addAlphaFoldPAEToSequence(AlignmentI pdbAlignment, + File pae, int index, String seqId) + { + addAlphaFoldPAE(pdbAlignment, pae, index, seqId, false, false); + } + + public static void addAlphaFoldPAEToStructure(AlignmentI pdbAlignment, + File pae, int index, String structIdOrFile, boolean isStructId) + { + addAlphaFoldPAE(pdbAlignment, pae, index, structIdOrFile, true, + isStructId); + } + + public static void addAlphaFoldPAE(AlignmentI pdbAlignment, File pae, + int index, String id, boolean isStruct, boolean isStructId) + { + FileInputStream paeInput = null; + try + { + paeInput = new FileInputStream(pae); + } catch (FileNotFoundException e) + { + Console.error( + "Could not find pAE file '" + pae.getAbsolutePath() + "'", e); + return; + } + + if (isStruct) + { + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + if (ssm != null) + { + String structFile = isStructId ? ssm.findFileForPDBId(id) : id; + + StructureMapping[] smArray = ssm.getMapping(structFile); + + try { - pdbAlignment.deleteSequence(pdbcs); - if (pdbcs.getAnnotation() != null) + if (!importPaeJSONAsContactMatrixToStructure(smArray, paeInput)) { - for (AlignmentAnnotation aa : pdbcs.getAnnotation()) - { - pdbAlignment.deleteAnnotation(aa); - } + Console.warn("Could not import contact matrix from '" + + pae.getAbsolutePath() + "' to structure."); } + } catch (IOException e1) + { + Console.error("Error when importing pAE file '" + + pae.getAbsolutePath() + "'", e1); + } catch (ParseException e2) + { + Console.error("Error when parsing pAE file '" + + pae.getAbsolutePath() + "'", e2); } } - if (pdbAlignment == null || pdbAlignment.getHeight() < 1) + } + else + { + // attach to sequence?! + try { - throw new Exception(MessageManager.formatMessage( - "exception.no_pdb_records_for_chain", new String[] - { id, ((chain == null) ? "' '" : chain) })); + if (!importPaeJSONAsContactMatrixToSequence(pdbAlignment, paeInput, + index, id)) + { + Console.warn("Could not import contact matrix from '" + + pae.getAbsolutePath() + "' to sequence."); + } + } catch (IOException e1) + { + Console.error("Error when importing pAE file '" + + pae.getAbsolutePath() + "'", e1); + } catch (ParseException e2) + { + Console.error("Error when parsing pAE file '" + + pae.getAbsolutePath() + "'", e2); } + } - // import PAE as contact matrix - assume this will work if there was a - // model - File pae = File.createTempFile(id, "pae_json"); - String paeURL = getAlphaFoldPaeDownloadUrl(id); - UrlDownloadClient.download(paeURL, pae); - if (!importPaeJSONAsContactMatrix(pdbAlignment, pae)) + } + + /** + * parses the given pAE matrix and adds it to sequence 0 in the given + * alignment + * + * @param pdbAlignment + * @param pae_input + * @return true if there was a pAE matrix added + * @throws ParseException + * @throws IOException + * @throws Exception + */ + public static boolean importPaeJSONAsContactMatrixToSequence( + AlignmentI pdbAlignment, InputStream pae_input) + throws IOException, ParseException + { + return importPaeJSONAsContactMatrixToSequence(pdbAlignment, pae_input, + 0, null); + } + + public static boolean importPaeJSONAsContactMatrixToSequence( + AlignmentI pdbAlignment, File paeFile, int index, String seqId) + throws FileNotFoundException, IOException, ParseException + { + return importPaeJSONAsContactMatrixToSequence(pdbAlignment, + new FileInputStream(paeFile), index, seqId); + } + + public static boolean importPaeJSONAsContactMatrixToSequence( + AlignmentI pdbAlignment, InputStream pae_input, int index, + String seqId) throws IOException, ParseException + { + SequenceI sequence = null; + if (seqId == null) + { + int seqToGet = index > 0 ? index : 0; + sequence = pdbAlignment.getSequenceAt(seqToGet); + } + if (sequence == null) + { + SequenceI[] sequences = pdbAlignment.findSequenceMatch(seqId); + if (sequences == null || sequences.length < 1) { - Cache.log.debug("Couln't import contact matrix from " + paeURL - + " (stored in " + pae.toString() + ")"); + Console.warn("Could not find sequence with id '" + seqId + + "' to attach pAE matrix to. Ignoring matrix."); + return false; } + else + { + sequence = sequences[0]; // just use the first sequence with this seqId + } + } - } catch (Exception ex) // Problem parsing PDB file + JSONObject paeDict = parseJSONtoPAEContactMatrix(pae_input); + if (paeDict == null) { - stopQuery(); - throw (ex); + Console.debug("JSON file did not parse properly."); + return false; } - return pdbAlignment; + ContactMatrixI matrix = new PAEContactMatrix(sequence, + (Map) paeDict); + + AlignmentAnnotation cmannot = sequence.addContactList(matrix); + pdbAlignment.addAnnotation(cmannot); + + return true; } - private boolean importPaeJSONAsContactMatrix(AlignmentI pdbAlignment, - File pae) throws Exception + public static JSONObject parseJSONtoPAEContactMatrix( + InputStream pae_input) throws IOException, ParseException { - FileInputStream pae_input = new FileInputStream(pae); + Object paeJson = Platform.parseJSON(pae_input); + JSONObject paeDict = null; + if (paeJson instanceof JSONObject) + { + paeDict = (JSONObject) paeJson; + } + else if (paeJson instanceof JSONArray) + { + JSONArray jsonArray = (JSONArray) paeJson; + if (jsonArray.size() > 0) + paeDict = (JSONObject) jsonArray.get(0); + } + + return paeDict; + } - List pae_obj = (List) Platform - .parseJSON(pae_input); + public static boolean importPaeJSONAsContactMatrixToStructure( + StructureMapping[] smArray, InputStream paeInput) + throws IOException, ParseException + { + boolean someDone = false; + for (StructureMapping sm : smArray) + { + boolean thisDone = importPaeJSONAsContactMatrixToStructure(sm, + paeInput); + someDone |= thisDone; + } + return someDone; + } + + public static boolean importPaeJSONAsContactMatrixToStructure( + StructureMapping sm, File paeFile) + throws FileNotFoundException, IOException, ParseException + { + return importPaeJSONAsContactMatrixToStructure(sm, + new FileInputStream(paeFile)); + } + + public static boolean importPaeJSONAsContactMatrixToStructure( + StructureMapping sm, InputStream paeInput) + throws IOException, ParseException + { + JSONObject pae_obj = parseJSONtoPAEContactMatrix(paeInput); if (pae_obj == null) { + Console.debug("JSON file did not parse properly."); return false; } - ContactMatrixI matrix = new PAEContactMatrix( - pdbAlignment.getSequenceAt(0), (Map)pae_obj.get(0)); - pdbAlignment.getSequenceAt(0).addAlignmentAnnotation(pdbAlignment.addContactList(matrix)); + ContactMatrixI matrix = new PAEContactMatrix(sm.getSequence(), + (Map) pae_obj); + + AlignmentAnnotation cmannot = sm.getSequence().addContactList(matrix); + // sm.getSequence().addAlignmentAnnotation(cmannot); + sm.transfer(cmannot); + // return true; + + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + List acfList = ssm.getSequenceMappings(); + return true; } + /** + * general purpose structure importer - designed to yield alignment useful for + * transfer of annotation to associated sequences + * + * @param alphaFoldCif + * @param tmpFile + * @param id + * @param chain + * @param dbSource + * @param dbVersion + * @return + * @throws Exception + */ + public static AlignmentI importDownloadedStructureFromUrl( + String alphaFoldCif, File tmpFile, String id, String chain, + String dbSource, String dbVersion) throws Exception + { + String file = tmpFile.getAbsolutePath(); + // todo get rid of Type and use FileFormatI instead? + FileFormatI fileFormat = FileFormat.MMCif; + AlignmentI pdbAlignment = new FormatAdapter().readFile(tmpFile, + DataSourceType.FILE, fileFormat); + if (pdbAlignment != null) + { + List toremove = new ArrayList(); + for (SequenceI pdbcs : pdbAlignment.getSequences()) + { + String chid = null; + // Mapping map=null; + for (PDBEntry pid : pdbcs.getAllPDBEntries()) + { + if (pid.getFile() == file) + { + chid = pid.getChainCode(); + + } + } + if (chain == null || (chid != null && (chid.equals(chain) + || chid.trim().equals(chain.trim()) + || (chain.trim().length() == 0 && chid.equals("_"))))) + { + // FIXME seems to result in 'PDB|1QIP|1qip|A' - 1QIP is redundant. + // TODO: suggest simplify naming to 1qip|A as default name defined + pdbcs.setName(id + SEPARATOR + pdbcs.getName()); + // Might need to add more metadata to the PDBEntry object + // like below + /* + * PDBEntry entry = new PDBEntry(); // Construct the PDBEntry + * entry.setId(id); if (entry.getProperty() == null) + * entry.setProperty(new Hashtable()); + * entry.getProperty().put("chains", pdbchain.id + "=" + + * sq.getStart() + "-" + sq.getEnd()); + * sq.getDatasetSequence().addPDBId(entry); + */ + // Add PDB DB Refs + // We make a DBRefEtntry because we have obtained the PDB file from + // a + // verifiable source + // JBPNote - PDB DBRefEntry should also carry the chain and mapping + // information + if (dbSource != null) + { + DBRefEntry dbentry = new DBRefEntry(dbSource, + + dbVersion, (chid == null ? id : id + chid)); + // dbentry.setMap() + pdbcs.addDBRef(dbentry); + // update any feature groups + List allsf = pdbcs.getFeatures() + .getAllFeatures(); + List newsf = new ArrayList(); + if (allsf != null && allsf.size() > 0) + { + for (SequenceFeature f : allsf) + { + if (file.equals(f.getFeatureGroup())) + { + f = new SequenceFeature(f, f.type, f.begin, f.end, id, + f.score); + } + newsf.add(f); + } + pdbcs.setSequenceFeatures(newsf); + } + } + } + else + { + // mark this sequence to be removed from the alignment + // - since it's not from the right chain + toremove.add(pdbcs); + } + } + // now remove marked sequences + for (SequenceI pdbcs : toremove) + { + pdbAlignment.deleteSequence(pdbcs); + if (pdbcs.getAnnotation() != null) + { + for (AlignmentAnnotation aa : pdbcs.getAnnotation()) + { + pdbAlignment.deleteAnnotation(aa); + } + } + } + } + return pdbAlignment; + } + /* * (non-Javadoc) * @@ -328,4 +628,5 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy { return new PDBFeatureSettings(); } + }