From ae1c7d0b73a15d1c7108c43980a67736bc8ade10 Mon Sep 17 00:00:00 2001 From: James Procter Date: Sun, 14 Jan 2024 13:33:57 +0000 Subject: [PATCH] JAL-4366 JAL-4371 refactor and fix up code inferring potential PDB IDs & Chaincode from ID strings to PDBEntryUtils --- .../PDBStructureChooserQuerySource.java | 24 +-- .../structure/StructureSelectionManager.java | 39 +++-- src/jalview/struture/PDBEntryUtils.java | 154 ++++++++++++++++++++ 3 files changed, 185 insertions(+), 32 deletions(-) create mode 100644 src/jalview/struture/PDBEntryUtils.java diff --git a/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java index 76479e2..2aa0401 100644 --- a/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java +++ b/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java @@ -31,7 +31,6 @@ import java.util.Objects; import java.util.Set; import javax.swing.JTable; -import javax.swing.table.TableModel; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; @@ -46,6 +45,7 @@ import jalview.fts.core.FTSRestRequest; import jalview.fts.core.FTSRestResponse; import jalview.fts.service.pdb.PDBFTSRestClient; import jalview.jbgui.FilterOption; +import jalview.struture.PDBEntryUtils; import jalview.util.MessageManager; /** @@ -71,7 +71,7 @@ public class PDBStructureChooserQuerySource PDBFTSRestClient.getInstance()); } - + /** * Builds a query string for a given sequences using its DBRef entries * @@ -86,16 +86,19 @@ public class PDBStructureChooserQuerySource boolean isUniProtRefsFound = false; StringBuilder queryBuilder = new StringBuilder(); Set seqRefs = new LinkedHashSet<>(); - + SequenceI ds = seq.getDatasetSequence(); + while (ds.getDatasetSequence()!=null) { + ds = ds.getDatasetSequence(); + } + /* * note PDBs as DBRefEntry so they are not duplicated in query */ Set pdbids = new HashSet<>(); - - if (seq.getAllPDBEntries() != null - && 
queryBuilder.length() < MAX_QLENGTH) + if (queryBuilder.length() < MAX_QLENGTH) { - for (PDBEntry entry : seq.getAllPDBEntries()) + Set gatheredEntries = PDBEntryUtils.gatherPDBEntries(seq, true); + for (PDBEntry entry : gatheredEntries) { if (isValidSeqName(entry.getId())) { @@ -105,8 +108,8 @@ public class PDBStructureChooserQuerySource pdbids.add(id); } } - } - + } + List refs = seq.getDBRefs(); if (refs != null && refs.size() != 0) { @@ -142,7 +145,7 @@ public class PDBStructureChooserQuerySource } } } - + if (!isPDBRefsFound && !isUniProtRefsFound) { String seqName = seq.getName(); @@ -348,7 +351,6 @@ public class PDBStructureChooserQuerySource PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length]; int count = 0; int idColumnIndex = -1; - boolean fromTDB = true; idColumnIndex = restable.getColumn("PDB Id").getModelIndex(); for (int row : selectedRows) diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 9906582..a804d84 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -26,10 +26,12 @@ import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; +import java.util.HashSet; import java.util.IdentityHashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.Vector; import jalview.analysis.AlignSeq; @@ -54,6 +56,7 @@ import jalview.io.AppletFormatAdapter; import jalview.io.DataSourceType; import jalview.io.StructureFile; import jalview.structure.StructureImportSettings.TFType; +import jalview.struture.PDBEntryUtils; import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.MessageManager; @@ -522,32 +525,15 @@ public class StructureSelectionManager { ds = ds.getDatasetSequence(); } - + List putativePDBe = PDBEntryUtils.selectPutativePDBe(seq,ds, pdb); + if (targetChainIds 
!= null && targetChainIds[s] != null) { infChain = false; targetChainId = targetChainIds[s]; } - else if (seq.getName().indexOf("|") > -1) - { - targetChainId = seq.getName() - .substring(seq.getName().lastIndexOf("|") + 1); - if (targetChainId.length() > 1) - { - if (targetChainId.trim().length() == 0) - { - targetChainId = " "; - } - else - { - // not a valid chain identifier - targetChainId = ""; - } - } - } - else - { - targetChainId = ""; + else { + targetChainId = PDBEntryUtils.inferChainId(seq); } /* @@ -567,6 +553,12 @@ public class StructureSelectionManager { continue; // don't try to map chains don't match. } + PDBEntry putativeChain = null; + if (!putativePDBe.isEmpty() && (putativeChain = PDBEntryUtils + .selectPutativePDBEntry(putativePDBe, chain)) == null) + { + continue; + } // TODO: correctly determine sequence type for mixed na/peptide // structures final String type = chain.isNa ? AlignSeq.DNA : AlignSeq.PEP; @@ -685,6 +677,11 @@ public class StructureSelectionManager } else { + if (maxAlignseq.getS1Coverage()<0.15 && maxAlignseq.getS2Coverage()<0.15) + { + // skip this - the NW alignment is spurious + continue; + } // Construct a needleman wunsch mapping instead. if (progress != null) { diff --git a/src/jalview/struture/PDBEntryUtils.java b/src/jalview/struture/PDBEntryUtils.java new file mode 100644 index 0000000..bb2e93e --- /dev/null +++ b/src/jalview/struture/PDBEntryUtils.java @@ -0,0 +1,154 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. 
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.struture;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.google.common.collect.Lists;
+
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.io.StructureFile;
+import mc_view.PDBChain;
+
+/**
+ * Static helpers that infer candidate PDB IDs and chain codes for sequences.
+ */
+public class PDBEntryUtils
+{
+  /** group 1 = PDB ID; group 2 = text after an optional '_' or ':', or null */
+  protected static final Pattern id_and_chain = Pattern
+          .compile("(\\d[0-9A-Za-z]{3})[_:]?(.+)?");
+
+  /**
+   * Infers a chain code from the text after the last '|' in the name of seq.
+   * @param seq sequence whose name is examined
+   * @return that text if it is at most one character long, " " if it is longer
+   *         but all whitespace, otherwise "" (also when the name has no '|')
+   */
+  public static String inferChainId(SequenceI seq)
+  {
+    String name = seq.getName();
+    int bar = (name == null) ? -1 : name.lastIndexOf('|');
+    String tail = (bar < 0) ? "" : name.substring(bar + 1);
+    if (tail.length() > 1)
+    {
+      // too long for a chain code unless it is all whitespace (blank chain)
+      tail = tail.trim().isEmpty() ? " " : "";
+    }
+    return tail;
+  }
+
+  /**
+   * Infers a PDB entry from a name made of a PDB ID optionally followed by a
+   * chain code, as matched by id_and_chain.
+   * @param seq sequence whose name is examined
+   * @return an unmodifiable list holding one new PDBEntry, or an empty list if
+   *         the name is null or does not match
+   */
+  public static List<PDBEntry> inferPDBEntry(SequenceI seq)
+  {
+    String name = seq.getName();
+    Matcher matcher = id_and_chain.matcher(name == null ? "" : name);
+    if (!matcher.matches())
+    {
+      return List.of();
+    }
+    PDBEntry pdbe = new PDBEntry();
+    pdbe.setId(matcher.group(1));
+    // group 2 is null when nothing follows the ID and separator
+    pdbe.setChainCode(matcher.group(2));
+    return List.of(pdbe);
+  }
+
+  /**
+   * Generates likely PDB IDs and chain codes for seq that fit pdb.
+   * @param seq sequence searched, along with its dataset sequences
+   * @param ds not used at present
+   * @param pdb structure file whose ID must match, ignoring case
+   * @return empty list or one or more PDBEntry which match pdb.getId()
+   */
+  public static List<PDBEntry> selectPutativePDBe(SequenceI seq,
+          SequenceI ds, StructureFile pdb)
+  {
+    List<PDBEntry> putativePDBe = new ArrayList<>();
+    for (PDBEntry candidate : gatherPDBEntries(seq, true))
+    {
+      // an entry with no ID cannot match, and must not cause an NPE
+      String id = candidate.getId();
+      if (id != null && id.equalsIgnoreCase(pdb.getId()))
+      {
+        putativePDBe.add(candidate);
+      }
+    }
+    return putativePDBe;
+  }
+
+  /**
+   * Collects the PDB entries of seq and of each dataset sequence beneath it.
+   * @param seq starting sequence; null gives an empty set
+   * @param inferFromName if true, also add entries inferred from each name
+   * @return a new set holding the gathered entries
+   */
+  public static Set<PDBEntry> gatherPDBEntries(SequenceI seq,
+          boolean inferFromName)
+  {
+    Set<PDBEntry> possiblePDBe = new HashSet<>();
+    for (SequenceI sq = seq; sq != null; sq = sq.getDatasetSequence())
+    {
+      if (sq.getAllPDBEntries() != null)
+      {
+        possiblePDBe.addAll(sq.getAllPDBEntries());
+      }
+      if (inferFromName)
+      {
+        possiblePDBe.addAll(inferPDBEntry(sq));
+      }
+    }
+    return possiblePDBe;
+  }
+
+  /**
+   * Picks the first entry whose chain code equals the id of chain.
+   * @param putativePDBe candidate entries; may be empty
+   * @param chain chain whose id is compared with each entry's chain code
+   * @return the matching entry, or null if no entry matches
+   */
+  public static PDBEntry selectPutativePDBEntry(List<PDBEntry> putativePDBe,
+          PDBChain chain)
+  {
+    for (PDBEntry pdbe : putativePDBe)
+    {
+      if (pdbe.getChainCode() != null && pdbe.getChainCode().equals(chain.id))
+      {
+        return pdbe;
+      }
+    }
+    // NOTE(review): a list whose entries all lack a chain code also ends up
+    // here - confirm that returning null rather than an entry is intended
+    return null;
+  }
+}
-- 
1.7.10.2