JAL-4366 JAL-4371 refactor and fix up code inferring potential PDB IDs & Chaincode...
authorJames Procter <j.procter@dundee.ac.uk>
Sun, 14 Jan 2024 13:33:57 +0000 (13:33 +0000)
committerJames Procter <j.procter@dundee.ac.uk>
Sun, 14 Jan 2024 13:34:35 +0000 (13:34 +0000)
src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java
src/jalview/structure/StructureSelectionManager.java
src/jalview/struture/PDBEntryUtils.java [new file with mode: 0644]

index 76479e2..2aa0401 100644 (file)
@@ -31,7 +31,6 @@ import java.util.Objects;
 import java.util.Set;
 
 import javax.swing.JTable;
-import javax.swing.table.TableModel;
 
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
@@ -46,6 +45,7 @@ import jalview.fts.core.FTSRestRequest;
 import jalview.fts.core.FTSRestResponse;
 import jalview.fts.service.pdb.PDBFTSRestClient;
 import jalview.jbgui.FilterOption;
+import jalview.struture.PDBEntryUtils;
 import jalview.util.MessageManager;
 
 /**
@@ -71,7 +71,7 @@ public class PDBStructureChooserQuerySource
             PDBFTSRestClient.getInstance());
 
   }
-
+  
   /**
    * Builds a query string for a given sequences using its DBRef entries
    * 
@@ -86,16 +86,19 @@ public class PDBStructureChooserQuerySource
     boolean isUniProtRefsFound = false;
     StringBuilder queryBuilder = new StringBuilder();
     Set<String> seqRefs = new LinkedHashSet<>();
-
+    SequenceI ds = seq.getDatasetSequence();
+    while (ds.getDatasetSequence()!=null) {
+      ds = ds.getDatasetSequence();
+    }
+    
     /*
      * note PDBs as DBRefEntry so they are not duplicated in query
      */
     Set<String> pdbids = new HashSet<>();
-
-    if (seq.getAllPDBEntries() != null
-            && queryBuilder.length() < MAX_QLENGTH)
+    if (queryBuilder.length() < MAX_QLENGTH)
     {
-      for (PDBEntry entry : seq.getAllPDBEntries())
+      Set<PDBEntry> gatheredEntries = PDBEntryUtils.gatherPDBEntries(seq, true); 
+      for (PDBEntry entry : gatheredEntries)
       {
         if (isValidSeqName(entry.getId()))
         {
@@ -105,8 +108,8 @@ public class PDBStructureChooserQuerySource
           pdbids.add(id);
         }
       }
-    }
-
+    } 
+    
     List<DBRefEntry> refs = seq.getDBRefs();
     if (refs != null && refs.size() != 0)
     {
@@ -142,7 +145,7 @@ public class PDBStructureChooserQuerySource
         }
       }
     }
-
+    
     if (!isPDBRefsFound && !isUniProtRefsFound)
     {
       String seqName = seq.getName();
@@ -348,7 +351,6 @@ public class PDBStructureChooserQuerySource
     PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
     int count = 0;
     int idColumnIndex = -1;
-    boolean fromTDB = true;
     idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
 
     for (int row : selectedRows)
index 9906582..a804d84 100644 (file)
@@ -26,10 +26,12 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import java.util.Vector;
 
 import jalview.analysis.AlignSeq;
@@ -54,6 +56,7 @@ import jalview.io.AppletFormatAdapter;
 import jalview.io.DataSourceType;
 import jalview.io.StructureFile;
 import jalview.structure.StructureImportSettings.TFType;
+import jalview.struture.PDBEntryUtils;
 import jalview.util.MapList;
 import jalview.util.MappingUtils;
 import jalview.util.MessageManager;
@@ -522,32 +525,15 @@ public class StructureSelectionManager
       {
         ds = ds.getDatasetSequence();
       }
-
+      List <PDBEntry> putativePDBe = PDBEntryUtils.selectPutativePDBe(seq,ds, pdb); 
+      
       if (targetChainIds != null && targetChainIds[s] != null)
       {
         infChain = false;
         targetChainId = targetChainIds[s];
       }
-      else if (seq.getName().indexOf("|") > -1)
-      {
-        targetChainId = seq.getName()
-                .substring(seq.getName().lastIndexOf("|") + 1);
-        if (targetChainId.length() > 1)
-        {
-          if (targetChainId.trim().length() == 0)
-          {
-            targetChainId = " ";
-          }
-          else
-          {
-            // not a valid chain identifier
-            targetChainId = "";
-          }
-        }
-      }
-      else
-      {
-        targetChainId = "";
+      else {
+        targetChainId = PDBEntryUtils.inferChainId(seq);
       }
 
       /*
@@ -567,6 +553,12 @@ public class StructureSelectionManager
         {
           continue; // don't try to map chains don't match.
         }
+        PDBEntry putativeChain = null;
+        if (!putativePDBe.isEmpty() && (putativeChain = PDBEntryUtils
+                .selectPutativePDBEntry(putativePDBe, chain)) == null)
+        {
+          continue;
+        }
         // TODO: correctly determine sequence type for mixed na/peptide
         // structures
         final String type = chain.isNa ? AlignSeq.DNA : AlignSeq.PEP;
@@ -685,6 +677,11 @@ public class StructureSelectionManager
         }
         else
         {
+          if (maxAlignseq.getS1Coverage()<0.15 && maxAlignseq.getS2Coverage()<0.15)
+          {
+            // skip this - the NW alignment is spurious
+            continue;
+          }
           // Construct a needleman wunsch mapping instead.
           if (progress != null)
           {
diff --git a/src/jalview/struture/PDBEntryUtils.java b/src/jalview/struture/PDBEntryUtils.java
new file mode 100644 (file)
index 0000000..bb2e93e
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.struture;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.google.common.collect.Lists;
+
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.io.StructureFile;
+import mc_view.PDBChain;
+
+/**
+ * Static helpers for working out which PDB entries (and chain codes) a
+ * sequence might correspond to, using the PDB entries already attached to the
+ * sequence and, optionally, its name.
+ */
+public class PDBEntryUtils
+{
+
+  /**
+   * Infers a chain identifier from the text following the last '|' in the
+   * sequence's name.
+   *
+   * @param seq
+   *          sequence whose name is examined
+   * @return the text after the last '|' when it is at most one character long
+   *         (possibly empty); a single space when that text is longer but
+   *         consists only of whitespace; otherwise the empty string (also
+   *         returned when the name is null or contains no '|')
+   */
+  public static String inferChainId(SequenceI seq)
+  {
+    String name = seq.getName();
+    if (name == null)
+    {
+      return "";
+    }
+    int lastBar = name.lastIndexOf('|');
+    if (lastBar < 0)
+    {
+      return "";
+    }
+    String candidate = name.substring(lastBar + 1);
+    if (candidate.length() <= 1)
+    {
+      return candidate;
+    }
+    // longer than one character: only an all-whitespace suffix is accepted,
+    // and it is reported as the blank chain; anything else is not a valid
+    // chain identifier
+    return candidate.trim().length() == 0 ? " " : "";
+  }
+
+  /**
+   * Matches a name that starts with a PDB-style identifier (a digit followed
+   * by three alphanumerics), optionally followed by '_' or ':' and then
+   * further text. Group 1 is the identifier; group 2 is the remaining text, or
+   * null when nothing follows the identifier.
+   */
+  protected static Pattern id_and_chain = Pattern
+          .compile("(\\d[0-9A-Za-z]{3})[_:]?(.+)?");
+
+  /**
+   * Tries to read a PDB ID, and optionally a chain code, from the sequence's
+   * name.
+   *
+   * @param seq
+   *          sequence whose name is matched against {@link #id_and_chain}
+   * @return a single-element list holding a new PDBEntry with the ID (and the
+   *         chain code, when text follows the ID) taken from the name, or an
+   *         empty list if the name is null or does not match
+   */
+  public static List<PDBEntry> inferPDBEntry(SequenceI seq)
+  {
+    String name = seq.getName();
+    if (name == null)
+    {
+      return List.of();
+    }
+    Matcher matcher = id_and_chain.matcher(name);
+    if (!matcher.matches())
+    {
+      return List.of();
+    }
+    PDBEntry pdbe = new PDBEntry();
+    pdbe.setId(matcher.group(1));
+    // groupCount() is a property of the pattern (always 2 here), so test the
+    // captured text itself to see whether anything followed the ID
+    String chainCode = matcher.group(2);
+    if (chainCode != null)
+    {
+      pdbe.setChainCode(chainCode);
+    }
+    return List.of(pdbe);
+  }
+
+  /**
+   * Generates likely PDB IDs and chain codes from seq that fit pdb.
+   *
+   * @param seq
+   *          sequence whose PDB entries (attached and inferred from its name,
+   *          including those of its dataset sequences) are considered
+   * @param ds
+   *          currently unused; candidates are gathered starting from seq
+   * @param pdb
+   *          structure file whose ID the candidates must match
+   * @return empty list or one or more PDBEntry which match pdb.getId(),
+   *         ignoring case
+   */
+  public static List<PDBEntry> selectPutativePDBe(SequenceI seq,
+          SequenceI ds, StructureFile pdb)
+  {
+    List<PDBEntry> putativePDBe = new ArrayList<>();
+    for (PDBEntry candidate : gatherPDBEntries(seq, true))
+    {
+      String candidateId = candidate.getId();
+      // an entry without an ID cannot match any structure
+      if (candidateId != null
+              && candidateId.equalsIgnoreCase(pdb.getId()))
+      {
+        putativePDBe.add(candidate);
+      }
+    }
+    return putativePDBe;
+  }
+
+  /**
+   * Collects the PDB entries attached to seq and to each dataset sequence
+   * reached by repeatedly following getDatasetSequence().
+   *
+   * @param seq
+   *          starting sequence; null yields an empty set
+   * @param inferFromName
+   *          if true, entries inferred from each sequence's name by
+   *          {@link #inferPDBEntry(SequenceI)} are added as well
+   * @return a new, possibly empty, set of PDB entries
+   */
+  public static Set<PDBEntry> gatherPDBEntries(SequenceI seq,
+          boolean inferFromName)
+  {
+    Set<PDBEntry> possiblePDBe = new HashSet<>();
+    for (SequenceI current = seq; current != null; current = current
+            .getDatasetSequence())
+    {
+      if (current.getAllPDBEntries() != null)
+      {
+        possiblePDBe.addAll(current.getAllPDBEntries());
+      }
+      if (inferFromName)
+      {
+        possiblePDBe.addAll(inferPDBEntry(current));
+      }
+    }
+    return possiblePDBe;
+  }
+
+  /**
+   * Picks the candidate entry whose chain code equals the given chain's id.
+   *
+   * @param putativePDBe
+   *          candidate entries
+   * @param chain
+   *          structure chain being considered
+   * @return the first candidate with a non-null chain code equal to chain.id,
+   *         or null if the list is empty or no candidate matches
+   */
+  public static PDBEntry selectPutativePDBEntry(List<PDBEntry> putativePDBe,
+          PDBChain chain)
+  {
+    // NOTE(review): candidates without a chain code never match, so a list
+    // made up only of such entries yields null for every chain - confirm
+    // that this is what callers expect
+    for (PDBEntry pdbe : putativePDBe)
+    {
+      String chainCode = pdbe.getChainCode();
+      if (chainCode != null && chainCode.equals(chain.id))
+      {
+        return pdbe;
+      }
+    }
+    return null;
+  }
+}