JAL-4366 JAL-4371 refactor and fix up code inferring potential PDB IDs & Chaincode...
[jalview.git] / src / jalview / struture / PDBEntryUtils.java
diff --git a/src/jalview/struture/PDBEntryUtils.java b/src/jalview/struture/PDBEntryUtils.java
new file mode 100644 (file)
index 0000000..bb2e93e
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.struture;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.google.common.collect.Lists;
+
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.io.StructureFile;
+import mc_view.PDBChain;
+
+/** Helpers for inferring PDB IDs and chain codes for sequences. */
+public class PDBEntryUtils
+{
+  /**
+   * Get a chain ID from the text after the last '|' in the sequence name.
+   * @return that text if at most one character long, " " if longer but all
+   *         whitespace, otherwise (or if no name or no '|') the empty string
+   */
+  public static String inferChainId(SequenceI seq)
+  {
+    String name = seq.getName();
+    int bar = (name == null) ? -1 : name.lastIndexOf('|');
+    if (bar < 0)
+    {
+      return "";
+    }
+    String chainId = name.substring(bar + 1);
+    // more than one character is only accepted as the blank chain
+    return chainId.length() <= 1 ? chainId
+            : (chainId.trim().isEmpty() ? " " : "");
+  }
+
+  /**
+   * Whole-name pattern: group 1 is a PDB-like ID (a digit then 3
+   * alphanumerics), then an optional '_' or ':', then optional group 2 (the
+   * chain code). (.+)? captures the same text as (.+)* without nested repeats.
+   */
+  protected static Pattern id_and_chain = Pattern
+          .compile("(\\d[0-9A-Za-z]{3})[_:]?(.+)?");
+
+  /**
+   * Infer a PDB entry from a sequence name that matches {@link #id_and_chain}.
+   * @return an unmodifiable list of one entry (chain code set only when text
+   *         follows the ID), or an empty list if the name does not match
+   */
+  public static List<PDBEntry> inferPDBEntry(SequenceI seq)
+  {
+    String name = seq.getName();
+    Matcher matcher = (name == null) ? null : id_and_chain.matcher(name);
+    if (matcher == null || !matcher.matches())
+    {
+      return List.of();
+    }
+    PDBEntry pdbe = new PDBEntry();
+    pdbe.setId(matcher.group(1));
+    // groupCount() is fixed by the pattern (always 2), so check the capture
+    String chainCode = matcher.group(2);
+    if (chainCode != null)
+    {
+      pdbe.setChainCode(chainCode);
+    }
+    return List.of(pdbe);
+  }
+
+  /**
+   * Select the PDB entries gathered for seq whose ID matches that of pdb.
+   * @param seq sequence to gather candidate entries from
+   * @param ds not used - dataset sequences are reached via seq
+   * @param pdb structure file whose ID must match, ignoring case
+   * @return empty list or one or more PDBEntry which match pdb.getId()
+   */
+  public static List<PDBEntry> selectPutativePDBe(SequenceI seq,
+          SequenceI ds, StructureFile pdb)
+  {
+    List<PDBEntry> putativePDBe = new ArrayList<PDBEntry>();
+    for (PDBEntry candidate : gatherPDBEntries(seq, true))
+    {
+      String id = candidate.getId();
+      // an entry with no ID cannot match
+      if (id != null && id.equalsIgnoreCase(pdb.getId()))
+      {
+        putativePDBe.add(candidate);
+      }
+    }
+    return putativePDBe;
+  }
+
+  /**
+   * Gather PDB entries from seq and from each dataset sequence beneath it.
+   * @param inferFromName if true, also add entries inferred from each name
+   * @return a possibly empty set of entries
+   */
+  public static Set<PDBEntry> gatherPDBEntries(SequenceI seq,
+          boolean inferFromName)
+  {
+    Set<PDBEntry> possiblePDBe = new HashSet<PDBEntry>();
+    for (; seq != null; seq = seq.getDatasetSequence())
+    {
+      if (seq.getAllPDBEntries() != null)
+      {
+        possiblePDBe.addAll(seq.getAllPDBEntries());
+      }
+      if (inferFromName)
+      {
+        possiblePDBe.addAll(inferPDBEntry(seq));
+      }
+    }
+    return possiblePDBe;
+  }
+
+  /**
+   * Find the entry whose chain code equals the given chain's id.
+   * @return the first matching entry, or null if there is none
+   */
+  public static PDBEntry selectPutativePDBEntry(List<PDBEntry> putativePDBe,
+          PDBChain chain)
+  {
+    for (PDBEntry pdbe : putativePDBe)
+    {
+      String chainCode = pdbe.getChainCode();
+      if (chainCode != null && chainCode.equals(chain.id))
+      {
+        return pdbe;
+      }
+    }
+    return null;
+  }
+}