JAL-2089 patch broken merge to master for Release 2.10.0b1
[jalview.git] / src / jalview / util / DBRefUtils.java
index ed6d860..e6aa472 100755 (executable)
@@ -26,11 +26,12 @@ import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceI;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import com.stevesoft.pat.Regex;
 
@@ -59,6 +60,18 @@ public class DBRefUtils
 
     canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
     canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);
+    // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served
+    // from ENA.
+    canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
+    canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
+
+    // Make sure we have lowercase entries for all canonical string lookups
+    Set<String> keys = canonicalSourceNameLookup.keySet();
+    for (String k : keys)
+    {
+      canonicalSourceNameLookup.put(k.toLowerCase(),
+              canonicalSourceNameLookup.get(k));
+    }
 
     dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
     dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
@@ -235,7 +248,8 @@ public class DBRefUtils
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() == null
-              || refb.getSource().equals(refa.getSource()))
+              || DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         if (refa.getVersion() == null
                 || refb.getVersion().equals(refa.getVersion()))
@@ -266,7 +280,7 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (nullOrEqual(refa.getSource(), refb.getSource())
+      if (nullOrEqualSource(refa.getSource(), refb.getSource())
               && nullOrEqual(refa.getVersion(), refb.getVersion())
               && nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
               && nullOrEqual(refa.getMap(), refb.getMap()))
@@ -287,8 +301,10 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+      if (refa.getSource() != null
+              && refb.getSource() != null
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
@@ -318,8 +334,10 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+      if (refa.getSource() != null
+              && refb.getSource() != null
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
@@ -354,8 +372,10 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+      if (refa.getSource() != null
+              && refb.getSource() != null
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
         // if ((refa.getVersion()==null || refb.getVersion()==null)
@@ -393,8 +413,10 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+      if (refa.getSource() != null
+              && refb.getSource() != null
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
 
@@ -485,9 +507,7 @@ public class DBRefUtils
           PDBEntry pdbr = new PDBEntry();
           pdbr.setId(pdbid);
           pdbr.setType(PDBEntry.Type.PDB);
-          pdbr.setProperty(new Hashtable());
           pdbr.setChainCode(chaincode);
-          // pdbr.getProperty().put("CHAIN", chaincode);
           seq.addPDBId(pdbr);
         }
         else
@@ -521,7 +541,28 @@ public class DBRefUtils
     {
       return true;
     }
-    return (o1 == null ? o2.equals(o1) : o1.equals(o2));
+    return o1.equals(o2);
+  }
+
+  /**
+   * canonicalise source string before comparing. null is always wildcard
+   * 
+   * @param o1
+   *          - null or source string to compare
+   * @param o2
+   *          - null or source string to compare
+   * @return true if either o1 or o2 are null, or o1 equals o2 under
+   *         DBRefUtils.getCanonicalName
+   *         (o1).equals(DBRefUtils.getCanonicalName(o2))
+   */
+  public static boolean nullOrEqualSource(String o1, String o2)
+  {
+    if (o1 == null || o2 == null)
+    {
+      return true;
+    }
+    return DBRefUtils.getCanonicalName(o1).equals(
+            DBRefUtils.getCanonicalName(o2));
   }
 
   /**
@@ -546,7 +587,7 @@ public class DBRefUtils
 
   /**
    * Returns the (possibly empty) list of those supplied dbrefs which have the
-   * specified source databse
+   * specified source database, with a case-insensitive match of source name
    * 
    * @param dbRefs
    * @param source
@@ -560,7 +601,7 @@ public class DBRefUtils
     {
       for (DBRefEntry dbref : dbRefs)
       {
-        if (source.equals(dbref.getSource()))
+        if (source.equalsIgnoreCase(dbref.getSource()))
         {
           matches.add(dbref);
         }
@@ -569,4 +610,127 @@ public class DBRefUtils
     return matches;
   }
 
+  /**
+   * promote direct database references to primary for nucleotide or protein
+   * sequences if they have an appropriate primary ref
+   * <table>
+   * <tr>
+   * <th>Seq Type</th>
+   * <th>Primary DB</th>
+   * <th>Direct which will be promoted</th>
+   * </tr>
+   * <tr align=center>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr align=center>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr align=center>
+   * <td>dna</td>
+   * <td>Ensembl</td>
+   * <td>ENA</td>
+   * </tr>
+   * </table>
+   * 
+   * @param sequence
+   */
+  public static void ensurePrimaries(SequenceI sequence)
+  {
+    List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+    if (pr.size() == 0)
+    {
+      // nothing to do
+      return;
+    }
+    List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
+    {
+      DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
+              sequence.getDBRefs());
+      if (selfArray == null || selfArray.length == 0)
+      {
+        // nothing to do
+        return;
+      }
+      selfs.addAll(Arrays.asList(selfArray));
+    }
+
+    // filter non-primary refs
+    for (DBRefEntry p : pr)
+    {
+      while (selfs.contains(p))
+      {
+        selfs.remove(p);
+      }
+    }
+    List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
+
+    for (DBRefEntry p : pr)
+    {
+      List<String> promType = new ArrayList<String>();
+      if (sequence.isProtein())
+      {
+        switch (getCanonicalName(p.getSource()))
+        {
+        case DBRefSource.UNIPROT:
+          // case DBRefSource.UNIPROTKB:
+          // case DBRefSource.UP_NAME:
+          // search for and promote ensembl
+          promType.add(DBRefSource.ENSEMBL);
+          break;
+        case DBRefSource.ENSEMBL:
+          // search for and promote Uniprot
+          promType.add(DBRefSource.UNIPROT);
+          break;
+        }
+      }
+      else
+      {
+        // TODO: promote transcript refs
+      }
+
+      // collate candidates and promote them
+      DBRefEntry[] candidates = selectRefs(
+              selfs.toArray(new DBRefEntry[0]),
+              promType.toArray(new String[0]));
+      if (candidates != null)
+      {
+        for (DBRefEntry cand : candidates)
+        {
+          if (cand.hasMap())
+          {
+            if (cand.getMap().getTo() != null
+                    && cand.getMap().getTo() != sequence)
+            {
+              // can't promote refs with mappings to other sequences
+              continue;
+            }
+            if (cand.getMap().getMap().getFromLowest() != sequence
+                    .getStart()
+                    && cand.getMap().getMap().getFromHighest() != sequence
+                            .getEnd())
+            {
+              // can't promote refs with mappings from a region of this sequence
+              // - eg CDS
+              continue;
+            }
+          }
+          // and promote
+          cand.setVersion(p.getVersion() + " (promoted)");
+          selfs.remove(cand);
+          toPromote.add(cand);
+          if (!cand.isPrimaryCandidate())
+          {
+            System.out.println("Warning: Couldn't promote dbref "
+                    + cand.toString() + " for sequence "
+                    + sequence.toString());
+          }
+        }
+      }
+    }
+  }
+
 }