Merge branch 'develop' into task/JAL-2196pdbeProperties
[jalview.git] / src / jalview / util / DBRefUtils.java
index 4a0a55d..e6aa472 100755 (executable)
@@ -26,6 +26,7 @@ import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceI;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -300,7 +301,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -332,7 +334,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -369,7 +372,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -409,7 +413,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -605,4 +610,127 @@ public class DBRefUtils
     return matches;
   }
 
+  /**
+   * promote direct database references to primary for nucleotide or protein
+   * sequences if they have an appropriate primary ref
+   * <table>
+   * <tr>
+   * <th>Seq Type</th>
+   * <th>Primary DB</th>
+   * <th>Direct which will be promoted</th>
+   * </tr>
+   * <tr align=center>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr align=center>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr align=center>
+   * <td>dna</td>
+   * <td>Ensembl</td>
+   * <td>ENA</td>
+   * </tr>
+   * </table>
+   * 
+   * @param sequence
+   */
+  public static void ensurePrimaries(SequenceI sequence)
+  {
+    List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+    if (pr.size() == 0)
+    {
+      // nothing to do
+      return;
+    }
+    List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
+    {
+      DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
+              sequence.getDBRefs());
+      if (selfArray == null || selfArray.length == 0)
+      {
+        // nothing to do
+        return;
+      }
+      selfs.addAll(Arrays.asList(selfArray));
+    }
+
+    // filter non-primary refs
+    for (DBRefEntry p : pr)
+    {
+      while (selfs.contains(p))
+      {
+        selfs.remove(p);
+      }
+    }
+    List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
+
+    for (DBRefEntry p : pr)
+    {
+      List<String> promType = new ArrayList<String>();
+      if (sequence.isProtein())
+      {
+        switch (getCanonicalName(p.getSource()))
+        {
+        case DBRefSource.UNIPROT:
+          // case DBRefSource.UNIPROTKB:
+          // case DBRefSource.UP_NAME:
+          // search for and promote ensembl
+          promType.add(DBRefSource.ENSEMBL);
+          break;
+        case DBRefSource.ENSEMBL:
+          // search for and promote Uniprot
+          promType.add(DBRefSource.UNIPROT);
+          break;
+        }
+      }
+      else
+      {
+        // TODO: promote transcript refs
+      }
+
+      // collate candidates and promote them
+      DBRefEntry[] candidates = selectRefs(
+              selfs.toArray(new DBRefEntry[0]),
+              promType.toArray(new String[0]));
+      if (candidates != null)
+      {
+        for (DBRefEntry cand : candidates)
+        {
+          if (cand.hasMap())
+          {
+            if (cand.getMap().getTo() != null
+                    && cand.getMap().getTo() != sequence)
+            {
+              // can't promote refs with mappings to other sequences
+              continue;
+            }
+            if (cand.getMap().getMap().getFromLowest() != sequence
+                    .getStart()
+                    && cand.getMap().getMap().getFromHighest() != sequence
+                            .getEnd())
+            {
+              // can't promote refs with mappings from a region of this sequence
+              // - eg CDS
+              continue;
+            }
+          }
+          // and promote
+          cand.setVersion(p.getVersion() + " (promoted)");
+          selfs.remove(cand);
+          toPromote.add(cand);
+          if (!cand.isPrimaryCandidate())
+          {
+            System.out.println("Warning: Couldn't promote dbref "
+                    + cand.toString() + " for sequence "
+                    + sequence.toString());
+          }
+        }
+      }
+    }
+  }
+
 }