JAL-1617 hack to add a fake xref if the uniprot xref isn't present on the embl entry (branch: hack/JAL-1617_workaround)
author Jim Procter <jprocter@dundee.ac.uk>
Tue, 17 Mar 2015 15:59:19 +0000 (15:59 +0000)
committer Jim Procter <jprocter@dundee.ac.uk>
Tue, 17 Mar 2015 15:59:19 +0000 (15:59 +0000)
src/jalview/datamodel/xdb/embl/EmblEntry.java
test/jalview/ws/seqfetcher/DbRefFetcherTest.java

index d501ef1..fc57b27 100644 (file)
@@ -590,8 +590,10 @@ public class EmblEntry
       }
     }
     Sequence product = null;
+    DBRefEntry protEMBLCDS = null;
     exon = adjustForPrStart(prstart, exon);
-
+    boolean noProteinDbref=true;
+    
     if (prseq != null && prname != null && prid != null)
     {
       // extract proteins.
@@ -669,8 +671,12 @@ public class EmblEntry
           if (product != null)
           {
             product.addDBRef(pcdnaref);
-          }
-
+            protEMBLCDS = new DBRefEntry(pcdnaref);
+            protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
+            product.addDBRef(protEMBLCDS);
+            
+          }     
+          
         }
       }
       // add cds feature to dna seq - this may include the stop codon
@@ -724,6 +730,7 @@ public class EmblEntry
                               + ref.getAccessionId());
             }
           }
+          noProteinDbref = false;
         }
         if (product != null)
         {
@@ -746,6 +753,33 @@ public class EmblEntry
         }
         dna.addDBRef(ref);
       }
+      if (noProteinDbref && product != null)
+      {
+        // add protein coding reference to dna sequence so xref matches
+        if (protEMBLCDS == null)
+        {
+          protEMBLCDS = new DBRefEntry();
+          protEMBLCDS.setAccessionId(prid);
+          protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
+          protEMBLCDS.setVersion(getVersion());
+          protEMBLCDS
+                  .setMap(new Mapping(product, map.getMap().getInverse()));
+        }
+        product.addDBRef(protEMBLCDS);
+          
+        // Add converse mapping reference
+        if (map != null)
+        {
+          Mapping pmap = new Mapping(product, protEMBLCDS.getMap().getMap()
+                  .getInverse());
+          DBRefEntry ncMap = new DBRefEntry(protEMBLCDS);
+          ncMap.setMap(pmap);
+          if (map.getTo() != null)
+          {
+            dna.addDBRef(ncMap);
+          }
+        }
+      }
     }
   }
 
index afaadbb..f58ead5 100644 (file)
@@ -50,6 +50,7 @@ public class DbRefFetcherTest
   @BeforeClass
   public static void setUpBeforeClass() throws Exception
   {
+    jalview.bin.Cache.initLogger();
   }
 
   /**
@@ -102,13 +103,33 @@ public class DbRefFetcherTest
   {
     String retrievalId = "CAA23748"; // "V00488";
     DbSourceProxy embl = new SequenceFetcher().getSourceProxy(DBRefSource.EMBL).get(0);
-    assertNotNull("Couldn't find the EMBL retrieval client",embl);
+    assertNotNull("Couldn't find the EMBL retrieval client", embl);
+    verifyProteinNucleotideXref(retrievalId, embl);
+  }
+
+  @Test
+  public void testEmblCDSUniprotProductRecovery() throws Exception
+  {
+    String retrievalId = "AAH29712";
+    DbSourceProxy embl = new SequenceFetcher().getSourceProxy(
+            DBRefSource.EMBLCDS).get(0);
+    assertNotNull("Couldn't find the EMBL retrieval client", embl);
+    verifyProteinNucleotideXref(retrievalId, embl);
+  }
+
+  private void verifyProteinNucleotideXref(String retrievalId,
+          DbSourceProxy embl) throws Exception
+  {
     AlignmentI alsq = embl.getSequenceRecords(retrievalId);
     assertNotNull("Couldn't find the EMBL record " + retrievalId, alsq);
     assertEquals("Didn't retrieve right number of records", 1, alsq.getHeight());
     DBRefEntry[] dr = DBRefUtils.selectRefs(alsq.getSequenceAt(0).getDBRef(), DBRefSource.PROTEINSEQ);
     assertNotNull(dr);
     assertEquals("Expected a single Uniprot cross reference", 1, dr.length);
+    assertEquals("Expected cross refernce map to be one amino acid", dr[0]
+            .getMap().getMappedWidth(), 1);
+    assertEquals("Expected local refernce map to be 3 nucleotides", dr[0]
+            .getMap().getWidth(), 3);
     AlignmentI sprods = CrossRef.findXrefSequences(alsq.getSequencesArray(), true, dr[0].getSource(), alsq.getDataset());
     assertNotNull(
             "Couldn't recover cross reference sequence from dataset. Was it ever added ?",