Merge branch 'develop' into bug/JAL-2210_ensembl_uniprot_primaryrefs_alternate
author Jim Procter <jprocter@issues.jalview.org>
Sat, 1 Oct 2016 16:00:05 +0000 (17:00 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Sat, 1 Oct 2016 16:00:05 +0000 (17:00 +0100)
16 files changed:
src/jalview/analysis/CrossRef.java
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
src/jalview/ext/ensembl/EnsemblRestClient.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/util/DBRefUtils.java
src/jalview/ws/dbsources/Uniprot.java
test/jalview/commands/EditCommandTest.java
test/jalview/datamodel/SequenceTest.java
test/jalview/gui/StructureChooserTest.java
test/jalview/io/AnnotationFileIOTest.java
test/jalview/io/JSONFileTest.java
test/jalview/io/StockholmFileTest.java
test/jalview/ws/dbsources/UniprotTest.java
test/jalview/ws/seqfetcher/DbRefFetcherTest.java
test/jalview/ws/sifts/SiftsClientTest.java

index 1295b46..6779b87 100644 (file)
@@ -296,20 +296,28 @@ public class CrossRef
               if (!rseqs.contains(matchInDataset))
               {
                 rseqs.add(matchInDataset);
-                // need to try harder to only add unique mappings
-                if (xref.getMap().getMap().isTripletMap()
-                        && dataset.getMapping(seq, matchInDataset) == null
-                        && cf.getMappingBetween(seq, matchInDataset) == null)
+              }
+              // even if rseqs already contained matchInDataset - ensure any
+              // mappings between these sequences are also added
+              // need to try harder to only add unique mappings
+              if (xref.getMap().getMap().isTripletMap()
+                      && dataset.getMapping(seq, matchInDataset) == null
+                      && cf.getMappingBetween(seq, matchInDataset) == null)
+              {
+                // materialise a mapping for highlighting between these
+                // sequences
+                if (fromDna)
                 {
-                  // materialise a mapping for highlighting between these sequences
-                  if (fromDna)
-                  {
-                    cf.addMap(dss, matchInDataset, xref.getMap().getMap(), xref.getMap().getMappedFromId());
-                  } else {
-                    cf.addMap(matchInDataset, dss, xref.getMap().getMap().getInverse(), xref.getMap().getMappedFromId());
-                  }
+                  cf.addMap(dss, matchInDataset, xref.getMap().getMap(),
+                          xref.getMap().getMappedFromId());
+                }
+                else
+                {
+                  cf.addMap(matchInDataset, dss, xref.getMap().getMap()
+                          .getInverse(), xref.getMap().getMappedFromId());
                 }
               }
+
               refIterator.remove();
               continue;
             }
@@ -393,28 +401,7 @@ public class CrossRef
     // first filter in case we are retrieving crossrefs that have already been
     // retrieved. this happens for cases where a database record doesn't yield
     // protein products for CDS
-    DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
-    for (SequenceI sq : dataset.getSequences())
-    {
-      boolean dupeFound = false;
-      // !fromDna means we are looking only for nucleotide sequences, not
-      // protein
-      if (sq.isProtein() == fromDna)
-      {
-        for (DBRefEntry dbr : sq.getPrimaryDBRefs())
-        {
-          for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
-          {
-            sourceRefs.remove(found);
-            dupeFound = true;
-          }
-        }
-      }
-      if (dupeFound)
-      {
-        dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
-      }
-    }
+    removeAlreadyRetrievedSeqs(sourceRefs, fromDna);
     if (sourceRefs.size() == 0)
     {
       // no more work to do! We already had all requested sequence records in
@@ -441,124 +428,172 @@ public class CrossRef
         // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
         SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence
                 : retrievedSequence.getDatasetSequence();
-        DBRefEntry[] dbr = retrievedSequence.getDBRefs();
-        if (dbr != null)
+        importCrossRefSeq(cf, dss, retrievedDss);
+        rseqs.add(retrievedDss);
+        if (dataset.findIndex(retrievedDss) == -1)
+        {
+          dataset.addSequence(retrievedDss);
+          matcher.add(retrievedDss);
+        }
+      }
+    }
+  }
+
+  /**
+   * Searches the dataset for sequences with a primary reference contained in
+   * sourceRefs, and removes any matched references from sourceRefs.
+   * 
+   * @param sourceRefs
+   *          - list of references to filter.
+   * @param fromDna
+   *          - type of sequence to search for matching primary reference.
+   */
+  private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,
+          boolean fromDna)
+  {
+    DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+    for (SequenceI sq : dataset.getSequences())
+    {
+      boolean dupeFound = false;
+      // !fromDna means we are looking only for nucleotide sequences, not
+      // protein
+      if (sq.isProtein() == fromDna)
+      {
+        for (DBRefEntry dbr : sq.getPrimaryDBRefs())
         {
-          for (DBRefEntry dbref : dbr)
+          for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
           {
-            // find any entry where we should put in the sequence being
-            // cross-referenced into the map
-            Mapping map = dbref.getMap();
-            if (map != null)
+            sourceRefs.remove(found);
+            dupeFound = true;
+          }
+        }
+      }
+      if (dupeFound)
+      {
+        dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+      }
+    }
+  }
+
+  /**
+   * Processes a sequence retrieved via a dbref on the source sequence, to
+   * resolve and transfer mappings, dbrefs and features
+   * 
+   * @param cf
+   * @param sourceSequence
+   * @param retrievedSequence
+   */
+  private void importCrossRefSeq(AlignedCodonFrame cf,
+          SequenceI sourceSequence, SequenceI retrievedSequence)
+  {
+    DBRefEntry[] dbr = retrievedSequence.getDBRefs();
+    if (dbr != null)
+    {
+      for (DBRefEntry dbref : dbr)
+      {
+        // find any entry where we should put in the sequence being
+        // cross-referenced into the map
+        Mapping map = dbref.getMap();
+        if (map != null)
+        {
+          if (map.getTo() != null && map.getMap() != null)
+          {
+            // TODO findInDataset requires exact sequence match but
+            // 'congruent' test is only for the mapped part
+            // maybe not a problem in practice since only ENA provide a
+            // mapping and it is to the full protein translation of CDS
+            SequenceI matched = findInDataset(dbref);
+            // matcher.findIdMatch(map.getTo());
+            if (matched != null)
             {
-              if (map.getTo() != null && map.getMap() != null)
+              /*
+               * already got an xref to this sequence; update this
+               * map to point to the same sequence, and add
+               * any new dbrefs to it
+               */
+              DBRefEntry[] toRefs = map.getTo().getDBRefs();
+              if (toRefs != null)
               {
-                // TODO findInDataset requires exact sequence match but
-                // 'congruent' test is only for the mapped part
-                // maybe not a problem in practice since only ENA provide a
-                // mapping and it is to the full protein translation of CDS
-                SequenceI matched = findInDataset(dbref);
-                // matcher.findIdMatch(map.getTo());
-                if (matched != null)
+                for (DBRefEntry ref : toRefs)
                 {
-                  /*
-                   * already got an xref to this sequence; update this
-                   * map to point to the same sequence, and add
-                   * any new dbrefs to it
-                   */
-                  DBRefEntry[] toRefs = map.getTo().getDBRefs();
-                  if (toRefs != null)
-                  {
-                    for (DBRefEntry ref : toRefs)
-                    {
-                      matched.addDBRef(ref); // add or update mapping
-                    }
-                  }
-                  map.setTo(matched);
+                  matched.addDBRef(ref); // add or update mapping
                 }
-                else
-                {
-                  if (dataset.findIndex(map.getTo()) == -1)
-                  {
-                    dataset.addSequence(map.getTo());
-                    matcher.add(map.getTo());
-                  }
-                }
-                try
+              }
+              map.setTo(matched);
+            }
+            else
+            {
+              if (dataset.findIndex(map.getTo()) == -1)
+              {
+                dataset.addSequence(map.getTo());
+                matcher.add(map.getTo());
+              }
+            }
+
+            try
+            {
+              // compare ms with dss and replace with dss in mapping
+              // if map is congruent
+              SequenceI ms = map.getTo();
+              int sf = map.getMap().getToLowest();
+              int st = map.getMap().getToHighest();
+              SequenceI mappedrg = ms.getSubSequence(sf, st);
+              if (mappedrg.getLength() > 0
+                      && ms.getSequenceAsString().equals(
+                              sourceSequence.getSequenceAsString()))
+              {
+                String msg = "Mapping updated from " + ms.getName()
+                        + " to retrieved crossreference "
+                        + sourceSequence.getName();
+                System.out.println(msg);
+                map.setTo(sourceSequence);
+
+                /*
+                 * give the reverse reference the inverse mapping 
+                 * (if it doesn't have one already)
+                 */
+                setReverseMapping(sourceSequence, dbref, cf);
+
+                /*
+                 * copy sequence features as well, avoiding
+                 * duplication (e.g. same variation from two 
+                 * transcripts)
+                 */
+                SequenceFeature[] sfs = ms.getSequenceFeatures();
+                if (sfs != null)
                 {
-                  // compare ms with dss and replace with dss in mapping
-                  // if map is congruent
-                  SequenceI ms = map.getTo();
-                  int sf = map.getMap().getToLowest();
-                  int st = map.getMap().getToHighest();
-                  SequenceI mappedrg = ms.getSubSequence(sf, st);
-                  // SequenceI loc = dss.getSubSequence(sf, st);
-                  if (mappedrg.getLength() > 0
-                          && ms.getSequenceAsString().equals(
-                                  dss.getSequenceAsString()))
-                  // && mappedrg.getSequenceAsString().equals(
-                  // loc.getSequenceAsString()))
+                  for (SequenceFeature feat : sfs)
                   {
-                    String msg = "Mapping updated from " + ms.getName()
-                            + " to retrieved crossreference "
-                            + dss.getName();
-                    System.out.println(msg);
-                    map.setTo(dss);
-
                     /*
-                     * give the reverse reference the inverse mapping 
-                     * (if it doesn't have one already)
+                     * make a flyweight feature object which ignores Parent
+                     * attribute in equality test; this avoids creating many
+                     * otherwise duplicate exon features on genomic sequence
                      */
-                    setReverseMapping(dss, dbref, cf);
-
-                    /*
-                     * copy sequence features as well, avoiding
-                     * duplication (e.g. same variation from two 
-                     * transcripts)
-                     */
-                    SequenceFeature[] sfs = ms.getSequenceFeatures();
-                    if (sfs != null)
+                    SequenceFeature newFeature = new SequenceFeature(
+                            feat)
                     {
-                      for (SequenceFeature feat : sfs)
+                      @Override
+                      public boolean equals(Object o)
                       {
-                        /*
-                         * make a flyweight feature object which ignores Parent
-                         * attribute in equality test; this avoids creating many
-                         * otherwise duplicate exon features on genomic sequence
-                         */
-                        SequenceFeature newFeature = new SequenceFeature(
-                                feat)
-                        {
-                          @Override
-                          public boolean equals(Object o)
-                          {
-                            return super.equals(o, true);
-                          }
-                        };
-                        dss.addSequenceFeature(newFeature);
+                        return super.equals(o, true);
                       }
-                    }
+                    };
+                    sourceSequence.addSequenceFeature(newFeature);
                   }
-                  cf.addMap(retrievedDss, map.getTo(), map.getMap());
-                } catch (Exception e)
-                {
-                  System.err
-                          .println("Exception when consolidating Mapped sequence set...");
-                  e.printStackTrace(System.err);
                 }
               }
+              cf.addMap(retrievedSequence, map.getTo(), map.getMap());
+            } catch (Exception e)
+            {
+              System.err
+                      .println("Exception when consolidating Mapped sequence set...");
+              e.printStackTrace(System.err);
             }
           }
         }
-        retrievedSequence.updatePDBIds();
-        rseqs.add(retrievedDss);
-        if (dataset.findIndex(retrievedDss) == -1)
-        {
-          dataset.addSequence(retrievedDss);
-          matcher.add(retrievedDss);
-        }
       }
     }
+    retrievedSequence.updatePDBIds();
   }
   /**
    * Sets the inverse sequence mapping in the corresponding dbref of the mapped
@@ -618,6 +653,12 @@ public class CrossRef
     String name2 = xref.getSource() + "|" + name;
     SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo
             .getDatasetSequence();
+    // first check ds if ds is directly referenced
+    if (dataset.findIndex(dss) > -1)
+    {
+      return dss;
+    }
+    ;
     for (SequenceI seq : dataset.getSequences())
     {
       /*
index 44522a8..29d114d 100755 (executable)
@@ -944,7 +944,17 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public void setDBRefs(DBRefEntry[] dbref)
   {
+    if (dbrefs == null && datasetSequence != null
+            && this != datasetSequence)
+    {
+      datasetSequence.setDBRefs(dbref);
+      return;
+    }
     dbrefs = dbref;
+    if (dbrefs != null)
+    {
+      DBRefUtils.ensurePrimaries(this);
+    }
   }
 
   @Override
@@ -961,7 +971,12 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public void addDBRef(DBRefEntry entry)
   {
-    // TODO add to dataset sequence instead if there is one?
+    if (datasetSequence != null)
+    {
+      datasetSequence.addDBRef(entry);
+      return;
+    }
+
     if (dbrefs == null)
     {
       dbrefs = new DBRefEntry[0];
@@ -989,12 +1004,23 @@ public class Sequence extends ASequence implements SequenceI
     temp[temp.length - 1] = entry;
 
     dbrefs = temp;
+
+    DBRefUtils.ensurePrimaries(this);
   }
 
   @Override
   public void setDatasetSequence(SequenceI seq)
   {
-    // TODO check for circular reference before setting?
+    if (seq == this)
+    {
+      throw new Error(
+              "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
+    }
+    if (seq != null && seq.getDatasetSequence() != null)
+    {
+      throw new Error(
+              "Implementation error: cascading dataset sequences are not allowed.");
+    }
     datasetSequence = seq;
   }
 
index b7a291e..a29e2ba 100755 (executable)
@@ -314,6 +314,14 @@ public interface SequenceI extends ASequenceI
 
   public void setVamsasId(String id);
 
+  /**
+   * set the array of Database references for the sequence.
+   * 
+   * @param dbs
+   * @deprecated - use is discouraged since side-effects may occur if the
+   *             DBRefEntry set is not normalised.
+   */
+  @Deprecated
   public void setDBRefs(DBRefEntry[] dbs);
 
   public DBRefEntry[] getDBRefs();
index 72efdc1..f8cd0d6 100644 (file)
@@ -248,7 +248,6 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
       writePostBody(connection, ids);
     }
   
-    InputStream response = connection.getInputStream();
     int responseCode = connection.getResponseCode();
   
     if (responseCode != 200)
@@ -261,6 +260,9 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
               "Response code was not 200. Detected response was "
                       + responseCode);
     }
+    // get content
+    InputStream response = connection.getInputStream();
+
     // System.out.println(getClass().getName() + " took "
     // + (System.currentTimeMillis() - now) + "ms to fetch");
 
index 5fccedd..c749b94 100644 (file)
@@ -2,9 +2,11 @@ package jalview.ext.ensembl;
 
 import jalview.analysis.AlignmentUtils;
 import jalview.analysis.Dna;
+import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
@@ -158,6 +160,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                 + " chunks. Unexpected problem (" + r.getLocalizedMessage()
                 + ")";
         System.err.println(msg);
+        r.printStackTrace();
         break;
       }
     }
@@ -281,6 +284,44 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         DBRefEntry dbr = new DBRefEntry(getDbSource(),
                 getEnsemblDataVersion(), proteinSeq.getName(), map);
         querySeq.getDatasetSequence().addDBRef(dbr);
+        DBRefEntry[] uprots = DBRefUtils.selectRefs(ds.getDBRefs(),
+                new String[] { DBRefSource.UNIPROT });
+        DBRefEntry[] upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(),
+                new String[] { DBRefSource.UNIPROT });
+        if (uprots != null)
+        {
+          for (DBRefEntry up : uprots)
+          {
+            // locate local uniprot ref and map
+            List<DBRefEntry> upx = DBRefUtils.searchRefs(upxrefs, up.getAccessionId());
+            DBRefEntry upxref;
+            if (upx.size() != 0)
+            {
+              upxref = upx.get(0);
+
+              if (upx.size() > 1)
+              {
+                Cache.log
+                        .warn("Implementation issue - multiple uniprot acc on product sequence.");
+              }
+            }
+            else
+            {
+              upxref = new DBRefEntry(DBRefSource.UNIPROT,
+                    getEnsemblDataVersion(), up.getAccessionId());
+            }
+
+            Mapping newMap = new Mapping(ds, mapList);
+            upxref.setVersion(getEnsemblDataVersion());
+            upxref.setMap(newMap);
+            if (upx.size() == 0)
+            {
+              // add the new uniprot ref
+              querySeq.getDatasetSequence().addDBRef(upxref);
+            }
+            
+          }
+        }
         
         /*
          * copy exon features to protein, compute peptide variants from dna 
index 405f6e6..757fc4d 100755 (executable)
@@ -26,6 +26,7 @@ import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceI;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
@@ -301,7 +302,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -333,7 +335,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -370,7 +373,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -410,7 +414,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -608,4 +613,127 @@ public class DBRefUtils
     return matches;
   }
 
+  /**
+   * promote direct database references to primary for nucleotide or protein
+   * sequences if they have an appropriate primary ref
+   * <table>
+   * <tr>
+   * <td>Seq Type</td>
+   * <td>Primary DB</td>
+   * <td>Direct which will be promoted</td>
+   * </tr>
+   * <tr>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr>
+   * <td>peptides</td>
+   * <td>Uniprot</td>
+   * <td>Ensembl</td>
+   * </tr>
+   * <tr>
+   * <td>dna</td>
+   * <td>Ensembl</td>
+   * <td>ENA</td>
+   * </tr>
+   * </table>
+   * 
+   * @param sequence
+   */
+  public static void ensurePrimaries(SequenceI sequence)
+  {
+    List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+    if (pr.size() == 0)
+    {
+      // nothing to do
+      return;
+    }
+    List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
+    {
+      DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
+              sequence.getDBRefs());
+      if (selfArray == null || selfArray.length == 0)
+      {
+        // nothing to do
+        return;
+      }
+      selfs.addAll(Arrays.asList(selfArray));
+    }
+
+    // filter non-primary refs
+    for (DBRefEntry p : pr)
+    {
+      while (selfs.contains(p))
+      {
+        selfs.remove(p);
+      }
+    }
+    List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
+
+    for (DBRefEntry p : pr)
+    {
+      List<String> promType = new ArrayList<String>();
+      if (sequence.isProtein())
+      {
+        switch (getCanonicalName(p.getSource()))
+        {
+        case DBRefSource.UNIPROT:
+          // case DBRefSource.UNIPROTKB:
+          // case DBRefSource.UP_NAME:
+          // search for and promote ensembl
+          promType.add(DBRefSource.ENSEMBL);
+          break;
+        case DBRefSource.ENSEMBL:
+          // search for and promote Uniprot
+          promType.add(DBRefSource.UNIPROT);
+          break;
+        }
+      }
+      else
+      {
+        // TODO: promote transcript refs
+      }
+
+      // collate candidates and promote them
+      DBRefEntry[] candidates = selectRefs(
+              selfs.toArray(new DBRefEntry[0]),
+              promType.toArray(new String[0]));
+      if (candidates != null)
+      {
+        for (DBRefEntry cand : candidates)
+        {
+          if (cand.hasMap())
+          {
+            if (cand.getMap().getTo() != null
+                    && cand.getMap().getTo() != sequence)
+            {
+              // can't promote refs with mappings to other sequences
+              continue;
+            }
+            if (cand.getMap().getMap().getFromLowest() != sequence
+                    .getStart()
+                    && cand.getMap().getMap().getFromHighest() != sequence
+                            .getEnd())
+            {
+              // can't promote refs with mappings from a region of this sequence
+              // - eg CDS
+              continue;
+            }
+          }
+          // and promote
+          cand.setVersion(p.getVersion() + " (promoted)");
+          selfs.remove(cand);
+          toPromote.add(cand);
+          if (!cand.isPrimaryCandidate())
+          {
+            System.out.println("Warning: Couldn't promote dbref "
+                    + cand.toString() + " for sequence "
+                    + sequence.toString());
+          }
+        }
+      }
+    }
+  }
+
 }
index 81b4caf..de70aab 100644 (file)
@@ -30,6 +30,7 @@ import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 import jalview.datamodel.UniprotFile;
+import jalview.util.DBRefUtils;
 import jalview.ws.ebi.EBIFetchClient;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
@@ -222,6 +223,19 @@ public class Uniprot extends DbSourceProxyImpl
       {
         onlyPdbEntries.addElement(pdb);
       }
+      if ("EMBL".equals(pdb.getType()))
+      {
+        // look for a CDS reference and add it, too.
+        String cdsId = (String) pdb.getProperty()
+                .get("protein sequence ID");
+        if (cdsId != null && cdsId.trim().length() > 0)
+        {
+          dbr = new DBRefEntry(DBRefSource.EMBLCDS, DBRefSource.UNIPROT
+                  + ":"
+                  + dbVersion, cdsId.trim());
+          dbRefs.add(dbr);
+        }
+      }
     }
 
     sequence.setPDBId(onlyPdbEntries);
@@ -233,7 +247,12 @@ public class Uniprot extends DbSourceProxyImpl
         sequence.addSequenceFeature(sf);
       }
     }
+    // we use setDBRefs to assign refs quickly.
     sequence.setDBRefs(dbRefs.toArray(new DBRefEntry[0]));
+    // need to use ensurePrimaries to reify any refs that should become primary
+    // refs
+    DBRefUtils.ensurePrimaries(sequence); // promote any direct refs to primary
+                                          // source dbs
     return sequence;
   }
 
index 9afae37..7fb80fb 100644 (file)
@@ -239,7 +239,7 @@ public class EditCommandTest
   public void testReplace()
   {
     // seem to need a dataset sequence on the edited sequence here
-    seqs[1].setDatasetSequence(seqs[1]);
+    seqs[1].createDatasetSequence();
     new EditCommand("", Action.REPLACE, "ZXY", new SequenceI[] { seqs[1] },
             4, 8, al);
     assertEquals("abcdefghjk", seqs[0].getSequenceAsString());
index 8c5073b..0c401e2 100644 (file)
@@ -366,7 +366,16 @@ public class SequenceTest
      * is there a usecase for this ? setDatasetSequence should throw an error if
      * this actually occurs.
      */
-    sq.getDatasetSequence().setDatasetSequence(sq); // loop!
+    try
+    {
+      sq.getDatasetSequence().setDatasetSequence(sq); // loop!
+      Assert.fail("Expected Error to be raised when calling setDatasetSequence with self reference");
+    } catch (Error e)
+    {
+      // TODO Jalview error/exception class for raising implementation errors
+      assertTrue(e.getMessage().toLowerCase()
+              .contains("implementation error"));
+    }
     assertNull(sq.getSequenceFeatures());
   }
 
@@ -451,19 +460,20 @@ public class SequenceTest
     sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
     sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
     
+    // these are the same as ones already added
     DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
-    DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
+    DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version2", "2PDB");
 
     
     List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
         pdb2pdb });
 
-    sq.getDatasetSequence().addDBRef(pdb1pdb);
-    sq.getDatasetSequence().addDBRef(pdb2pdb);
+    sq.getDatasetSequence().addDBRef(pdb1pdb); // should do nothing
+    sq.getDatasetSequence().addDBRef(pdb2pdb); // should do nothing
     sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version3", "3PDB"));
+            new DBRefEntry("PDB", "version3", "3PDB")); // should do nothing
     sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version4", "4PDB"));
+            new DBRefEntry("PDB", "version4", "4PDB")); // should do nothing
     
     PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
     PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
@@ -500,11 +510,14 @@ public class SequenceTest
             new AlignmentAnnotation("Test annot", "Test annot description",
                     annots));
     Assert.assertEquals(sq.getDescription(), "Test sequence description..");
-    Assert.assertEquals(sq.getDBRefs().length, 5);
+    Assert.assertEquals(sq.getDBRefs().length, 5); // DBRefs are on dataset
+                                                   // sequence
     Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
     Assert.assertNotNull(sq.getAnnotation());
     Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
-    Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 4);
+    Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 5); // same
+                                                                        // as
+                                                                        // sq.getDBRefs()
     Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(),
             4);
     Assert.assertNotNull(sq.getDatasetSequence().getAnnotation());
@@ -513,11 +526,11 @@ public class SequenceTest
 
     Assert.assertEquals(derived.getDescription(),
             "Test sequence description..");
-    Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
+    Assert.assertEquals(derived.getDBRefs().length, 5); // come from dataset
     Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
     Assert.assertNotNull(derived.getAnnotation());
     Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
-    Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 4);
+    Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 5);
     Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries()
             .size(), 4);
     Assert.assertNotNull(derived.getDatasetSequence().getAnnotation());
index 4c7df46..1e41a16 100644 (file)
@@ -44,7 +44,7 @@ public class StructureChooserTest
   {
     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
             26);
-    seq.setDatasetSequence(seq);
+    seq.createDatasetSequence();
     for (int x = 1; x < 5; x++)
     {
       DBRefEntry dbRef = new DBRefEntry();
index 625244d..c9b5f4a 100644 (file)
@@ -75,8 +75,7 @@ public class AnnotationFileIOTest
       // make sure dataset is initialised ? not sure about this
       for (int i = 0; i < al.getSequencesArray().length; ++i)
       {
-        al.getSequenceAt(i).setDatasetSequence(
-                al.getSequenceAt(i).createDatasetSequence());
+        al.getSequenceAt(i).createDatasetSequence();
       }
       assertNotNull("Couldn't read supplied alignment data.", al);
       return al;
index 93fb12b..f75f433 100644 (file)
@@ -114,7 +114,7 @@ public class JSONFileTest
 
     for (Sequence seq : seqs)
     {
-      seq.setDatasetSequence(seq);
+      seq.createDatasetSequence();
       expectedSeqs.put(seq.getName(), seq);
     }
 
index 0e2b630..b635aa3 100644 (file)
@@ -103,7 +103,7 @@ public class StockholmFileTest
       // make sure dataset is initialised ? not sure about this
       for (int i = 0; i < al.getSequencesArray().length; ++i)
       {
-        al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+        al.getSequenceAt(i).createDatasetSequence();
       }
       String outputfile = rf.formatSequences(ioformat, al, true);
       System.out.println("Output file in '" + ioformat + "':\n"
index 72e599d..77f8078 100644 (file)
 package jalview.ws.dbsources;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 
 import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 
 import java.io.Reader;
@@ -46,6 +48,7 @@ public class UniprotTest
           + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName><fullName>Henry</fullName></recommendedName></protein>"
           + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
           + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
+          + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
           + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
           + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
           + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
@@ -109,7 +112,7 @@ public class UniprotTest
      * Check cross-references
      */
     Vector<PDBEntry> xrefs = entry.getDbReference();
-    assertEquals(2, xrefs.size());
+    assertEquals(3, xrefs.size());
 
     PDBEntry xref = xrefs.get(0);
     assertEquals("2FSQ", xref.getId());
@@ -122,8 +125,29 @@ public class UniprotTest
     assertEquals("2FSR", xref.getId());
     assertEquals("PDBsum", xref.getType());
     assertNull(xref.getProperty());
+
+    xref = xrefs.get(2);
+    assertEquals("AE007869", xref.getId());
+    assertEquals("EMBL", xref.getType());
+    assertNotNull(xref.getProperty());
+    assertEquals("AAK85932.1",
+            (String) xref.getProperty().get("protein sequence ID"));
+    assertEquals("Genomic_DNA",
+            (String) xref.getProperty().get("molecule type"));
+    assertEquals(2, xref.getProperty().size());
+
   }
 
+  @Test(groups = { "Functional" })
+  public void testGetUniprotSequence()
+  {
+    UniprotEntry entry = new Uniprot().getUniprotEntries(
+            new StringReader(UNIPROT_XML)).get(0);
+    SequenceI seq = new Uniprot().uniprotEntryToSequenceI(entry);
+    assertNotNull(seq);
+    assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL
+
+  }
   /**
    * Test the method that formats the sequence id
    */
index 59bf445..0a565bd 100644 (file)
@@ -173,8 +173,7 @@ public class DbRefFetcherTest
                     sfs[0].getType()));
     assertEquals(embl.getDbSource(), sfs[0].getFeatureGroup());
     DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRefs(),
-            new String[] { DBRefSource.UNIPROT, DBRefSource.UNIPROTKB,
-                DBRefSource.EMBLCDSProduct, DBRefSource.ENSEMBL });
+            new String[] { DBRefSource.UNIPROT });
     assertNotNull(dr);
     assertEquals("Expected a single Uniprot cross reference", 1, dr.length);
     assertEquals("Expected cross reference map to be one amino acid", dr[0]
index 6f9a864..d3b485e 100644 (file)
@@ -21,6 +21,7 @@
 package jalview.ws.sifts;
 
 import jalview.api.DBRefEntryI;
+import jalview.bin.Cache;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Sequence;
@@ -170,6 +171,8 @@ public class SiftsClientTest
   @BeforeTest(alwaysRun = true)
   public void setUpSiftsClient() throws SiftsException
   {
+    // read test props before manipulating config
+    Cache.loadProperties("test/jalview/io/testProps.jvprops");
     // SIFTs entries are updated weekly - so use saved SIFTs file to enforce
     // test reproducibility
     new SiftsSettings();