Merge branch 'bug/JAL-2210_hack_for_Release_2_10' into develop
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 4 Oct 2016 14:12:19 +0000 (15:12 +0100)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 4 Oct 2016 14:12:19 +0000 (15:12 +0100)
20 files changed:
src/jalview/analysis/CrossRef.java
src/jalview/datamodel/Alignment.java
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
src/jalview/ext/ensembl/EnsemblRestClient.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/fts/core/FTSRestClient.java
src/jalview/util/DBRefUtils.java
src/jalview/ws/dbsources/Uniprot.java
test/jalview/analysis/CrossRefTest.java
test/jalview/commands/EditCommandTest.java
test/jalview/datamodel/AlignmentTest.java
test/jalview/datamodel/SequenceTest.java
test/jalview/gui/StructureChooserTest.java
test/jalview/io/AnnotationFileIOTest.java
test/jalview/io/JSONFileTest.java
test/jalview/io/StockholmFileTest.java
test/jalview/ws/dbsources/UniprotTest.java
test/jalview/ws/seqfetcher/DbRefFetcherTest.java
test/jalview/ws/sifts/SiftsClientTest.java

index 1295b46..cd71bbd 100644 (file)
@@ -24,6 +24,7 @@ import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
@@ -106,6 +107,16 @@ public class CrossRef
         findXrefSourcesForSequence(seq, dna, sources);
       }
     }
+    sources.remove(DBRefSource.EMBL); // hack to prevent EMBL xrefs resulting in
+                                      // redundant datasets
+    if (dna)
+    {
+      sources.remove(DBRefSource.ENSEMBL); // hack to prevent Ensembl and
+                                           // EnsemblGenomes xref option shown
+                                           // from cdna panel
+      sources.remove(DBRefSource.ENSEMBLGENOMES);
+    }
+    // redundant datasets
     return sources;
   }
 
@@ -296,20 +307,28 @@ public class CrossRef
               if (!rseqs.contains(matchInDataset))
               {
                 rseqs.add(matchInDataset);
-                // need to try harder to only add unique mappings
-                if (xref.getMap().getMap().isTripletMap()
-                        && dataset.getMapping(seq, matchInDataset) == null
-                        && cf.getMappingBetween(seq, matchInDataset) == null)
+              }
+              // even if rseqs contained matchInDataset - check mappings between
+              // these seqs are added
+              // need to try harder to only add unique mappings
+              if (xref.getMap().getMap().isTripletMap()
+                      && dataset.getMapping(seq, matchInDataset) == null
+                      && cf.getMappingBetween(seq, matchInDataset) == null)
+              {
+                // materialise a mapping for highlighting between these
+                // sequences
+                if (fromDna)
                 {
-                  // materialise a mapping for highlighting between these sequences
-                  if (fromDna)
-                  {
-                    cf.addMap(dss, matchInDataset, xref.getMap().getMap(), xref.getMap().getMappedFromId());
-                  } else {
-                    cf.addMap(matchInDataset, dss, xref.getMap().getMap().getInverse(), xref.getMap().getMappedFromId());
-                  }
+                  cf.addMap(dss, matchInDataset, xref.getMap().getMap(),
+                          xref.getMap().getMappedFromId());
+                }
+                else
+                {
+                  cf.addMap(matchInDataset, dss, xref.getMap().getMap()
+                          .getInverse(), xref.getMap().getMappedFromId());
                 }
               }
+
               refIterator.remove();
               continue;
             }
@@ -393,28 +412,7 @@ public class CrossRef
     // first filter in case we are retrieving crossrefs that have already been
     // retrieved. this happens for cases where a database record doesn't yield
     // protein products for CDS
-    DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
-    for (SequenceI sq : dataset.getSequences())
-    {
-      boolean dupeFound = false;
-      // !fromDna means we are looking only for nucleotide sequences, not
-      // protein
-      if (sq.isProtein() == fromDna)
-      {
-        for (DBRefEntry dbr : sq.getPrimaryDBRefs())
-        {
-          for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
-          {
-            sourceRefs.remove(found);
-            dupeFound = true;
-          }
-        }
-      }
-      if (dupeFound)
-      {
-        dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
-      }
-    }
+    removeAlreadyRetrievedSeqs(sourceRefs, fromDna);
     if (sourceRefs.size() == 0)
     {
       // no more work to do! We already had all requested sequence records in
@@ -434,131 +432,240 @@ public class CrossRef
 
     if (retrieved != null)
     {
-      updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna);
+      boolean addedXref = false;
+      List<SequenceI> newDsSeqs = new ArrayList<SequenceI>(), doNotAdd = new ArrayList<SequenceI>();
+
       for (SequenceI retrievedSequence : retrieved)
       {
         // dataset gets contaminated ccwith non-ds sequences. why ??!
         // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
         SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence
                 : retrievedSequence.getDatasetSequence();
-        DBRefEntry[] dbr = retrievedSequence.getDBRefs();
-        if (dbr != null)
+        addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss,
+                retrievedDss);
+      }
+      if (!addedXref)
+      {
+        // try again, after looking for matching IDs
+        // shouldn't need to do this unless the dbref mechanism has broken.
+        updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna);
+        for (SequenceI retrievedSequence : retrieved)
+        {
+          // dataset gets contaminated ccwith non-ds sequences. why ??!
+          // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
+          SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence
+                  : retrievedSequence.getDatasetSequence();
+          addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss,
+                  retrievedDss);
+        }
+      }
+      for (SequenceI newToSeq : newDsSeqs)
+      {
+        if (!doNotAdd.contains(newToSeq)
+                && dataset.findIndex(newToSeq) == -1)
         {
-          for (DBRefEntry dbref : dbr)
+          dataset.addSequence(newToSeq);
+          matcher.add(newToSeq);
+        }
+      }
+    }
+  }
+
+  /**
+   * Search dataset for sequences with a primary reference contained in
+   * sourceRefs.
+   * 
+   * @param sourceRefs
+   *          - list of references to filter.
+   * @param fromDna
+   *          - type of sequence to search for matching primary reference.
+   */
+  private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,
+          boolean fromDna)
+  {
+    DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+    for (SequenceI sq : dataset.getSequences())
+    {
+      boolean dupeFound = false;
+      // !fromDna means we are looking only for nucleotide sequences, not
+      // protein
+      if (sq.isProtein() == fromDna)
+      {
+        for (DBRefEntry dbr : sq.getPrimaryDBRefs())
+        {
+          for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
           {
-            // find any entry where we should put in the sequence being
-            // cross-referenced into the map
-            Mapping map = dbref.getMap();
-            if (map != null)
+            sourceRefs.remove(found);
+            dupeFound = true;
+          }
+        }
+      }
+      if (dupeFound)
+      {
+        // rebuild the search array from the filtered sourceRefs list
+        dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+      }
+    }
+  }
+
+  /**
+   * process sequence retrieved via a dbref on source sequence to resolve and
+   * transfer data
+   * 
+   * @param cf
+   * @param sourceSequence
+   * @param retrievedSequence
+   * @return true if retrieveSequence was imported
+   */
+  private boolean importCrossRefSeq(AlignedCodonFrame cf,
+          List<SequenceI> newDsSeqs, List<SequenceI> doNotAdd,
+          SequenceI sourceSequence, SequenceI retrievedSequence)
+  {
+    /**
+     * set when retrievedSequence has been verified as a crossreference for
+     * sourceSequence
+     */
+    boolean imported = false;
+    DBRefEntry[] dbr = retrievedSequence.getDBRefs();
+    if (dbr != null)
+    {
+      for (DBRefEntry dbref : dbr)
+      {
+        SequenceI matched = findInDataset(dbref);
+        if (matched == sourceSequence)
+        {
+          // verified retrieved and source sequence cross-reference each other
+          imported = true;
+        }
+        // find any entry where we should put in the sequence being
+        // cross-referenced into the map
+        Mapping map = dbref.getMap();
+        if (map != null)
+        {
+          if (map.getTo() != null && map.getMap() != null)
+          {
+            if (map.getTo() == sourceSequence)
+            {
+              // already called to import once, and most likely this sequence
+              // already imported !
+              continue;
+            }
+            if (matched == null)
             {
-              if (map.getTo() != null && map.getMap() != null)
+              /*
+               * sequence is new to dataset, so save a reference so it can be added. 
+               */
+              newDsSeqs.add(map.getTo());
+              continue;
+            }
+
+            /*
+             * there was a matching sequence in dataset, so now, check to see if we can update the map.getTo() sequence to the existing one.
+             */
+
+            try
+            {
+              // compare ms with dss and replace with dss in mapping
+              // if map is congruent
+              SequenceI ms = map.getTo();
+              // TODO findInDataset requires exact sequence match but
+              // 'congruent' test is only for the mapped part
+              // maybe not a problem in practice since only ENA provide a
+              // mapping and it is to the full protein translation of CDS
+              // matcher.findIdMatch(map.getTo());
+              // TODO addendum: if matched is shorter than getTo, this will fail
+              // - when it should really succeed.
+              int sf = map.getMap().getToLowest();
+              int st = map.getMap().getToHighest();
+              SequenceI mappedrg = ms.getSubSequence(sf, st);
+              if (mappedrg.getLength() > 0
+                      && ms.getSequenceAsString().equals(
+                              matched.getSequenceAsString()))
               {
-                // TODO findInDataset requires exact sequence match but
-                // 'congruent' test is only for the mapped part
-                // maybe not a problem in practice since only ENA provide a
-                // mapping and it is to the full protein translation of CDS
-                SequenceI matched = findInDataset(dbref);
-                // matcher.findIdMatch(map.getTo());
-                if (matched != null)
+                /*
+                 * sequences were a match, 
+                 */
+                String msg = "Mapping updated from " + ms.getName()
+                        + " to retrieved crossreference "
+                        + matched.getName();
+                System.out.println(msg);
+
+                DBRefEntry[] toRefs = map.getTo().getDBRefs();
+                if (toRefs != null)
                 {
                   /*
-                   * already got an xref to this sequence; update this
-                   * map to point to the same sequence, and add
-                   * any new dbrefs to it
+                   * transfer database refs
                    */
-                  DBRefEntry[] toRefs = map.getTo().getDBRefs();
-                  if (toRefs != null)
+                  for (DBRefEntry ref : toRefs)
                   {
-                    for (DBRefEntry ref : toRefs)
+                    if (dbref.getSrcAccString().equals(
+                            ref.getSrcAccString()))
                     {
-                      matched.addDBRef(ref); // add or update mapping
+                      continue; // avoid overwriting the ref on source sequence
                     }
-                  }
-                  map.setTo(matched);
-                }
-                else
-                {
-                  if (dataset.findIndex(map.getTo()) == -1)
-                  {
-                    dataset.addSequence(map.getTo());
-                    matcher.add(map.getTo());
+                    matched.addDBRef(ref); // add or update mapping
                   }
                 }
-                try
-                {
-                  // compare ms with dss and replace with dss in mapping
-                  // if map is congruent
-                  SequenceI ms = map.getTo();
-                  int sf = map.getMap().getToLowest();
-                  int st = map.getMap().getToHighest();
-                  SequenceI mappedrg = ms.getSubSequence(sf, st);
-                  // SequenceI loc = dss.getSubSequence(sf, st);
-                  if (mappedrg.getLength() > 0
-                          && ms.getSequenceAsString().equals(
-                                  dss.getSequenceAsString()))
-                  // && mappedrg.getSequenceAsString().equals(
-                  // loc.getSequenceAsString()))
-                  {
-                    String msg = "Mapping updated from " + ms.getName()
-                            + " to retrieved crossreference "
-                            + dss.getName();
-                    System.out.println(msg);
-                    map.setTo(dss);
+                doNotAdd.add(map.getTo());
+                map.setTo(matched);
 
-                    /*
-                     * give the reverse reference the inverse mapping 
-                     * (if it doesn't have one already)
-                     */
-                    setReverseMapping(dss, dbref, cf);
+                /*
+                 * give the reverse reference the inverse mapping 
+                 * (if it doesn't have one already)
+                 */
+                setReverseMapping(matched, dbref, cf);
 
+                /*
+                 * copy sequence features as well, avoiding
+                 * duplication (e.g. same variation from two 
+                 * transcripts)
+                 */
+                SequenceFeature[] sfs = ms.getSequenceFeatures();
+                if (sfs != null)
+                {
+                  for (SequenceFeature feat : sfs)
+                  {
                     /*
-                     * copy sequence features as well, avoiding
-                     * duplication (e.g. same variation from two 
-                     * transcripts)
+                     * make a flyweight feature object which ignores Parent
+                     * attribute in equality test; this avoids creating many
+                     * otherwise duplicate exon features on genomic sequence
                      */
-                    SequenceFeature[] sfs = ms.getSequenceFeatures();
-                    if (sfs != null)
+                    SequenceFeature newFeature = new SequenceFeature(
+                            feat)
                     {
-                      for (SequenceFeature feat : sfs)
+                      @Override
+                      public boolean equals(Object o)
                       {
-                        /*
-                         * make a flyweight feature object which ignores Parent
-                         * attribute in equality test; this avoids creating many
-                         * otherwise duplicate exon features on genomic sequence
-                         */
-                        SequenceFeature newFeature = new SequenceFeature(
-                                feat)
-                        {
-                          @Override
-                          public boolean equals(Object o)
-                          {
-                            return super.equals(o, true);
-                          }
-                        };
-                        dss.addSequenceFeature(newFeature);
+                        return super.equals(o, true);
                       }
-                    }
+                    };
+                    matched.addSequenceFeature(newFeature);
                   }
-                  cf.addMap(retrievedDss, map.getTo(), map.getMap());
-                } catch (Exception e)
-                {
-                  System.err
-                          .println("Exception when consolidating Mapped sequence set...");
-                  e.printStackTrace(System.err);
                 }
+
               }
+              cf.addMap(retrievedSequence, map.getTo(), map.getMap());
+            } catch (Exception e)
+            {
+              System.err
+                      .println("Exception when consolidating Mapped sequence set...");
+              e.printStackTrace(System.err);
             }
           }
         }
-        retrievedSequence.updatePDBIds();
-        rseqs.add(retrievedDss);
-        if (dataset.findIndex(retrievedDss) == -1)
-        {
-          dataset.addSequence(retrievedDss);
-          matcher.add(retrievedDss);
-        }
       }
     }
+    if (imported)
+    {
+      retrievedSequence.updatePDBIds();
+      rseqs.add(retrievedSequence);
+      if (dataset.findIndex(retrievedSequence) == -1)
+      {
+        dataset.addSequence(retrievedSequence);
+        matcher.add(retrievedSequence);
+      }
+    }
+    return imported;
   }
   /**
    * Sets the inverse sequence mapping in the corresponding dbref of the mapped
@@ -602,9 +709,12 @@ public class CrossRef
   }
 
   /**
-   * Returns the first identical sequence in the dataset if any, else null
+   * Returns null or the first sequence in the dataset which is identical to
+   * xref.mapTo, and has a) a primary dbref matching xref, or if none found, the
+   * first one with an ID source|xrefacc
    * 
    * @param xref
+   *          with map and mapped-to sequence
    * @return
    */
   SequenceI findInDataset(DBRefEntry xref)
@@ -618,22 +728,42 @@ public class CrossRef
     String name2 = xref.getSource() + "|" + name;
     SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo
             .getDatasetSequence();
+    // first check ds if ds is directly referenced
+    if (dataset.findIndex(dss) > -1)
+    {
+      return dss;
+    }
+    DBRefEntry template = new DBRefEntry(xref.getSource(), null,
+            xref.getAccessionId());
+    /**
+     * remember the first ID match - in case we don't find a match to template
+     */
+    SequenceI firstIdMatch = null;
     for (SequenceI seq : dataset.getSequences())
     {
+      // first check primary refs.
+      List<DBRefEntry> match = DBRefUtils.searchRefs(seq.getPrimaryDBRefs()
+              .toArray(new DBRefEntry[0]), template);
+      if (match != null && match.size() == 1 && sameSequence(seq, dss))
+      {
+        return seq;
+      }
       /*
        * clumsy alternative to using SequenceIdMatcher which currently
        * returns sequences with a dbref to the matched accession id 
        * which we don't want
        */
-      if (name.equals(seq.getName()) || seq.getName().startsWith(name2))
+      if (firstIdMatch == null
+              && (name.equals(seq.getName()) || seq.getName().startsWith(
+                      name2)))
       {
         if (sameSequence(seq, dss))
         {
-          return seq;
+          firstIdMatch = seq;
         }
       }
     }
-    return null;
+    return firstIdMatch;
   }
 
   /**
index 35ee8c4..2289ac6 100755 (executable)
@@ -24,6 +24,7 @@ import jalview.analysis.AlignmentUtils;
 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.io.FastaFile;
 import jalview.util.Comparison;
+import jalview.util.LinkedIdentityHashSet;
 import jalview.util.MessageManager;
 
 import java.util.ArrayList;
@@ -1054,6 +1055,7 @@ public class Alignment implements AlignmentI
   private void resolveAndAddDatasetSeq(SequenceI currentSeq,
           Set<SequenceI> seqs, boolean createDatasetSequence)
   {
+    SequenceI alignedSeq = currentSeq;
     if (currentSeq.getDatasetSequence() != null)
     {
       currentSeq = currentSeq.getDatasetSequence();
@@ -1088,12 +1090,19 @@ public class Alignment implements AlignmentI
         {
           if (dbr.getMap() != null && dbr.getMap().getTo() != null)
           {
+            if (dbr.getMap().getTo() == alignedSeq)
+            {
+              /*
+               * update mapping to be to the newly created dataset sequence
+               */
+              dbr.getMap().setTo(currentSeq);
+            }
             if (dbr.getMap().getTo().getDatasetSequence() != null)
             {
-              throw new Error("Implementation error: Map.getTo() for dbref"
-                      + dbr + " is not a dataset sequence.");
-              // TODO: if this happens, could also rewrite the reference to
-              // point to new dataset sequence
+              throw new Error(
+                      "Implementation error: Map.getTo() for dbref " + dbr
+                              + " from " + curDs.getName()
+                              + " is not a dataset sequence.");
             }
             // we recurse to add all forward references to dataset sequences via
             // DBRefs/etc
@@ -1115,7 +1124,7 @@ public class Alignment implements AlignmentI
       return;
     }
     // try to avoid using SequenceI.equals at this stage, it will be expensive
-    Set<SequenceI> seqs = new jalview.util.LinkedIdentityHashSet<SequenceI>();
+    Set<SequenceI> seqs = new LinkedIdentityHashSet<SequenceI>();
 
     for (int i = 0; i < getHeight(); i++)
     {
index 44522a8..68c8c50 100755 (executable)
@@ -22,6 +22,7 @@ package jalview.datamodel;
 
 import jalview.analysis.AlignSeq;
 import jalview.api.DBRefEntryI;
+import jalview.util.Comparison;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
 import jalview.util.StringUtils;
@@ -944,7 +945,17 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public void setDBRefs(DBRefEntry[] dbref)
   {
+    if (dbrefs == null && datasetSequence != null
+            && this != datasetSequence)
+    {
+      datasetSequence.setDBRefs(dbref);
+      return;
+    }
     dbrefs = dbref;
+    if (dbrefs != null)
+    {
+      DBRefUtils.ensurePrimaries(this);
+    }
   }
 
   @Override
@@ -961,7 +972,12 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public void addDBRef(DBRefEntry entry)
   {
-    // TODO add to dataset sequence instead if there is one?
+    if (datasetSequence != null)
+    {
+      datasetSequence.addDBRef(entry);
+      return;
+    }
+
     if (dbrefs == null)
     {
       dbrefs = new DBRefEntry[0];
@@ -989,12 +1005,23 @@ public class Sequence extends ASequence implements SequenceI
     temp[temp.length - 1] = entry;
 
     dbrefs = temp;
+
+    DBRefUtils.ensurePrimaries(this);
   }
 
   @Override
   public void setDatasetSequence(SequenceI seq)
   {
-    // TODO check for circular reference before setting?
+    if (seq == this)
+    {
+      throw new IllegalArgumentException(
+              "Implementation Error: self reference passed to SequenceI.setDatasetSequence");
+    }
+    if (seq != null && seq.getDatasetSequence() != null)
+    {
+      throw new IllegalArgumentException(
+              "Implementation error: cascading dataset sequences are not allowed.");
+    }
     datasetSequence = seq;
   }
 
@@ -1091,6 +1118,10 @@ public class Sequence extends ASequence implements SequenceI
 
   private long _seqhash = 0;
 
+  /**
+   * Answers false if the sequence is more than 85% nucleotide (ACGTU), else
+   * true
+   */
   @Override
   public boolean isProtein()
   {
@@ -1101,7 +1132,7 @@ public class Sequence extends ASequence implements SequenceI
     if (_seqhash != sequence.hashCode())
     {
       _seqhash = sequence.hashCode();
-      _isNa=jalview.util.Comparison.isNucleotide(new SequenceI[] { this });
+      _isNa = Comparison.isNucleotide(this);
     }
     return !_isNa;
   };
index b7a291e..49ddf86 100755 (executable)
@@ -217,8 +217,11 @@ public interface SequenceI extends ASequenceI
   public int[] findPositionMap();
 
   /**
+   * Answers true if the sequence is composed of amino acid characters. Note
+   * that implementations may use heuristic methods which are not guaranteed to
+   * give the biologically 'right' answer.
    * 
-   * @return true if sequence is composed of amino acid characters
+   * @return
    */
   public boolean isProtein();
 
@@ -314,6 +317,14 @@ public interface SequenceI extends ASequenceI
 
   public void setVamsasId(String id);
 
+  /**
+   * set the array of Database references for the sequence.
+   * 
+   * @param dbs
+   * @deprecated - use is discouraged since side-effects may occur if DBRefEntry
+   *             set are not normalised.
+   */
+  @Deprecated
   public void setDBRefs(DBRefEntry[] dbs);
 
   public DBRefEntry[] getDBRefs();
index 72efdc1..11a869e 100644 (file)
@@ -208,6 +208,11 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
     URL url = getUrl(ids);
   
     BufferedReader reader = getHttpResponse(url, ids);
+    if (reader == null)
+    {
+      // request failed
+      return null;
+    }
     FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
     return fp;
   }
@@ -248,7 +253,6 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
       writePostBody(connection, ids);
     }
   
-    InputStream response = connection.getInputStream();
     int responseCode = connection.getResponseCode();
   
     if (responseCode != 200)
@@ -257,10 +261,12 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
        * note: a GET request for an invalid id returns an error code e.g. 415
        * but POST request returns 200 and an empty Fasta response 
        */
-      throw new IOException(
-              "Response code was not 200. Detected response was "
-                      + responseCode);
+      System.err.println("Response code " + responseCode + " for " + url);
+      return null;
     }
+    // get content
+    InputStream response = connection.getInputStream();
+
     // System.out.println(getClass().getName() + " took "
     // + (System.currentTimeMillis() - now) + "ms to fetch");
 
index 5fccedd..91b09ea 100644 (file)
@@ -2,9 +2,11 @@ package jalview.ext.ensembl;
 
 import jalview.analysis.AlignmentUtils;
 import jalview.analysis.Dna;
+import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
@@ -158,6 +160,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                 + " chunks. Unexpected problem (" + r.getLocalizedMessage()
                 + ")";
         System.err.println(msg);
+        r.printStackTrace();
         break;
       }
     }
@@ -281,6 +284,44 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         DBRefEntry dbr = new DBRefEntry(getDbSource(),
                 getEnsemblDataVersion(), proteinSeq.getName(), map);
         querySeq.getDatasetSequence().addDBRef(dbr);
+        DBRefEntry[] uprots = DBRefUtils.selectRefs(ds.getDBRefs(),
+                new String[] { DBRefSource.UNIPROT });
+        DBRefEntry[] upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(),
+                new String[] { DBRefSource.UNIPROT });
+        if (uprots != null)
+        {
+          for (DBRefEntry up : uprots)
+          {
+            // locate local uniprot ref and map
+            List<DBRefEntry> upx = DBRefUtils.searchRefs(upxrefs, up.getAccessionId());
+            DBRefEntry upxref;
+            if (upx.size() != 0)
+            {
+              upxref = upx.get(0);
+
+              if (upx.size() > 1)
+              {
+                Cache.log
+                        .warn("Implementation issue - multiple uniprot acc on product sequence.");
+              }
+            }
+            else
+            {
+              upxref = new DBRefEntry(DBRefSource.UNIPROT,
+                    getEnsemblDataVersion(), up.getAccessionId());
+            }
+
+            Mapping newMap = new Mapping(ds, mapList);
+            upxref.setVersion(getEnsemblDataVersion());
+            upxref.setMap(newMap);
+            if (upx.size() == 0)
+            {
+              // add the new uniprot ref
+              querySeq.getDatasetSequence().addDBRef(upxref);
+            }
+            
+          }
+        }
         
         /*
          * copy exon features to protein, compute peptide variants from dna 
@@ -343,6 +384,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       throw new JalviewException("ENSEMBL Rest API not available.");
     }
     FileParse fp = getSequenceReader(ids);
+    if (fp == null)
+    {
+      return alignment;
+    }
+
     FastaFile fr = new FastaFile(fp);
     if (fr.hasWarningMessage())
     {
index 00a081b..230cbdb 100644 (file)
@@ -451,7 +451,8 @@ public abstract class FTSRestClient implements FTSRestClientI
     case 502:
     case 504:
     case 505:
-      message = MessageManager.getString("exception.fts_server_error");
+      message = MessageManager.formatMessage("exception.fts_server_error",
+              service);
       break;
     case 503:
       message = MessageManager.getString("exception.service_not_available");
index 405f6e6..d43f5bc 100755 (executable)
@@ -26,6 +26,7 @@ import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceI;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
@@ -301,7 +302,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -333,7 +335,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -370,7 +373,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -410,7 +414,8 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (refa.getSource() != null && refb.getSource() != null
+      if (refa.getSource() != null
+              && refb.getSource() != null
               && DBRefUtils.getCanonicalName(refb.getSource()).equals(
                       DBRefUtils.getCanonicalName(refa.getSource())))
       {
@@ -608,4 +613,127 @@ public class DBRefUtils
     return matches;
   }
 
+  /**
+   * promote direct database references to primary for nucleotide or protein
+   * sequences if they have an appropriate primary ref
+   * <table>
+   * <tr>
+   * <th>Seq Type</th>
+   * <th>Primary DB</th>
+   * <th>Direct which will be promoted</th>
+   * </tr>
+   * <tr align=center>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr align=center>
+   * <td>peptides</td>
+   * <td>Ensembl</td>
+   * <td>Uniprot</td>
+   * </tr>
+   * <tr align=center>
+   * <td>dna</td>
+   * <td>Ensembl</td>
+   * <td>ENA</td>
+   * </tr>
+   * </table>
+   * 
+   * @param sequence
+   */
+  public static void ensurePrimaries(SequenceI sequence)
+  {
+    List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
+    if (pr.size() == 0)
+    {
+      // nothing to do
+      return;
+    }
+    List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
+    {
+      DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
+              sequence.getDBRefs());
+      if (selfArray == null || selfArray.length == 0)
+      {
+        // nothing to do
+        return;
+      }
+      selfs.addAll(Arrays.asList(selfArray));
+    }
+
+    // filter non-primary refs
+    for (DBRefEntry p : pr)
+    {
+      while (selfs.contains(p))
+      {
+        selfs.remove(p);
+      }
+    }
+    List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
+
+    for (DBRefEntry p : pr)
+    {
+      List<String> promType = new ArrayList<String>();
+      if (sequence.isProtein())
+      {
+        switch (getCanonicalName(p.getSource()))
+        {
+        case DBRefSource.UNIPROT:
+          // case DBRefSource.UNIPROTKB:
+          // case DBRefSource.UP_NAME:
+          // search for and promote ensembl
+          promType.add(DBRefSource.ENSEMBL);
+          break;
+        case DBRefSource.ENSEMBL:
+          // search for and promote Uniprot
+          promType.add(DBRefSource.UNIPROT);
+          break;
+        }
+      }
+      else
+      {
+        // TODO: promote transcript refs
+      }
+
+      // collate candidates and promote them
+      DBRefEntry[] candidates = selectRefs(
+              selfs.toArray(new DBRefEntry[0]),
+              promType.toArray(new String[0]));
+      if (candidates != null)
+      {
+        for (DBRefEntry cand : candidates)
+        {
+          if (cand.hasMap())
+          {
+            if (cand.getMap().getTo() != null
+                    && cand.getMap().getTo() != sequence)
+            {
+              // can't promote refs with mappings to other sequences
+              continue;
+            }
+            if (cand.getMap().getMap().getFromLowest() != sequence
+                    .getStart()
+                    && cand.getMap().getMap().getFromHighest() != sequence
+                            .getEnd())
+            {
+              // can't promote refs with mappings from a region of this sequence
+              // - eg CDS
+              continue;
+            }
+          }
+          // and promote
+          cand.setVersion(p.getVersion() + " (promoted)");
+          selfs.remove(cand);
+          toPromote.add(cand);
+          if (!cand.isPrimaryCandidate())
+          {
+            System.out.println("Warning: Couldn't promote dbref "
+                    + cand.toString() + " for sequence "
+                    + sequence.toString());
+          }
+        }
+      }
+    }
+  }
+
 }
index 81b4caf..0c2af3b 100644 (file)
@@ -222,6 +222,40 @@ public class Uniprot extends DbSourceProxyImpl
       {
         onlyPdbEntries.addElement(pdb);
       }
+      if ("EMBL".equals(pdb.getType()))
+      {
+        // look for a CDS reference and add it, too.
+        String cdsId = (String) pdb.getProperty()
+                .get("protein sequence ID");
+        if (cdsId != null && cdsId.trim().length() > 0)
+        {
+          // remove version
+          String[] vrs = cdsId.split("\\.");
+          dbr = new DBRefEntry(DBRefSource.EMBLCDS, vrs.length > 1 ? vrs[1]
+                  : DBRefSource.UNIPROT + ":" + dbVersion, vrs[0]);
+          dbRefs.add(dbr);
+        }
+      }
+      if ("Ensembl".equals(pdb.getType()))
+      {
+        /*UniprotXML
+         * <dbReference type="Ensembl" id="ENST00000321556">
+        * <molecule id="Q9BXM7-1"/>
+        * <property type="protein sequence ID" value="ENSP00000364204"/>
+        * <property type="gene ID" value="ENSG00000158828"/>
+        * </dbReference> 
+         */
+        String cdsId = (String) pdb.getProperty()
+                .get("protein sequence ID");
+        if (cdsId != null && cdsId.trim().length() > 0)
+        {
+          dbr = new DBRefEntry(DBRefSource.ENSEMBL, DBRefSource.UNIPROT
+                  + ":" + dbVersion, cdsId.trim());
+          dbRefs.add(dbr);
+
+        }
+      }
+
     }
 
     sequence.setPDBId(onlyPdbEntries);
@@ -233,7 +267,10 @@ public class Uniprot extends DbSourceProxyImpl
         sequence.addSequenceFeature(sf);
       }
     }
-    sequence.setDBRefs(dbRefs.toArray(new DBRefEntry[0]));
+    for (DBRefEntry dbr : dbRefs)
+    {
+      sequence.addDBRef(dbr);
+    }
     return sequence;
   }
 
index 24ddb34..759f527 100644 (file)
@@ -121,8 +121,9 @@ public class CrossRefTest
     seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
     sources = new CrossRef(new SequenceI[] { seq }, al)
             .findXrefSourcesForSequences(false);
-    assertEquals(4, sources.size());
-    assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString());
+    // method is patched to remove EMBL from the sources to match
+    assertEquals(3, sources.size());
+    assertEquals("[EMBLCDS, GENEDB, ENSEMBL]", sources.toString());
 
     /*
      * add a sequence to the alignment which has a dbref to UNIPROT|A1234
@@ -140,8 +141,9 @@ public class CrossRefTest
     al.addSequence(seq2);
     sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
             .findXrefSourcesForSequences(false);
-    assertEquals(3, sources.size());
-    assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
+    // method removed EMBL from sources to match
+    assertEquals(2, sources.size());
+    assertEquals("[EMBLCDS, GENEDB]", sources.toString());
   }
 
   /**
@@ -402,11 +404,14 @@ public class CrossRefTest
   public void testFindXrefSequences_withFetch()
   {
     SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
     final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+    pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
     final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+    pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
 
     /*
      * argument false suppresses adding DAS sources
@@ -471,7 +476,7 @@ public class CrossRefTest
      * 'spliced transcript' with CDS ranges
      */
     SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa");
-    braf002.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
+    braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3"));
     braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f,
             null));
     braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f,
@@ -483,8 +488,9 @@ public class CrossRefTest
      * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl 
      */
     final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
+    pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
     final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
-
+    pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
     /*
      * argument false suppresses adding DAS sources
      * todo: define an interface type SequenceFetcherI and mock that
@@ -620,7 +626,7 @@ public class CrossRefTest
      */
     final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
     DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
-    dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+    dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
             new MapList(map2)));
     x07547.addDBRef(dbref7);
     DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
index 9afae37..7fb80fb 100644 (file)
@@ -239,7 +239,7 @@ public class EditCommandTest
   public void testReplace()
   {
     // seem to need a dataset sequence on the edited sequence here
-    seqs[1].setDatasetSequence(seqs[1]);
+    seqs[1].createDatasetSequence();
     new EditCommand("", Action.REPLACE, "ZXY", new SequenceI[] { seqs[1] },
             4, 8, al);
     assertEquals("abcdefghjk", seqs[0].getSequenceAsString());
index 7ad9436..fcf724a 100644 (file)
@@ -1042,4 +1042,67 @@ public class AlignmentTest
     assertEquals(23, startEnd[1]);
   }
 
+  /**
+   * Tests that dbrefs with mappings to sequence get updated if the sequence
+   * acquires a dataset sequence
+   */
+  @Test(groups = "Functional")
+  public void testCreateDataset_updateDbrefMappings()
+  {
+    SequenceI pep = new Sequence("pep", "ASD");
+    SequenceI dna = new Sequence("dna", "aaaGCCTCGGATggg");
+    SequenceI cds = new Sequence("cds", "GCCTCGGAT");
+  
+    // add dbref from dna to peptide
+    DBRefEntry dbr = new DBRefEntry("UNIPROT", "", "pep");
+    dbr.setMap(new Mapping(pep, new MapList(new int[] { 4, 15 }, new int[] {
+        1, 4 }, 3, 1)));
+    dna.addDBRef(dbr);
+
+    // add dbref from dna to peptide
+    DBRefEntry dbr2 = new DBRefEntry("UNIPROT", "", "pep");
+    dbr2.setMap(new Mapping(pep, new MapList(new int[] { 1, 12 }, new int[]
+    { 1, 4 }, 3, 1)));
+    cds.addDBRef(dbr2);
+
+    // add dbref from peptide to dna
+    DBRefEntry dbr3 = new DBRefEntry("EMBL", "", "dna");
+    dbr3.setMap(new Mapping(dna, new MapList(new int[] { 1, 4 }, new int[] {
+        4, 15 }, 1, 3)));
+    pep.addDBRef(dbr3);
+
+    // add dbref from peptide to cds
+    DBRefEntry dbr4 = new DBRefEntry("EMBLCDS", "", "cds");
+    dbr4.setMap(new Mapping(cds, new MapList(new int[] { 1, 4 }, new int[] {
+        1, 12 }, 1, 3)));
+    pep.addDBRef(dbr4);
+
+    AlignmentI protein = new Alignment(new SequenceI[] { pep });
+  
+    /*
+     * create the alignment dataset
+     */
+    ((Alignment) protein).createDatasetAlignment();
+  
+    AlignmentI ds = protein.getDataset();
+  
+    // should be 3 sequences in dataset
+    assertEquals(3, ds.getHeight());
+    assertTrue(ds.getSequences().contains(pep.getDatasetSequence()));
+    assertTrue(ds.getSequences().contains(dna));
+    assertTrue(ds.getSequences().contains(cds));
+
+    /*
+     * verify peptide.cdsdbref.peptidedbref is now mapped to peptide dataset
+     */
+    DBRefEntry[] dbRefs = pep.getDBRefs();
+    assertEquals(2, dbRefs.length);
+    assertSame(dna, dbRefs[0].map.to);
+    assertSame(cds, dbRefs[1].map.to);
+    assertEquals(1, dna.getDBRefs().length);
+    assertSame(pep.getDatasetSequence(), dna.getDBRefs()[0].map.to);
+    assertEquals(1, cds.getDBRefs().length);
+    assertSame(pep.getDatasetSequence(), cds.getDBRefs()[0].map.to);
+  }
+
 }
index 8c5073b..25804bc 100644 (file)
@@ -366,7 +366,16 @@ public class SequenceTest
      * is there a usecase for this ? setDatasetSequence should throw an error if
      * this actually occurs.
      */
-    sq.getDatasetSequence().setDatasetSequence(sq); // loop!
+    try
+    {
+      sq.getDatasetSequence().setDatasetSequence(sq); // loop!
+      Assert.fail("Expected Error to be raised when calling setDatasetSequence with self reference");
+    } catch (IllegalArgumentException e)
+    {
+      // TODO Jalview error/exception class for raising implementation errors
+      assertTrue(e.getMessage().toLowerCase()
+              .contains("implementation error"));
+    }
     assertNull(sq.getSequenceFeatures());
   }
 
@@ -451,19 +460,20 @@ public class SequenceTest
     sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
     sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
     
+    // these are the same as ones already added
     DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
-    DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
+    DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version2", "2PDB");
 
     
     List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
         pdb2pdb });
 
-    sq.getDatasetSequence().addDBRef(pdb1pdb);
-    sq.getDatasetSequence().addDBRef(pdb2pdb);
+    sq.getDatasetSequence().addDBRef(pdb1pdb); // should do nothing
+    sq.getDatasetSequence().addDBRef(pdb2pdb); // should do nothing
     sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version3", "3PDB"));
+            new DBRefEntry("PDB", "version3", "3PDB")); // should do nothing
     sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version4", "4PDB"));
+            new DBRefEntry("PDB", "version4", "4PDB")); // should do nothing
     
     PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
     PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
@@ -500,11 +510,14 @@ public class SequenceTest
             new AlignmentAnnotation("Test annot", "Test annot description",
                     annots));
     Assert.assertEquals(sq.getDescription(), "Test sequence description..");
-    Assert.assertEquals(sq.getDBRefs().length, 5);
+    Assert.assertEquals(sq.getDBRefs().length, 5); // DBRefs are on dataset
+                                                   // sequence
     Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
     Assert.assertNotNull(sq.getAnnotation());
     Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
-    Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 4);
+    Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 5); // same
+                                                                        // as
+                                                                        // sq.getDBRefs()
     Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(),
             4);
     Assert.assertNotNull(sq.getDatasetSequence().getAnnotation());
@@ -513,11 +526,11 @@ public class SequenceTest
 
     Assert.assertEquals(derived.getDescription(),
             "Test sequence description..");
-    Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
+    Assert.assertEquals(derived.getDBRefs().length, 5); // come from dataset
     Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
     Assert.assertNotNull(derived.getAnnotation());
     Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
-    Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 4);
+    Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 5);
     Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries()
             .size(), 4);
     Assert.assertNotNull(derived.getDatasetSequence().getAnnotation());
@@ -967,4 +980,22 @@ public class SequenceTest
     assertEquals(4, seq.getAllPDBEntries().size());
     assertSame(pdbe5, seq.getAllPDBEntries().get(3));
   }
+
+  @Test(
+    groups = { "Functional" },
+    expectedExceptions = { IllegalArgumentException.class })
+  public void testSetDatasetSequence_toSelf()
+  {
+    seq.setDatasetSequence(seq);
+  }
+
+  @Test(
+    groups = { "Functional" },
+    expectedExceptions = { IllegalArgumentException.class })
+  public void testSetDatasetSequence_cascading()
+  {
+    SequenceI seq2 = new Sequence("Seq2", "xyz");
+    seq2.createDatasetSequence();
+    seq.setDatasetSequence(seq2);
+  }
 }
index 4c7df46..1e41a16 100644 (file)
@@ -44,7 +44,7 @@ public class StructureChooserTest
   {
     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
             26);
-    seq.setDatasetSequence(seq);
+    seq.createDatasetSequence();
     for (int x = 1; x < 5; x++)
     {
       DBRefEntry dbRef = new DBRefEntry();
index 625244d..c9b5f4a 100644 (file)
@@ -75,8 +75,7 @@ public class AnnotationFileIOTest
       // make sure dataset is initialised ? not sure about this
       for (int i = 0; i < al.getSequencesArray().length; ++i)
       {
-        al.getSequenceAt(i).setDatasetSequence(
-                al.getSequenceAt(i).createDatasetSequence());
+        al.getSequenceAt(i).createDatasetSequence();
       }
       assertNotNull("Couldn't read supplied alignment data.", al);
       return al;
index 93fb12b..f75f433 100644 (file)
@@ -114,7 +114,7 @@ public class JSONFileTest
 
     for (Sequence seq : seqs)
     {
-      seq.setDatasetSequence(seq);
+      seq.createDatasetSequence();
       expectedSeqs.put(seq.getName(), seq);
     }
 
index 0e2b630..b635aa3 100644 (file)
@@ -103,7 +103,7 @@ public class StockholmFileTest
       // make sure dataset is initialised ? not sure about this
       for (int i = 0; i < al.getSequencesArray().length; ++i)
       {
-        al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+        al.getSequenceAt(i).createDatasetSequence();
       }
       String outputfile = rf.formatSequences(ioformat, al, true);
       System.out.println("Output file in '" + ioformat + "':\n"
index 72e599d..77f8078 100644 (file)
 package jalview.ws.dbsources;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 
 import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 
 import java.io.Reader;
@@ -46,6 +48,7 @@ public class UniprotTest
           + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName><fullName>Henry</fullName></recommendedName></protein>"
           + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
           + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
+          + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
           + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
           + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
           + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
@@ -109,7 +112,7 @@ public class UniprotTest
      * Check cross-references
      */
     Vector<PDBEntry> xrefs = entry.getDbReference();
-    assertEquals(2, xrefs.size());
+    assertEquals(3, xrefs.size());
 
     PDBEntry xref = xrefs.get(0);
     assertEquals("2FSQ", xref.getId());
@@ -122,8 +125,29 @@ public class UniprotTest
     assertEquals("2FSR", xref.getId());
     assertEquals("PDBsum", xref.getType());
     assertNull(xref.getProperty());
+
+    xref = xrefs.get(2);
+    assertEquals("AE007869", xref.getId());
+    assertEquals("EMBL", xref.getType());
+    assertNotNull(xref.getProperty());
+    assertEquals("AAK85932.1",
+            (String) xref.getProperty().get("protein sequence ID"));
+    assertEquals("Genomic_DNA",
+            (String) xref.getProperty().get("molecule type"));
+    assertEquals(2, xref.getProperty().size());
+
   }
 
+  @Test(groups = { "Functional" })
+  public void testGetUniprotSequence()
+  {
+    UniprotEntry entry = new Uniprot().getUniprotEntries(
+            new StringReader(UNIPROT_XML)).get(0);
+    SequenceI seq = new Uniprot().uniprotEntryToSequenceI(entry);
+    assertNotNull(seq);
+    assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL
+
+  }
   /**
    * Test the method that formats the sequence id
    */
index 59bf445..0a565bd 100644 (file)
@@ -173,8 +173,7 @@ public class DbRefFetcherTest
                     sfs[0].getType()));
     assertEquals(embl.getDbSource(), sfs[0].getFeatureGroup());
     DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRefs(),
-            new String[] { DBRefSource.UNIPROT, DBRefSource.UNIPROTKB,
-                DBRefSource.EMBLCDSProduct, DBRefSource.ENSEMBL });
+            new String[] { DBRefSource.UNIPROT });
     assertNotNull(dr);
     assertEquals("Expected a single Uniprot cross reference", 1, dr.length);
     assertEquals("Expected cross reference map to be one amino acid", dr[0]
index 6f9a864..d3b485e 100644 (file)
@@ -21,6 +21,7 @@
 package jalview.ws.sifts;
 
 import jalview.api.DBRefEntryI;
+import jalview.bin.Cache;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Sequence;
@@ -170,6 +171,8 @@ public class SiftsClientTest
   @BeforeTest(alwaysRun = true)
   public void setUpSiftsClient() throws SiftsException
   {
+    // read test props before manipulating config
+    Cache.loadProperties("test/jalview/io/testProps.jvprops");
     // SIFTs entries are updated weekly - so use saved SIFTs file to enforce
     // test reproducibility
     new SiftsSettings();