JAL-2110 wip CrossRefs rewrite of CrossRef

author gmungoc <g.m.carstairs@dundee.ac.uk>

Wed, 25 May 2016 08:25:19 +0000 (09:25 +0100)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Wed, 25 May 2016 08:25:19 +0000 (09:25 +0100)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 25 May 2016 08:25:19 +0000 (09:25 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 25 May 2016 08:25:19 +0000 (09:25 +0100)
diff --git a/src/jalview/analysis/CrossRefs.java b/src/jalview/analysis/CrossRefs.java

new file mode 100644 (file)

index 0000000..7d0c263
--- /dev/null
+++ b/src/jalview/analysis/CrossRefs.java
@@ -0,0 +1,486 @@
+package jalview.analysis;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcherFactory;
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+public class CrossRefs
+{
+  /**
+   * Collects the cross-references to a given source database for a set of
+   * sequences. Sequences whose molecule type does not agree with 'dna' are
+   * ignored. For every other sequence, its dbrefs to 'source' are resolved in
+   * three successive steps, each one working on the refs left over by the
+   * step before:
+   * <ol>
+   * <li>dbrefs which already hold a mapping to a sequence</li>
+   * <li>dbrefs matched by a sequence of complementary type in the dataset</li>
+   * <li>any refs still unresolved are passed on to the sequence fetcher</li>
+   * </ol>
+   *
+   * @param seqs
+   *          the sequences whose cross-references we are searching for
+   * @param dna
+   *          true if the sequences are from a nucleotide alignment, else false
+   * @param source
+   *          the database source we want cross-references to
+   * @param dataset
+   *          the alignment dataset the sequences belong to
+   * @return an alignment holding the cross-reference sequences together with
+   *         the mappings gathered on the way, or null if nothing was found
+   */
+  public static AlignmentI findXrefSequences(SequenceI[] seqs, boolean dna,
+          String source, AlignmentI dataset)
+  {
+    List<SequenceI> xrefSeqs = new ArrayList<SequenceI>();
+    AlignedCodonFrame codonFrame = new AlignedCodonFrame();
+
+    for (int i = 0; i < seqs.length; i++)
+    {
+      SequenceI current = seqs[i];
+
+      /*
+       * in a mixed alignment, only sequences of the requested type count
+       */
+      if (Comparison.isNucleotide(current) == dna)
+      {
+        /*
+         * the dbrefs (if any) this sequence has to the source database;
+         * each step below removes the refs it manages to resolve
+         */
+        List<DBRefEntry> unresolved = DBRefUtils.searchRefsForSource(
+                current.getDBRefs(), source);
+
+        // step 1: refs that carry their own mapped sequence
+        findMappedDbrefs(current, unresolved, xrefSeqs, codonFrame);
+
+        // step 2: refs shared with a complementary sequence in the dataset
+        findIndirectCrossReferences(current, source, unresolved, dataset,
+                xrefSeqs, codonFrame);
+
+        // step 3: fetch the remainder from the source database
+        fetchCrossReferences(current, unresolved, xrefSeqs, codonFrame,
+                dna, dataset);
+      }
+    }
+
+    if (!xrefSeqs.isEmpty())
+    {
+      SequenceI[] asArray = new SequenceI[xrefSeqs.size()];
+      AlignmentI result = new Alignment(xrefSeqs.toArray(asArray));
+      result.addCodonFrame(codonFrame);
+      return result;
+    }
+    return null;
+  }
+
+  /**
+   * Looks for DBRefEntrys to 'source' which have a mapping to a sequence. If
+   * found, adds a copy of the mapped sequence to foundSeqs and removes the
+   * dbref from the list, so that later search steps do not process it again.
+   * If the mapping is not one-to-one, it is also added to the mappings set.
+   *
+   * @param seq
+   *          the dataset sequence we are searching from
+   * @param sourceRefs
+   *          the sequence's dbrefs to 'source'; resolved entries are removed
+   * @param foundSeqs
+   *          a list of cross-references to add to
+   * @param mappings
+   *          a set of sequence mappings to add to
+   */
+  static void findMappedDbrefs(SequenceI seq, List<DBRefEntry> sourceRefs,
+          List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+  {
+    Iterator<DBRefEntry> refs = sourceRefs.iterator();
+    while (refs.hasNext())
+    {
+      DBRefEntry dbref = refs.next();
+      Mapping map = dbref.getMap();
+      SequenceI mappedTo = (map == null) ? null : map.getTo();
+      if (mappedTo == null)
+      {
+        /*
+         * no mapped sequence on this dbref - leave it for later steps
+         */
+        continue;
+      }
+
+      foundSeqs.add(new Sequence(mappedTo));
+      refs.remove();
+
+      /*
+       * a Mapping may have no MapList (fetchCrossReferences checks for
+       * this too); the sequence is still a valid result, there is just
+       * no mapping to record
+       */
+      MapList mapList = map.getMap();
+      if (mapList == null)
+      {
+        continue;
+      }
+
+      /*
+       * check mapping is not 'direct' (it shouldn't be if we reach here)
+       * and add mapping (dna-to-peptide or vice versa) to the set
+       */
+      int fromRatio = mapList.getFromRatio();
+      int toRatio = mapList.getToRatio();
+      if (fromRatio != toRatio)
+      {
+        if (fromRatio == 3)
+        {
+          // map runs from seq (3 positions) to mappedTo
+          mappings.addMap(seq, mappedTo, mapList);
+        }
+        else
+        {
+          // map runs the other way, so invert it before adding
+          mappings.addMap(mappedTo, seq, mapList.getInverse());
+        }
+      }
+    }
+  }
+
+  /**
+   * Tries to fetch seq's database references to 'source' database, and adds
+   * every retrieved sequence to the foundSeqs list. If sequences are
+   * retrieved, tries to make a mapping between seq and each retrieved
+   * sequence and insert it into the database reference. Mapped-to sequences
+   * found on the retrieved sequences' own dbrefs are reconciled with the
+   * sequences already in the dataset.
+   * <p>
+   * Nothing is fetched when sourceRefs is empty. A failure in the fetcher is
+   * reported on stderr and otherwise ignored.
+   *
+   * @param seq
+   *          the sequence whose cross-references are being fetched
+   * @param sourceRefs
+   *          the dbrefs of seq which are still to be resolved
+   * @param foundSeqs
+   *          a list of cross-references to add to
+   * @param mappings
+   *          a set of sequence mappings to add to
+   * @param dna
+   *          true if seq is nucleotide; its negation is passed to the fetcher
+   * @param dataset
+   *          the alignment dataset; its sequences seed the id matcher used
+   *          to recognise mapped-to sequences that are already known
+   */
+  static void fetchCrossReferences(SequenceI seq,
+          List<DBRefEntry> sourceRefs, List<SequenceI> foundSeqs,
+          AlignedCodonFrame mappings, boolean dna, AlignmentI dataset)
+  {
+    if (sourceRefs.isEmpty())
+    {
+      /*
+       * every dbref was resolved locally (or there were none), so there
+       * is nothing to ask the remote database for
+       */
+      return;
+    }
+
+    ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
+    SequenceI[] retrieved;
+    try
+    {
+      retrieved = sftch.getSequences(sourceRefs, !dna);
+    } catch (Exception e)
+    {
+      System.err
+              .println("Problem whilst retrieving cross references for Sequence : "
+                      + seq.getName());
+      e.printStackTrace();
+      return;
+    }
+    if (retrieved == null)
+    {
+      return;
+    }
+
+    updateDbrefMappings(dna, seq, sourceRefs, retrieved, mappings);
+
+    SequenceIdMatcher matcher = new SequenceIdMatcher(
+            dataset.getSequences());
+    List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
+    CrossRef me = new CrossRef();
+    for (SequenceI retrievedSeq : retrieved)
+    {
+      // TODO: examine each sequence for 'redundancy'
+      DBRefEntry[] dbr = retrievedSeq.getDBRefs();
+      if (dbr != null)
+      {
+        for (DBRefEntry retrievedRef : dbr)
+        {
+          // find any entry where we should put in the sequence being
+          // cross-referenced into the map
+          Mapping map = retrievedRef.getMap();
+          if (map == null || map.getTo() == null || map.getMap() == null)
+          {
+            continue;
+          }
+
+          SequenceI matched = matcher.findIdMatch(map.getTo());
+          if (matched != null)
+          {
+            /*
+             * already got an xref to this sequence; update this
+             * map to point to the same sequence, and add
+             * any new dbrefs to it
+             */
+            DBRefEntry[] toRefs = map.getTo().getDBRefs();
+            if (toRefs != null) // getDBRefs() may return null
+            {
+              for (DBRefEntry ref : toRefs)
+              {
+                matched.addDBRef(ref); // add or update mapping
+              }
+            }
+            map.setTo(matched);
+          }
+          else
+          {
+            matcher.add(map.getTo());
+          }
+
+          try
+          {
+            // compare ms with seq and replace with seq in mapping
+            // if map is congruent
+            SequenceI ms = map.getTo();
+            int sf = map.getMap().getToLowest();
+            int st = map.getMap().getToHighest();
+            SequenceI mappedrg = ms.getSubSequence(sf, st);
+            if (mappedrg.getLength() > 0
+                    && ms.getSequenceAsString().equals(
+                            seq.getSequenceAsString()))
+            {
+              String msg = "Mapping updated from " + ms.getName()
+                      + " to retrieved crossreference "
+                      + seq.getName();
+              System.out.println(msg);
+              map.setTo(seq);
+              /*
+               * copy sequence features as well, avoiding
+               * duplication (e.g. same variation from 2
+               * transcripts)
+               */
+              SequenceFeature[] sfs = ms.getSequenceFeatures();
+              if (sfs != null)
+              {
+                for (SequenceFeature feat : sfs)
+                {
+                  /*
+                   * we override SequenceFeature.equals here (but
+                   * not elsewhere) to ignore Parent attribute
+                   * TODO not quite working yet!
+                   */
+                  if (!copiedFeatures
+                          .contains(me.new MySequenceFeature(feat)))
+                  {
+                    seq.addSequenceFeature(feat);
+                    copiedFeatures.add(feat);
+                  }
+                }
+              }
+            }
+            mappings.addMap(retrievedSeq.getDatasetSequence(),
+                    map.getTo(), map.getMap());
+          } catch (Exception e)
+          {
+            System.err
+                    .println("Exception when consolidating Mapped sequence set...");
+            e.printStackTrace(System.err);
+          }
+        }
+      }
+      retrievedSeq.updatePDBIds();
+      foundSeqs.add(retrievedSeq);
+    }
+  }
+
+  /**
+   * Tries to resolve each remaining dbref by searching the dataset for a
+   * sequence of complementary type to 'seq' that shares it. The search itself
+   * (and the adding of any match to foundSeqs and mappings) is delegated to
+   * searchDatasetForCrossReference; every dbref reported as found is removed
+   * from sourceRefs.
+   *
+   * @param seq
+   *          the sequence we are searching from
+   * @param source
+   *          the source database name (not used by this method)
+   * @param sourceRefs
+   *          the dbrefs still to be resolved; resolved entries are removed
+   * @param dataset
+   *          the sequences to search in
+   * @param foundSeqs
+   *          a list of cross-references to add to
+   * @param mappings
+   *          a set of sequence mappings to add to
+   */
+  static void findIndirectCrossReferences(SequenceI seq, String source,
+          List<DBRefEntry> sourceRefs, AlignmentI dataset,
+          List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+  {
+    for (Iterator<DBRefEntry> it = sourceRefs.iterator(); it.hasNext();)
+    {
+      DBRefEntry candidateRef = it.next();
+      if (searchDatasetForCrossReference(seq, candidateRef, dataset,
+              foundSeqs, mappings))
+      {
+        // resolved within the dataset, so no need to fetch it later
+        it.remove();
+      }
+    }
+  }
+
+  /**
+   * Searches the dataset for sequences of opposite type to 'excluding' which
+   * have a dbref matching the given one. Each such sequence is added to
+   * foundSeqs, unless it is already there. Where a matching dbref carries a
+   * mapping to a sequence that is not one-to-one, a mapping between
+   * 'excluding' and the matched sequence is added to the mappings set.
+   * <p>
+   * The dbref itself is not modified or removed here; the caller decides what
+   * to do with it based on the return value.
+   *
+   * @param excluding
+   *          a sequence to ignore (start point of search)
+   * @param dbref
+   *          a cross-reference to try to match
+   * @param dataset
+   *          sequences to search in; may be null, in which case nothing is
+   *          found
+   * @param foundSeqs
+   *          result list to add to
+   * @param mappings
+   *          a set of sequence mappings to add to; may be null, in which case
+   *          no mappings are recorded
+   * @return true if relationship found and sequence added
+   */
+  static boolean searchDatasetForCrossReference(SequenceI excluding,
+          DBRefEntry dbref, AlignmentI dataset, List<SequenceI> foundSeqs,
+          AlignedCodonFrame mappings)
+  {
+    final boolean searchFromDna = Comparison.isNucleotide(excluding);
+    if (dataset == null || dataset.getSequences() == null)
+    {
+      return false;
+    }
+
+    boolean matchFound = false;
+    List<SequenceI> datasetSeqs;
+    synchronized (datasetSeqs = dataset.getSequences())
+    {
+      for (SequenceI other : datasetSeqs)
+      {
+        if (other == null)
+        {
+          continue;
+        }
+        if (other.getDatasetSequence() != null)
+        {
+          System.err
+                  .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
+        }
+
+        /*
+         * skip, in this order: the start sequence itself, anything
+         * already in the results, anything of the same molecule type
+         */
+        if (other == excluding
+                || other == excluding.getDatasetSequence()
+                || foundSeqs.contains(other)
+                || Comparison.isNucleotide(other) == searchFromDna)
+        {
+          continue;
+        }
+
+        /*
+         * look for dbrefs on this sequence matching the one searched for
+         * (see DBRefUtils.searchRefs for the matching rules applied)
+         */
+        List<DBRefEntry> matchingRefs = DBRefUtils.searchRefs(
+                other.getDBRefs(), dbref);
+        if (matchingRefs.isEmpty())
+        {
+          continue;
+        }
+
+        matchFound = true;
+        foundSeqs.add(other);
+        if (mappings == null)
+        {
+          // no codon map object supplied, so nothing more to record
+          continue;
+        }
+
+        for (DBRefEntry matchingRef : matchingRefs)
+        {
+          if (!matchingRef.hasMap())
+          {
+            continue;
+          }
+          Mapping refMapping = matchingRef.getMap();
+          MapList ranges = refMapping.getMap();
+          if (refMapping.getTo() != null
+                  && ranges.getFromRatio() != ranges.getToRatio())
+          {
+            if (searchFromDna)
+            {
+              // map is from dna seq to a protein product
+              mappings.addMap(excluding, other, ranges);
+            }
+            else
+            {
+              // map is from protein seq to its coding dna
+              mappings.addMap(other, excluding, ranges.getInverse());
+            }
+          }
+        }
+      }
+    }
+    return matchFound;
+  }
+
+  /**
+   * Updates any empty mappings in the cross-references with one to a compatible
+   * retrieved sequence if found, and adds any new mappings to the
+   * AlignedCodonFrame. Cross-references which already have a mapping, or for
+   * which no retrieved sequence matches by id, are left unchanged.
+   *
+   * @param dna
+   *          true if mapFrom is a nucleotide sequence
+   * @param mapFrom
+   *          the sequence the cross-references belong to
+   * @param xrefs
+   *          the cross-references to update
+   * @param retrieved
+   *          the sequences fetched for the cross-references
+   * @param mappings
+   *          a set of sequence mappings to add to
+   */
+  static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
+          List<DBRefEntry> xrefs, SequenceI[] retrieved,
+          AlignedCodonFrame mappings)
+  {
+    SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved);
+    for (DBRefEntry xref : xrefs)
+    {
+      if (xref.hasMap())
+      {
+        continue;
+      }
+
+      // retrieved sequences are looked up by a name of form source|accession
+      String targetSeqName = xref.getSource() + "|"
+              + xref.getAccessionId();
+      SequenceI[] matches = matcher.findAllIdMatches(targetSeqName);
+      if (matches == null)
+      {
+        /*
+         * nothing retrieved for this xref - carry on with the others
+         * (returning here would leave all later xrefs unmapped)
+         */
+        continue;
+      }
+
+      for (SequenceI seq : matches)
+      {
+        /*
+         * in both cases 'mapping' ends up oriented from mapFrom to seq
+         */
+        MapList mapping;
+        if (dna)
+        {
+          mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
+        }
+        else
+        {
+          mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
+          if (mapping != null)
+          {
+            mapping = mapping.getInverse();
+          }
+        }
+        if (mapping == null)
+        {
+          continue;
+        }
+
+        xref.setMap(new Mapping(seq, mapping));
+        if (dna)
+        {
+          AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
+          mappings.addMap(mapFrom, seq, mapping);
+        }
+        else
+        {
+          // the codon frame is given the mapping in the seq-to-mapFrom sense
+          mappings.addMap(seq, mapFrom, mapping.getInverse());
+        }
+      }
+    }
+  }
+}
diff --git a/test/jalview/analysis/CrossRefsTest.java b/test/jalview/analysis/CrossRefsTest.java

new file mode 100644 (file)

index 0000000..f06cab0
--- /dev/null
+++ b/test/jalview/analysis/CrossRefsTest.java
@@ -0,0 +1,302 @@
+package jalview.analysis;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
+
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class CrossRefsTest
+{
+
+  /**
+   * Checks that 'product' sequences are found when the selected sequence has
+   * dbrefs which each carry a mapping to a sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_fromDbRefMap()
+  {
+    /*
+     * first peptide, with one dbref and one feature
+     */
+    SequenceI firstPeptide = new Sequence("Q9ZTS2", "MALFQRSV");
+    firstPeptide.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+    SequenceFeature firstFeature = new SequenceFeature("type", "desc", 12,
+            14, 1f, "group");
+    firstPeptide.addSequenceFeature(firstFeature);
+
+    /*
+     * second peptide, likewise
+     */
+    SequenceI secondPeptide = new Sequence("P30419", "MTRRSQIF");
+    secondPeptide.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+    SequenceFeature secondFeature = new SequenceFeature("type2", "desc2",
+            13, 15, 12f, "group2");
+    secondPeptide.addSequenceFeature(secondFeature);
+
+    /*
+     * the nucleotide sequence that goes in the alignment; it gets one
+     * UNIPROT dbref per peptide, each holding a dna-to-peptide mapping
+     * NOTE(review): the ranges {1, 24} -> {1, 3} at ratio 3:1 do not look
+     * self-consistent, nor do they fit the 16 base sequence - confirm that
+     * the values are irrelevant to what is being tested
+     */
+    SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    MapList toFirst = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+            3, 1);
+    nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2",
+            new Mapping(firstPeptide, toFirst)));
+    MapList toSecond = new MapList(new int[] { 1, 24 },
+            new int[] { 1, 3 }, 3, 1);
+    nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419",
+            new Mapping(secondPeptide, toSecond)));
+
+    /*
+     * ask for the UNIPROT xrefs of the nucleotide sequence; both mapped
+     * peptides are expected back
+     */
+    SequenceI[] selection = new SequenceI[] { nucleotide };
+    AlignmentI alignment = new Alignment(new SequenceI[] { nucleotide });
+    AlignmentI found = CrossRefs.findXrefSequences(selection, true,
+            "UNIPROT", alignment);
+    assertEquals(2, found.getHeight());
+
+    /*
+     * the results should be copies of the mapped sequences, complete
+     * with copies of their dbrefs and features
+     */
+    checkCopySequence(firstPeptide, found.getSequenceAt(0));
+    checkCopySequence(secondPeptide, found.getSequenceAt(1));
+  }
+
+  /**
+   * Checks that 'product' sequences are found when there is only an indirect
+   * xref - not on the peptide sequence itself, but on a nucleotide sequence
+   * in the alignment with which it shares a protein dbref
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_indirectDbrefToNucleotide()
+  {
+    /*
+     * Alignment contents:
+     *   - nucleotide with dbrefs EMBL|AF039662 and UNIPROT|Q9ZTS2
+     *   - peptide    with dbref  UNIPROT|Q9ZTS2
+     */
+    SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    SequenceI peptide = new Sequence("Q9ZTS2", "MASVSATMISTS");
+    peptide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    nucleotide.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    AlignmentI alignment = new Alignment(new SequenceI[] { nucleotide,
+        peptide });
+
+    /*
+     * Find EMBL xrefs for the peptide
+     * - it has no EMBL dbref of its own
+     * - but the nucleotide with matching peptide dbref does, so is returned
+     */
+    SequenceI[] selection = new SequenceI[] { peptide };
+    AlignmentI found = CrossRefs.findXrefSequences(selection, false,
+            "EMBL", alignment);
+    assertEquals(1, found.getHeight());
+    assertSame(nucleotide, found.getSequenceAt(0));
+  }
+
+  /**
+   * Checks that 'product' sequences are found when there is only an indirect
+   * xref - not on the nucleotide sequence itself, but on a peptide sequence
+   * in the alignment with which it shares a nucleotide dbref
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_indirectDbrefToProtein()
+  {
+    /*
+     * Alignment contents:
+     *   - nucleotide with dbref  EMBL|AF039662
+     *   - peptide    with dbrefs EMBL|AF039662 and UNIPROT|Q9ZTS2
+     */
+    SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    nucleotide.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    SequenceI peptide = new Sequence("Q9ZTS2", "MASVSATMISTS");
+    peptide.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    peptide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    AlignmentI alignment = new Alignment(new SequenceI[] { nucleotide,
+        peptide });
+
+    /*
+     * Find UNIPROT xrefs for the nucleotide
+     * - it has no UNIPROT dbref of its own
+     * - but the peptide with matching nucleotide dbref does, so is returned
+     */
+    SequenceI[] selection = new SequenceI[] { nucleotide };
+    AlignmentI found = CrossRefs.findXrefSequences(selection, true,
+            "UNIPROT", alignment);
+    assertEquals(1, found.getHeight());
+    assertSame(peptide, found.getSequenceAt(0));
+  }
+
+  /**
+   * Checks that nothing is returned when the selected sequence has no dbref
+   * to the desired source, and there is no indirect reference via another
+   * sequence in the alignment
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_noDbrefs()
+  {
+    /*
+     * two nucleotide sequences; only the first has a UNIPROT dbref
+     */
+    SequenceI withRef = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    withRef.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    SequenceI withoutRef = new Sequence("AJ307031", "AAACCCTTT");
+    AlignmentI alignment = new Alignment(new SequenceI[] { withRef,
+        withoutRef });
+
+    /*
+     * find UNIPROT xrefs for the second nucleotide sequence - it has no
+     * dbrefs of its own, and the only other sequence in the alignment is
+     * also nucleotide, so no result is expected
+     */
+    SequenceI[] selection = new SequenceI[] { withoutRef };
+    AlignmentI found = CrossRefs.findXrefSequences(selection, true,
+            "UNIPROT", alignment);
+    assertNull(found);
+  }
+
+  /**
+   * Checks that 'product' sequences are found when the selected sequence has
+   * dbrefs with no mapping, so that a fetch from the database is triggered.
+   * The fetcher is replaced by a stub which returns two fixed peptides.
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_withFetch()
+  {
+    final SequenceI fetched1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+    final SequenceI fetched2 = new Sequence("P00314", "MRKLLAASG");
+
+    /*
+     * nucleotide sequence with three unmapped UNIPROT dbrefs
+     */
+    SequenceI query = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    String[] accessions = new String[] { "Q9ZTS2", "P30419", "P00314" };
+    for (String accession : accessions)
+    {
+      query.addDBRef(new DBRefEntry("UNIPROT", "0", accession));
+    }
+
+    /*
+     * stub fetcher: any source is fetchable, and every request is
+     * answered with the same two peptides
+     */
+    SequenceFetcher stubFetcher = new SequenceFetcher()
+    {
+      @Override
+      public boolean isFetchable(String source)
+      {
+        return true;
+      }
+
+      @Override
+      public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+      {
+        SequenceI[] canned = new SequenceI[] { fetched1, fetched2 };
+        return canned;
+      }
+    };
+    SequenceFetcherFactory.setSequenceFetcher(stubFetcher);
+
+    /*
+     * find UNIPROT xrefs for the nucleotide sequence
+     */
+    SequenceI[] selection = new SequenceI[] { query };
+    AlignmentI alignment = new Alignment(new SequenceI[] { query });
+    AlignmentI found = CrossRefs.findXrefSequences(selection, true,
+            "UNIPROT", alignment);
+    assertEquals(2, found.getHeight());
+    assertSame(fetched1, found.getSequenceAt(0));
+    assertSame(fetched2, found.getSequenceAt(1));
+  }
+
+  /**
+   * Helper method asserting that two sequences are distinct objects with the
+   * same name, start, end, sequence string, dbrefs and features, and that
+   * the first dbref and first feature (if any) are themselves distinct
+   * objects, i.e. were copied rather than shared
+   *
+   * @param seq1
+   *          the sequence supplying the expected values
+   * @param seq2
+   *          the sequence being checked
+   */
+  private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+  {
+    assertNotSame(seq1, seq2);
+
+    /*
+     * simple properties
+     */
+    assertEquals(seq1.getName(), seq2.getName());
+    assertEquals(seq1.getStart(), seq2.getStart());
+    assertEquals(seq1.getEnd(), seq2.getEnd());
+    assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
+
+    /*
+     * dbrefs: equal as arrays, but the entries must be copies; checking
+     * the first entry is taken as sufficient evidence
+     */
+    DBRefEntry[] expectedRefs = seq1.getDBRefs();
+    DBRefEntry[] actualRefs = seq2.getDBRefs();
+    assertArrayEquals(expectedRefs, actualRefs);
+    if (expectedRefs.length > 0)
+    {
+      assertNotSame(expectedRefs[0], actualRefs[0]);
+    }
+
+    /*
+     * features: same approach as for dbrefs
+     */
+    SequenceFeature[] expectedFeatures = seq1.getSequenceFeatures();
+    SequenceFeature[] actualFeatures = seq2.getSequenceFeatures();
+    assertArrayEquals(expectedFeatures, actualFeatures);
+    if (expectedFeatures.length > 0)
+    {
+      assertNotSame(expectedFeatures[0], actualFeatures[0]);
+    }
+  }
+
+  /**
+   * Checks that 'product' sequences are found when the selected sequence has
+   * two dbrefs with no mapping, so that a fetch from the database is
+   * triggered.
+   *
+   * @see http://issues.jalview.org/browse/JAL-2029
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_withFetchMultipleRefs()
+  {
+    /*
+     * nucleotide sequence X07547 with two unmapped UNIPROT dbrefs; the
+     * two peptides the stub fetcher hands out have identical residues
+     */
+    final SequenceI firstPeptide = new Sequence("B0BCM4", "MGKGIL");
+    final SequenceI secondPeptide = new Sequence("P0CE20", "MGKGIL");
+    SequenceI query = new Sequence("X07547", "GGGGCAGCACAAGAAC");
+    query.addDBRef(new DBRefEntry("UNIPROT", "0", "B0BCM4"));
+    query.addDBRef(new DBRefEntry("UNIPROT", "0", "P0CE20"));
+
+    /*
+     * stub fetcher returning a single peptide per call: the first peptide
+     * on the first call, the second peptide on every later call
+     * NOTE(review): both peptides can only reach the result if the
+     * fetcher is called at least twice - confirm this agrees with how
+     * findXrefSequences requests the dbrefs of a single sequence
+     */
+    SequenceFetcher stubFetcher = new SequenceFetcher()
+    {
+      int invocations = 0;
+
+      @Override
+      public boolean isFetchable(String source)
+      {
+        return true;
+      }
+
+      @Override
+      public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+      {
+        // pending Mockito with its thenReturn(pep1).thenReturn(pep2) syntax!
+        SequenceI next = (invocations == 0) ? firstPeptide : secondPeptide;
+        invocations++;
+        return new SequenceI[] { next };
+      }
+    };
+    SequenceFetcherFactory.setSequenceFetcher(stubFetcher);
+
+    /*
+     * find UNIPROT xrefs for the nucleotide sequence
+     */
+    SequenceI[] selection = new SequenceI[] { query };
+    AlignmentI alignment = new Alignment(new SequenceI[] { query });
+    AlignmentI found = CrossRefs.findXrefSequences(selection, true,
+            "UNIPROT", alignment);
+    assertEquals(2, found.getHeight());
+    assertSame(firstPeptide, found.getSequenceAt(0));
+    assertSame(secondPeptide, found.getSequenceAt(1));
+  }
+
+}
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Wed, 25 May 2016 08:25:19 +0000 (09:25 +0100)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Wed, 25 May 2016 08:25:19 +0000 (09:25 +0100)
src/jalview/analysis/CrossRefs.java	[new file with mode: 0644]	patch \| blob
test/jalview/analysis/CrossRefsTest.java	[new file with mode: 0644]	patch \| blob