Merge branch 'develop' into features/JAL-2110_makeSenseOfCrossRef

author gmungoc <g.m.carstairs@dundee.ac.uk>

Thu, 9 Jun 2016 13:53:10 +0000 (14:53 +0100)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Thu, 9 Jun 2016 13:53:10 +0000 (14:53 +0100)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 9 Jun 2016 13:53:10 +0000 (14:53 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 9 Jun 2016 13:53:10 +0000 (14:53 +0100)
diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java

index 7e77fc1..cb664df 100644 (file)
--- a/src/jalview/analysis/CrossRef.java
+++ b/src/jalview/analysis/CrossRef.java
@@ -24,19 +24,19 @@ import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
  import jalview.datamodel.Mapping;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
  import jalview.util.DBRefUtils;
  import jalview.util.MapList;
-import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
  import jalview.ws.seqfetcher.ASequenceFetcher;
  
  import java.util.ArrayList;
+import java.util.Arrays;
  import java.util.List;
-import java.util.Vector;
  
  /**
   * Functions for cross-referencing sequence databases. user must first specify
@@ -69,153 +69,108 @@ public class CrossRef
    }
  
    /**
-   * Select just the DNA or protein references for a protein or dna sequence
-   * 
-   * @param fromDna
-   *          if true, select references from DNA (i.e. Protein databases), else
-   *          DNA database references
-   * @param refs
-   *          a set of references to select from
-   * @return
-   */
-  public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs)
-  {
-    return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS
-            : DBRefSource.DNACODINGDBS);
-    // could attempt to find other cross
-    // refs here - ie PDB xrefs
-    // (not dna, not protein seq)
-  }
-
-  /**
-   * @param dna
-   *          true if seqs are DNA seqs
-   * @param seqs
-   * @return a list of sequence database cross reference source types
-   */
-  public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs)
-  {
-    return findSequenceXrefTypes(dna, seqs, null);
-  }
  
-  /**
-   * Indirect references are references from other sequences from the dataset to
-   * any of the direct DBRefEntrys on the given sequences.
+   * Returns a list of distinct database sources for which sequences have either
+   * <ul>
+   * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+   * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+   * reference from another sequence in the dataset which has a cross-reference
+   * to a direct DBRefEntry on the given sequence</li>
+   * </ul>
     * 
     * @param dna
-   *          true if seqs are DNA seqs
+   *          true if seqs are nucleotide
     * @param seqs
-   * @return a list of sequence database cross reference source types
+   *          sequences whose xrefs we are seeking
+   * @param dataset
+   *          an alignment to search for indirect references
+   * @return
     */
-  public static String[] findSequenceXrefTypes(boolean dna,
+  public static List<String> findXrefSourcesForSequences(boolean dna,
            SequenceI[] seqs, AlignmentI dataset)
    {
-    String[] dbrefs = null;
-    List<String> refs = new ArrayList<String>();
+    List<String> sources = new ArrayList<String>();
      for (SequenceI seq : seqs)
      {
        if (seq != null)
        {
-        SequenceI dss = seq;
-        while (dss.getDatasetSequence() != null)
-        {
-          dss = dss.getDatasetSequence();
-        }
-        DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs());
-        if (rfs != null)
-        {
-          for (DBRefEntry ref : rfs)
-          {
-            if (!refs.contains(ref.getSource()))
-            {
-              refs.add(ref.getSource());
-            }
-          }
-        }
-        if (dataset != null)
-        {
-          // search for references to this sequence's direct references.
-          DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
-          List<SequenceI> rseqs = new ArrayList<SequenceI>();
-          CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
-                  null); // don't need to specify codon frame for mapping here
-          for (SequenceI rs : rseqs)
-          {
-            DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs());
-            if (xrs != null)
-            {
-              for (DBRefEntry ref : xrs)
-              {
-                if (!refs.contains(ref.getSource()))
-                {
-                  refs.add(ref.getSource());
-                }
-              }
-            }
-            // looks like copy and paste - change rfs to xrs?
-            // for (int r = 0; rfs != null && r < rfs.length; r++)
-            // {
-            // if (!refs.contains(rfs[r].getSource()))
-            // {
-            // refs.add(rfs[r].getSource());
-            // }
-            // }
-          }
-        }
+        findXrefSourcesForSequence(seq, dna, dataset, sources);
        }
      }
-    if (refs.size() > 0)
-    {
-      dbrefs = new String[refs.size()];
-      refs.toArray(dbrefs);
-    }
-    return dbrefs;
+    return sources;
    }
  
-  public static boolean hasCdnaMap(SequenceI[] seqs)
+  /**
+   * Returns a list of distinct database sources for which a sequence has either
+   * <ul>
+   * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+   * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+   * reference from another sequence in the dataset which has a cross-reference
+   * to a direct DBRefEntry on the given sequence</li>
+   * </ul>
+   * 
+   * @param seq
+   *          the sequence whose dbrefs we are searching against
+   * @param dna
+   *          true if the sequence is nucleotide
+   * @param dataset
+   *          an alignment to search for indirect references
+   * @param sources
+   *          a list of sources to add matches to
+   */
+  static void findXrefSourcesForSequence(SequenceI seq, boolean dna,
+          AlignmentI dataset, List<String> sources)
    {
-    // TODO unused - remove?
-    String[] reftypes = findSequenceXrefTypes(false, seqs);
-    for (int s = 0; s < reftypes.length; s++)
+    /*
+     * first find seq's xrefs (dna-to-peptide or peptide-to-dna)
+     */
+    DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!dna, seq.getDBRefs());
+    addXrefsToSources(rfs, sources);
+    if (dataset != null)
      {
-      if (reftypes.equals(DBRefSource.EMBLCDS))
+      /*
+       * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
+       */
+      DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(dna, seq.getDBRefs());
+      List<SequenceI> rseqs = new ArrayList<SequenceI>();
+
+      /*
+       * find sequences in the alignment which xref one of these DBRefs
+       * i.e. is xref-ed to a common sequence identifier
+       */
+      CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs, null);
+
+      /*
+       * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources
+       */
+      for (SequenceI rs : rseqs)
        {
-        return true;
-        // no map
+        DBRefEntry[] xrs = DBRefUtils.selectDbRefs(!dna, rs.getDBRefs());
+        addXrefsToSources(xrs, sources);
        }
      }
-    return false;
    }
  
-  public static SequenceI[] getCdnaMap(SequenceI[] seqs)
+  /**
+   * Helper method that adds the source identifiers of some cross-references to
+   * a (non-redundant) list of database sources
+   * 
+   * @param xrefs
+   * @param sources
+   */
+  static void addXrefsToSources(DBRefEntry[] xrefs, List<String> sources)
    {
-    // TODO unused - remove?
-    Vector cseqs = new Vector();
-    for (int s = 0; s < seqs.length; s++)
+    if (xrefs != null)
      {
-      DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRefs());
-      for (int c = 0; c < cdna.length; c++)
+      for (DBRefEntry ref : xrefs)
        {
-        if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
+        String source = ref.getSource();
+        if (!sources.contains(source))
          {
-          System.err
-                  .println("TODO: unimplemented sequence retrieval for coding region sequence.");
-          // TODO: retrieve CDS dataset sequences
-          // need global dataset sequence retriever/resolver to reuse refs
-          // and construct Mapping entry.
-          // insert gaps in CDS according to peptide gaps.
-          // add gapped sequence to cseqs
+          sources.add(source);
          }
        }
      }
-    if (cseqs.size() > 0)
-    {
-      SequenceI[] rsqs = new SequenceI[cseqs.size()];
-      cseqs.copyInto(rsqs);
-      return rsqs;
-    }
-    return null;
-
    }
  
    /**
@@ -244,14 +199,20 @@ public class CrossRef
          dss = dss.getDatasetSequence();
        }
        boolean found = false;
-      DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRefs());
+      DBRefEntry[] xrfs = DBRefUtils.selectDbRefs(!dna, dss.getDBRefs());
        if ((xrfs == null || xrfs.length == 0) && dataset != null)
        {
-        System.out.println("Attempting to find ds Xrefs refs.");
-        // FIXME should be dss not seq here?
-        DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
-        // less ambiguous would be a 'find primary dbRefEntry' method.
-        // filter for desired source xref here
+        /*
+         * found no suitable dbrefs on sequence - look for sequences in the
+         * alignment which share a dbref with this one
+         */
+        DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(dna, seq.getDBRefs());
+
+        /*
+         * find sequences (except this one!), of complementary type,
+         *  which have a dbref to an accession id for this sequence,
+         *  and add them to the results
+         */
          found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
                  rseqs, cf);
        }
@@ -266,6 +227,7 @@ public class CrossRef
          {
            if (xref.getMap().getTo() != null)
            {
+            found = true;
              SequenceI rsq = new Sequence(xref.getMap().getTo());
              rseqs.add(rsq);
              if (xref.getMap().getMap().getFromRatio() != xref
@@ -283,7 +245,6 @@ public class CrossRef
                  cf.addMap(rsq, dss, xref.getMap().getMap().getInverse());
                }
              }
-            found = true;
            }
          }
          if (!found)
@@ -292,7 +253,7 @@ public class CrossRef
            // xrefs on this sequence.
            if (dataset != null)
            {
-            found |= searchDataset(dss, xref, dataset, rseqs, cf, false,
+            found = searchDataset(dss, xref, dataset, rseqs, cf, false,/*true?*/
                      !dna);
              if (found)
              {
@@ -305,12 +266,8 @@ public class CrossRef
        {
          if (xrfs != null && xrfs.length > 0)
          {
-          // Try and get the sequence reference...
-          /*
-           * Ideal world - we ask for a sequence fetcher implementation here if
-           * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) (
-           */
-          ASequenceFetcher sftch = new SequenceFetcher();
+          ASequenceFetcher sftch = SequenceFetcherFactory
+                  .getSequenceFetcher();
            SequenceI[] retrieved = null;
            int l = xrfs.length;
            for (int r = 0; r < xrfs.length; r++)
@@ -341,7 +298,7 @@ public class CrossRef
              xrfs = t;
              try
              {
-              retrieved = sftch.getSequences(xrfs, !dna);
+              retrieved = sftch.getSequences(Arrays.asList(xrfs), !dna);
                // problem here is we don't know which of xrfs resulted in which
                // retrieved element
              } catch (Exception e)
@@ -446,6 +403,7 @@ public class CrossRef
                              cf.addMap(retrieved[rs].getDatasetSequence(),
                                      dss, map.getMap());
                            }
+                          // TODO remove this 'else' and the cf.addMap above?
                            else
                            {
                              cf.addMap(retrieved[rs].getDatasetSequence(),
@@ -571,38 +529,26 @@ public class CrossRef
        // add in wildcards
        xref.setVersion(null);
        xref.setMap(null);
-      found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna);
+      found |= searchDataset(sequenceI, xref, dataset, rseqs, cf, false,
+              dna);
      }
      return found;
    }
  
    /**
-   * search a given sequence dataset for references matching cross-references to
-   * the given sequence
+   * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the
+   * associated sequence to rseqs
     * 
     * @param sequenceI
+   *          a sequence to ignore (start point of search)
     * @param xrf
+   *          a cross-reference to try to match
     * @param dataset
+   *          sequences to search in
     * @param rseqs
-   *          set of unique sequences
+   *          result list to add to
     * @param cf
-   * @return true if one or more unique sequences were found and added
-   */
-  public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
-          AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf)
-  {
-    return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);
-  }
-
-  /**
-   * TODO: generalise to different protein classifications Search dataset for
-   * DBRefEntrys matching the given one (xrf) and add the associated sequence to
-   * rseq.
-   * 
-   * @param sequenceI
-   * @param xrf
-   * @param dataset
-   * @param rseqs
+   *          a set of sequence mappings to add to
     * @param direct
     *          - search all references or only subset
     * @param dna
@@ -614,7 +560,6 @@ public class CrossRef
            boolean direct, boolean dna)
    {
      boolean found = false;
-    SequenceI[] typer = new SequenceI[1];
      if (dataset == null)
      {
        return false;
@@ -636,105 +581,84 @@ public class CrossRef
              System.err
                      .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
            }
-          if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())
+          if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence())
            {
-            // check if this is the correct sequence type
+            continue;
+          }
+          // check if this is the correct sequence type
+          {
+            // TODO 'direct' is always set to false - remove?
+            // or should it be 'true' from findXrefSequences?
+            // also its Javadoc conflicts with its use:
+            // test below implies 'direct' means find complementary sequences,
+            // !direct means select same molecule type
+            boolean isDna = Comparison
+                    .isNucleotide(new SequenceI[] { nxt });
+            if ((direct && isDna == dna) || (!direct && isDna != dna))
              {
-              typer[0] = nxt;
-              boolean isDna = jalview.util.Comparison.isNucleotide(typer);
-              if ((direct && isDna == dna) || (!direct && isDna != dna))
-              {
-                // skip this sequence because it is same molecule type
-                continue;
-              }
+              // skip this sequence because it is wrong molecule type
+              continue;
              }
+          }
  
-            // look for direct or indirect references in common
-            DBRefEntry[] poss = nxt.getDBRefs(), cands = null;
-            if (direct)
-            {
-              cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
-            }
-            else
-            {
-              poss = CrossRef.findXDbRefs(dna, poss); //
-              cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
-            }
-            if (cands != null)
+          // look for direct or indirect references in common
+          DBRefEntry[] poss = nxt.getDBRefs();
+          List<DBRefEntry> cands = null;
+          /*
+           * TODO does this make any sense?
+           * if 'direct', search the dbrefs for xrf
+           * else, filter the dbrefs by type and then search for xrf
+           * - the result is the same isn't it?
+           */
+          if (direct)
+          {
+            cands = DBRefUtils.searchRefs(poss, xrf);
+          }
+          else
+          {
+            poss = DBRefUtils.selectDbRefs(!dna, poss);
+            cands = DBRefUtils.searchRefs(poss, xrf);
+          }
+          if (!cands.isEmpty())
+          {
+            if (!rseqs.contains(nxt))
              {
-              if (!rseqs.contains(nxt))
+              found = true;
+              rseqs.add(nxt);
+              if (cf != null)
                {
-                rseqs.add(nxt);
-                boolean foundmap = cf != null;
                  // don't search if we aren't given a codon map object
-                for (int r = 0; foundmap && r < cands.length; r++)
+                for (DBRefEntry candidate : cands)
                  {
-                  if (cands[r].hasMap())
+                  Mapping mapping = candidate.getMap();
+                  if (mapping != null)
                    {
-                    if (cands[r].getMap().getTo() != null
-                            && cands[r].getMap().getMap().getFromRatio() != cands[r]
-                                    .getMap().getMap().getToRatio())
+                    MapList map = mapping.getMap();
+                    if (mapping.getTo() != null
+                            && map.getFromRatio() != map.getToRatio())
                      {
-                      foundmap = true;
                        // get sense of map correct for adding to product
                        // alignment.
                        if (dna)
                        {
                          // map is from dna seq to a protein product
-                        cf.addMap(sequenceI, nxt, cands[r].getMap()
-                                .getMap());
+                        cf.addMap(sequenceI, nxt, map);
                        }
                        else
                        {
                          // map should be from protein seq to its coding dna
-                        cf.addMap(nxt, sequenceI, cands[r].getMap()
-                                .getMap().getInverse());
+                        cf.addMap(nxt, sequenceI, map.getInverse());
                        }
                      }
                    }
                  }
-                // TODO: add mapping between sequences if necessary
-                found = true;
                }
+              // TODO: add mapping between sequences if necessary
              }
-
            }
          }
        }
      }
      return found;
    }
-
-  /**
-   * precalculate different products that can be found for seqs in dataset and
-   * return them.
-   * 
-   * @param dna
-   * @param seqs
-   * @param dataset
-   * @param fake
-   *          - don't actually build lists - just get types
-   * @return public static Object[] buildXProductsList(boolean dna, SequenceI[]
-   *         seqs, AlignmentI dataset, boolean fake) { String types[] =
-   *         jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,
-   *         dataset); if (types != null) { System.out.println("Xref Types for:
-   *         "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) {
-   *         System.out.println("Type: " + types[t]); SequenceI[] prod =
-   *         jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);
-   *         System.out.println("Found " + ((prod == null) ? "no" : "" +
-   *         prod.length) + " products"); if (prod!=null) { for (int p=0;
-   *         p<prod.length; p++) { System.out.println("Prod "+p+":
-   *         "+prod[p].getDisplayId(true)); } } } } else {
-   *         System.out.println("Trying getProducts for
-   *         "+al.getSequenceAt(0).getDisplayId(true));
-   *         System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot"));
-   *         // have a bash at finding the products amongst all the retrieved
-   *         sequences. SequenceI[] prod =
-   *         jalview.analysis.CrossRef.findXrefSequences(al
-   *         .getSequencesArray(), dna, null, ds); System.out.println("Found " +
-   *         ((prod == null) ? "no" : "" + prod.length) + " products"); if
-   *         (prod!=null) { // select non-equivalent sequences from dataset list
-   *         for (int p=0; p<prod.length; p++) { System.out.println("Prod "+p+":
-   *         "+prod[p].getDisplayId(true)); } } } }
-   */
  }
diff --git a/src/jalview/analysis/CrossRefs.java b/src/jalview/analysis/CrossRefs.java

new file mode 100644 (file)

index 0000000..0f3f425
--- /dev/null
+++ b/src/jalview/analysis/CrossRefs.java
@@ -0,0 +1,487 @@
+package jalview.analysis;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcherFactory;
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+public class CrossRefs
+{
+  /**
+   * Finds cross-references for sequences from a specified source database.
+   * These may be found in four ways:
+   * <ul>
+   * <li>as a DBRefEntry on the known sequence, which has a mapped-to sequence</li>
+   * <li>a sequence of complementary type in the alignment dataset, which has a
+   * DBRefEntry to one of the known sequence's 'direct' DBRefs</li>
+   * <li>a sequence of complementary type in the alignment, which has a
+   * DBRefEntry to one of the known sequence's 'cross-ref' DBRefs</li>
+   * <li>by fetching the accession from the remote database</li>
+   * </ul>
+   * 
+   * @param seqs
+   *          the sequences whose cross-references we are searching for
+   * @param dna
+   *          true if the sequences are from a nucleotide alignment, else false
+   * @param source
+   *          the database source we want cross-references to
+   * @param dataset
+   *          the alignment dataset the sequences belong to
+   * @return an alignment containing cross-reference sequences, or null if none
+   *         found
+   */
+  public static AlignmentI findXrefSequences(SequenceI[] seqs, boolean dna,
+          String source, AlignmentI dataset)
+  {
+    List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
+    AlignedCodonFrame mappings = new AlignedCodonFrame();
+
+    List<DBRefEntry> sourceRefs = new ArrayList<DBRefEntry>();
+
+    for (SequenceI seq : seqs)
+    {
+      if (dna != Comparison.isNucleotide(seq))
+      {
+        /*
+         * mixed alignment, and this sequence is of the wrong type
+         */
+        continue;
+      }
+
+      /*
+       * get this sequence's dbrefs to source database (if any)
+       */
+      List<DBRefEntry> seqSourceRefs = DBRefUtils.searchRefsForSource(
+              seq.getDBRefs(), source);
+
+      /*
+       * first extract any mapped sequences from sourceRefs
+       */
+      findMappedDbrefs(seq, seqSourceRefs, foundSeqs, mappings);
+
+      /*
+       * for remaining sourceRefs, try to match a 
+       * complementary sequence in the dataset
+       */
+      findIndirectCrossReferences(seq, source, seqSourceRefs, dataset,
+              foundSeqs, mappings);
+    }
+
+    /*
+     * fetch any remaining sourceRefs from the source database
+     */
+    fetchCrossReferences(sourceRefs, foundSeqs, mappings, dna, dataset);
+
+    if (foundSeqs.isEmpty())
+    {
+      return null;
+    }
+    AlignmentI crossRefs = new Alignment(
+            foundSeqs.toArray(new SequenceI[foundSeqs.size()]));
+    crossRefs.addCodonFrame(mappings);
+    return crossRefs;
+  }
+
+  /**
+   * Looks for DBRefEntrys to 'source' which have a mapping to a sequence. If
+   * found, adds the sequence to foundSeqs and removes the dbref from the list.
+   * 
+   * @param seq
+   *          the dataset sequence we are searching from
+   * @param sourceRefs
+   *          the sequence's dbrefs to 'source'
+   * @param foundSeqs
+   *          a list of cross-references to add to
+   * @param mappings
+   *          a set of sequence mappings to add to
+   * @return
+   */
+  static void findMappedDbrefs(SequenceI seq, List<DBRefEntry> sourceRefs,
+          List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+  {
+    Iterator<DBRefEntry> refs = sourceRefs.iterator();
+    while (refs.hasNext())
+    {
+      DBRefEntry dbref = refs.next();
+      Mapping map = dbref.getMap();
+      if (map != null)
+      {
+        SequenceI mappedTo = map.getTo();
+        if (mappedTo != null)
+        {
+          foundSeqs.add(new Sequence(mappedTo));
+          refs.remove();
+      
+          /*
+           * check mapping is not 'direct' (it shouldn't be if we reach here)
+           * and add mapping (dna-to-peptide or vice versa) to the set
+           */
+          MapList mapList = map.getMap();
+          int fromRatio = mapList.getFromRatio();
+          int toRatio = mapList.getToRatio();
+          if (fromRatio != toRatio)
+          {
+            if (fromRatio == 3)
+            {
+              mappings.addMap(seq, mappedTo, mapList);
+            }
+            else
+            {
+              mappings.addMap(mappedTo, seq, mapList.getInverse());
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Tries to fetch seq's database references to 'source' database, and add them
+   * to the foundSeqs list. If found, tries to make a mapping between seq and
+   * the retrieved sequence and insert it into the database reference.
+   * 
+   * @param seq
+   * @param sourceRefs
+   * @param foundSeqs
+   * @param mappings
+   * @param dna
+   */
+  static void fetchCrossReferences(SequenceI seq,
+          List<DBRefEntry> sourceRefs, List<SequenceI> foundSeqs,
+          AlignedCodonFrame mappings, boolean dna, AlignmentI dataset)
+  {
+    ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
+    SequenceI[] retrieved;
+    try
+    {
+      retrieved = sftch.getSequences(sourceRefs, !dna);
+    } catch (Exception e)
+    {
+      System.err
+              .println("Problem whilst retrieving cross references for Sequence : "
+                      + seq.getName());
+      e.printStackTrace();
+      return;
+    }
+
+    if (retrieved != null)
+    {
+      updateDbrefMappings(dna, seq, sourceRefs, retrieved, mappings);
+
+      SequenceIdMatcher matcher = new SequenceIdMatcher(
+              dataset.getSequences());
+      List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
+      CrossRef me = new CrossRef();
+      for (int rs = 0; rs < retrieved.length; rs++)
+      {
+        // TODO: examine each sequence for 'redundancy'
+        DBRefEntry[] dbr = retrieved[rs].getDBRefs();
+        if (dbr != null && dbr.length > 0)
+        {
+          for (int di = 0; di < dbr.length; di++)
+          {
+            // find any entry where we should put in the sequence being
+            // cross-referenced into the map
+            Mapping map = dbr[di].getMap();
+            if (map != null)
+            {
+              if (map.getTo() != null && map.getMap() != null)
+              {
+                SequenceI matched = matcher.findIdMatch(map.getTo());
+                if (matched != null)
+                {
+                  /*
+                   * already got an xref to this sequence; update this
+                   * map to point to the same sequence, and add
+                   * any new dbrefs to it
+                   */
+                  for (DBRefEntry ref : map.getTo().getDBRefs())
+                  {
+                    matched.addDBRef(ref); // add or update mapping
+                  }
+                  map.setTo(matched);
+                }
+                else
+                {
+                  matcher.add(map.getTo());
+                }
+                try
+                {
+                  // compare ms with dss and replace with dss in mapping
+                  // if map is congruent
+                  SequenceI ms = map.getTo();
+                  int sf = map.getMap().getToLowest();
+                  int st = map.getMap().getToHighest();
+                  SequenceI mappedrg = ms.getSubSequence(sf, st);
+                  // SequenceI loc = dss.getSubSequence(sf, st);
+                  if (mappedrg.getLength() > 0
+                          && ms.getSequenceAsString().equals(
+                                  seq.getSequenceAsString()))
+                  // && mappedrg.getSequenceAsString().equals(
+                  // loc.getSequenceAsString()))
+                  {
+                    String msg = "Mapping updated from " + ms.getName()
+                            + " to retrieved crossreference "
+                            + seq.getName();
+                    System.out.println(msg);
+                    // method to update all refs of existing To on
+                    // retrieved sequence with dss and merge any props
+                    // on To onto dss.
+                    map.setTo(seq);
+                    /*
+                     * copy sequence features as well, avoiding
+                     * duplication (e.g. same variation from 2 
+                     * transcripts)
+                     */
+                    SequenceFeature[] sfs = ms.getSequenceFeatures();
+                    if (sfs != null)
+                    {
+                      for (SequenceFeature feat : sfs)
+                      {
+                        /* 
+                         * we override SequenceFeature.equals here (but
+                         * not elsewhere) to ignore Parent attribute
+                         * TODO not quite working yet!
+                         */
+                        if (!copiedFeatures
+                                .contains(me.new MySequenceFeature(feat)))
+                        {
+                          seq.addSequenceFeature(feat);
+                          copiedFeatures.add(feat);
+                        }
+                      }
+                    }
+                  }
+                  mappings.addMap(retrieved[rs].getDatasetSequence(),
+                          map.getTo(), map.getMap());
+                } catch (Exception e)
+                {
+                  System.err
+                          .println("Exception when consolidating Mapped sequence set...");
+                  e.printStackTrace(System.err);
+                }
+              }
+            }
+          }
+        }
+        retrieved[rs].updatePDBIds();
+        foundSeqs.add(retrieved[rs]);
+      }
+    }
+  }
+
+  /**
+   * Searches the alignment for a sequence of complementary type to 'seq' which
+   * shares a DBRefEntry with it. If found, adds the sequence to foundSeqs and
+   * removes the resolved sourceRef from the search list.
+   * 
+   * @param seq
+   * @param source
+   * @param sourceRefs
+   * @param dataset
+   * @param foundSeqs
+   * @param mappings
+   * @return
+   */
+  static void findIndirectCrossReferences(SequenceI seq, String source,
+          List<DBRefEntry> sourceRefs, AlignmentI dataset,
+          List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+  {
+    Iterator<DBRefEntry> refs = sourceRefs.iterator();
+    while (refs.hasNext())
+    {
+      DBRefEntry dbref = refs.next();
+      boolean found = searchDatasetForCrossReference(seq, dbref, dataset,
+              foundSeqs, mappings);
+      if (found)
+      {
+        refs.remove();
+      }
+    }
+  }
+
+  /**
+   * Searches the dataset for a sequence of opposite type to 'excluding', which
+   * has a cross-reference matching dbref. If found, adds the sequence to
+   * foundSeqs and removes dbref from the search list.
+   * 
+   * @param excluding
+   *          a sequence to ignore (start point of search)
+   * @param dbref
+   *          a cross-reference to try to match
+   * @param dataset
+   *          sequences to search in
+   * @param foundSeqs
+   *          result list to add to
+   * @param mappings
+   *          a set of sequence mappings to add to
+   * @return true if relationship found and sequence added
+   */
+  static boolean searchDatasetForCrossReference(SequenceI excluding,
+          DBRefEntry dbref, AlignmentI dataset, List<SequenceI> foundSeqs,
+          AlignedCodonFrame mappings)
+  {
+    boolean fromNucleotide = Comparison.isNucleotide(excluding);
+    boolean found = false;
+    if (dataset == null)
+    {
+      return false;
+    }
+    if (dataset.getSequences() == null)
+    {
+      return false;
+    }
+    List<SequenceI> ds;
+    synchronized (ds = dataset.getSequences())
+    {
+      for (SequenceI nxt : ds)
+      {
+        if (nxt != null)
+        {
+          if (nxt.getDatasetSequence() != null)
+          {
+            System.err
+                    .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
+          }
+          if (nxt == excluding || nxt == excluding.getDatasetSequence())
+          {
+            continue;
+          }
+          if (foundSeqs.contains(nxt))
+          {
+            /*
+             * already added this sequence to cross-refs
+             */
+            continue;
+          }
+          boolean isDna = Comparison.isNucleotide(nxt);
+          if (isDna == fromNucleotide)
+          {
+            /*
+             * skip this sequence - wrong molecule type
+             */
+            continue;
+          }
+
+          /*
+           * check if this sequence has any dbref matching source and accession
+           * (version and mapping may differ)
+           */
+          List<DBRefEntry> candidates = DBRefUtils.searchRefs(
+                  nxt.getDBRefs(), dbref);
+
+          if (candidates.isEmpty())
+          {
+            continue;
+          }
+          found = true;
+          foundSeqs.add(nxt);
+          if (mappings != null)
+          {
+            // don't search if we aren't given a codon map object
+            for (DBRefEntry candidate : candidates)
+            {
+              if (candidate.hasMap())
+              {
+                Mapping mapping = candidate.getMap();
+                MapList map = mapping.getMap();
+                if (mapping.getTo() != null
+                        && map.getFromRatio() != map.getToRatio())
+                {
+                  if (fromNucleotide)
+                  {
+                    // map is from dna seq to a protein product
+                    mappings.addMap(excluding, nxt, map);
+                  }
+                  else
+                  {
+                    // map is from protein seq to its coding dna
+                    mappings.addMap(nxt, excluding, map.getInverse());
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    return found;
+  }
+
+  /**
+   * Updates any empty mappings in the cross-references with one to a compatible
+   * retrieved sequence if found, and adds any new mappings to the
+   * AlignedCodonFrame
+   * 
+   * @param dna
+   * @param mapFrom
+   * @param xrefs
+   * @param retrieved
+   * @param mappings
+   */
+  static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
+          List<DBRefEntry> xrefs, SequenceI[] retrieved,
+          AlignedCodonFrame mappings)
+  {
+    SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved);
+    for (DBRefEntry xref : xrefs)
+    {
+      if (!xref.hasMap())
+      {
+        String targetSeqName = xref.getSource() + "|"
+                + xref.getAccessionId();
+        SequenceI[] matches = matcher.findAllIdMatches(targetSeqName);
+        if (matches == null)
+        {
+          return;
+        }
+        for (SequenceI seq : matches)
+        {
+          MapList mapping = null;
+          if (dna)
+          {
+            mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
+          }
+          else
+          {
+            mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
+            if (mapping != null)
+            {
+              mapping = mapping.getInverse();
+            }
+          }
+          if (mapping != null)
+          {
+            xref.setMap(new Mapping(seq, mapping));
+            if (dna)
+            {
+              AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
+            }
+            if (dna)
+            {
+              mappings.addMap(mapFrom, seq, mapping);
+            }
+            else
+            {
+              mappings.addMap(seq, mapFrom, mapping.getInverse());
+            }
+            continue;
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java

index 7ab8311..39e4bcd 100644 (file)
--- a/src/jalview/gui/AlignFrame.java
+++ b/src/jalview/gui/AlignFrame.java
@@ -23,6 +23,7 @@ package jalview.gui;
  import jalview.analysis.AlignmentSorter;
  import jalview.analysis.AlignmentUtils;
  import jalview.analysis.CrossRef;
+import jalview.analysis.CrossRefs;
  import jalview.analysis.Dna;
  import jalview.analysis.ParseProperties;
  import jalview.analysis.SequenceIdMatcher;
@@ -4647,24 +4648,21 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
      {
        showProducts.removeAll();
        final boolean dna = viewport.getAlignment().isNucleotide();
-      String[] ptypes = (selection == null || selection.length == 0) ? null
-              : CrossRef.findSequenceXrefTypes(dna, selection, dataset);
+      List<String> ptypes = (selection == null || selection.length == 0) ? null
+              : CrossRef.findXrefSourcesForSequences(dna, selection, dataset);
  
-      for (int t = 0; ptypes != null && t < ptypes.length; t++)
+      for (final String source : ptypes)
        {
          showp = true;
          final AlignFrame af = this;
-        final String source = ptypes[t];
-        JMenuItem xtype = new JMenuItem(ptypes[t]);
+        JMenuItem xtype = new JMenuItem(source);
          xtype.addActionListener(new ActionListener()
          {
-
            @Override
            public void actionPerformed(ActionEvent e)
            {
              showProductsFor(af.viewport.getSequenceSelection(), dna, source);
            }
-
          });
          showProducts.add(xtype);
        }
@@ -4672,7 +4670,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
        showProducts.setEnabled(showp);
      } catch (Exception e)
      {
-      jalview.bin.Cache.log
+      Cache.log
                .warn("canShowProducts threw an exception - please report to help@jalview.org",
                        e);
        return false;
@@ -4708,7 +4706,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
          {
            AlignmentI alignment = AlignFrame.this.getViewport()
                    .getAlignment();
-          AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source,
+          AlignmentI xrefs = CrossRefs.findXrefSequences(sel, dna, source,
                    alignment);
            if (xrefs != null)
            {
diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java

index 71c8a39..85ea20b 100755 (executable)
--- a/src/jalview/gui/SequenceFetcher.java
+++ b/src/jalview/gui/SequenceFetcher.java
@@ -817,10 +817,8 @@ public class SequenceFetcher extends JPanel implements Runnable
        Cache.log.info(
                "Error retrieving " + accession
                + " from " + proxy.getDbName(), e);
-    } finally
-    {
-      return success;
      }
+    return success;
    }
  
    /**
@@ -840,7 +838,6 @@ public class SequenceFetcher extends JPanel implements Runnable
  
      for (String q : queries)
      {
-      DBRefEntry[] found = null;
        DBRefEntry dbr = new DBRefEntry();
        dbr.setSource(proxy.getDbSource());
        dbr.setVersion(null);
@@ -851,8 +848,9 @@ public class SequenceFetcher extends JPanel implements Runnable
        {
          if (rs[r] != null)
          {
-          found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
-          if (found != null && found.length > 0)
+          List<DBRefEntry> found = DBRefUtils.searchRefs(rs[r].getDBRefs(),
+                  accId);
+          if (!found.isEmpty())
            {
              rfound = true;
              break;
diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java

index 5605a53..0beb45b 100644 (file)
--- a/src/jalview/util/Comparison.java
+++ b/src/jalview/util/Comparison.java
@@ -249,6 +249,18 @@ public class Comparison
    }
  
    /**
+   * Overloaded method signature to test whether a single sequence is nucleotide
+   * (that is, more than 85% CGTA)
+   * 
+   * @param seq
+   * @return
+   */
+  public static final boolean isNucleotide(SequenceI seq)
+  {
+    return isNucleotide(new SequenceI[] { seq });
+  }
+
+  /**
     * Answers true if more than 85% of the sequence residues (ignoring gaps) are
     * A, G, C, T or U, else false. This is just a heuristic guess and may give a
     * wrong answer (as AGCT are also amino acid codes).
diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java

index 424d40b..ed6d860 100755 (executable)
--- a/src/jalview/util/DBRefUtils.java
+++ b/src/jalview/util/DBRefUtils.java
@@ -67,11 +67,14 @@ public class DBRefUtils
    }
  
    /**
+   * Returns those DBRefEntry objects whose source identifier (once converted to
+   * Jalview's canonical form) is in the list of sources to search for. Returns
+   * null if no matches found.
     * 
     * @param dbrefs
-   *          array of DBRef objects to search
+   *          DBRefEntry objects to search
     * @param sources
-   *          String[] array of source DBRef IDs to retrieve
+   *          array of sources to select
     * @return
     */
    public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
@@ -148,8 +151,8 @@ public class DBRefUtils
    }
  
    /**
-   * Returns an array of those references that match the given entry, or null if
-   * no matches. Currently uses a comparator which matches if
+   * Returns a (possibly empty) list of those references that match the given
+   * entry. Currently uses a comparator which matches if
     * <ul>
     * <li>database sources are the same</li>
     * <li>accession ids are the same</li>
@@ -162,34 +165,35 @@ public class DBRefUtils
     *          pattern to match
     * @return
     */
-  public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry)
+  public static List<DBRefEntry> searchRefs(DBRefEntry[] ref,
+          DBRefEntry entry)
    {
      return searchRefs(ref, entry,
              matchDbAndIdAndEitherMapOrEquivalentMapList);
    }
  
    /**
-   * Returns an array of those references that match the given accession id
+   * Returns a list of those references that match the given accession id
     * <ul>
     * <li>database sources are the same</li>
     * <li>accession ids are the same</li>
     * <li>both have no mapping, or the mappings are the same</li>
     * </ul>
     * 
-   * @param ref
+   * @param refs
     *          Set of references to search
-   * @param entry
-   *          pattern to match
+   * @param accId
+   *          accession id to match
     * @return
     */
-  public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+  public static List<DBRefEntry> searchRefs(DBRefEntry[] refs, String accId)
    {
-    return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+    return searchRefs(refs, new DBRefEntry("", "", accId), matchId);
    }
  
    /**
-   * Returns an array of those references that match the given entry, according
-   * to the given comparator. Returns null if no matches.
+   * Returns a (possibly empty) list of those references that match the given
+   * entry, according to the given comparator.
     * 
     * @param refs
     *          an array of database references to search
@@ -198,14 +202,14 @@ public class DBRefUtils
     * @param comparator
     * @return
     */
-  static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry,
+  static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
            DbRefComp comparator)
    {
+    List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
      if (refs == null || entry == null)
      {
-      return null;
+      return rfs;
      }
-    List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
      for (int i = 0; i < refs.length; i++)
      {
        if (comparator.matches(entry, refs[i]))
@@ -213,7 +217,7 @@ public class DBRefUtils
          rfs.add(refs[i]);
        }
      }
-    return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]);
+    return rfs;
    }
  
    interface DbRefComp
@@ -380,9 +384,9 @@ public class DBRefUtils
    };
  
    /**
-   * accession ID and DB must be identical. Version is ignored. No map on either
-   * or map but no maplist on either or maplist of map on a is equivalent to the
-   * maplist of map on b.
+   * accession ID and DB must be identical, or null on a. Version is ignored. No
+   * map on either or map but no maplist on either or maplist of map on a is
+   * equivalent to the maplist of map on b.
     */
    public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
    {
@@ -393,8 +397,9 @@ public class DBRefUtils
                && refb.getSource().equals(refa.getSource()))
        {
          // We dont care about version
-        if (refa.getAccessionId() != null && refb.getAccessionId() != null
-                && refb.getAccessionId().equals(refa.getAccessionId()))
+
+        if (refa.getAccessionId() == null
+                || refa.getAccessionId().equals(refb.getAccessionId()))
          {
            if (refa.getMap() == null || refb.getMap() == null)
            {
@@ -406,7 +411,7 @@ public class DBRefUtils
                    || (refb.getMap().getMap() != null
                            && refa.getMap().getMap() != null && (refb
                            .getMap().getMap().equals(refa.getMap().getMap()))))
-          { // getMap().getMap().containsEither(false,refa.getMap().getMap())
+          {
              return true;
            }
          }
@@ -519,4 +524,49 @@ public class DBRefUtils
      return (o1 == null ? o2.equals(o1) : o1.equals(o2));
    }
  
+  /**
+   * Selects just the DNA or protein references from a set of references
+   * 
+   * @param selectDna
+   *          if true, select references to 'standard' DNA databases, else to
+   *          'standard' peptide databases
+   * @param refs
+   *          a set of references to select from
+   * @return
+   */
+  public static DBRefEntry[] selectDbRefs(boolean selectDna,
+          DBRefEntry[] refs)
+  {
+    return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS
+            : DBRefSource.PROTEINDBS);
+    // could attempt to find other cross
+    // refs here - ie PDB xrefs
+    // (not dna, not protein seq)
+  }
+
+  /**
+   * Returns the (possibly empty) list of those supplied dbrefs which have the
+   * specified source databse
+   * 
+   * @param dbRefs
+   * @param source
+   * @return
+   */
+  public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+          String source)
+  {
+    List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
+    if (dbRefs != null && source != null)
+    {
+      for (DBRefEntry dbref : dbRefs)
+      {
+        if (source.equals(dbref.getSource()))
+        {
+          matches.add(dbref);
+        }
+      }
+    }
+    return matches;
+  }
+
  }
diff --git a/src/jalview/ws/SequenceFetcherFactory.java b/src/jalview/ws/SequenceFetcherFactory.java

new file mode 100644 (file)

index 0000000..2b8f364
--- /dev/null
+++ b/src/jalview/ws/SequenceFetcherFactory.java
@@ -0,0 +1,32 @@
+package jalview.ws;
+
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+public class SequenceFetcherFactory
+{
+
+  private static SequenceFetcher instance;
+
+  /**
+   * Returns a new SequenceFetcher object, or a mock object if one has been set
+   * 
+   * @return
+   */
+  public static ASequenceFetcher getSequenceFetcher()
+  {
+    return instance == null ? new SequenceFetcher() : instance;
+  }
+
+  /**
+   * Set the instance object to use (intended for unit testing with mock
+   * objects).
+   * 
+   * Be sure to reset to null in the tearDown method of any tests!
+   * 
+   * @param sf
+   */
+  public static void setSequenceFetcher(SequenceFetcher sf)
+  {
+    instance = sf;
+  }
+}
diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java

index 2392476..33a917e 100644 (file)
--- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java
+++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java
@@ -55,7 +55,7 @@ public class ASequenceFetcher
    /**
     * Constructor
     */
-  public ASequenceFetcher()
+  protected ASequenceFetcher()
    {
      super();
  
@@ -125,20 +125,20 @@ public class ASequenceFetcher
     *          if true, only fetch from nucleotide data sources, else peptide
     * @return
     */
-  public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna)
+  public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
    {
      Vector<SequenceI> rseqs = new Vector<SequenceI>();
      Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
-    for (int r = 0; r < refs.length; r++)
+    for (DBRefEntry ref : refs)
      {
-      if (!queries.containsKey(refs[r].getSource()))
+      if (!queries.containsKey(ref.getSource()))
        {
-        queries.put(refs[r].getSource(), new ArrayList<String>());
+        queries.put(ref.getSource(), new ArrayList<String>());
        }
-      List<String> qset = queries.get(refs[r].getSource());
-      if (!qset.contains(refs[r].getAccessionId()))
+      List<String> qset = queries.get(ref.getSource());
+      if (!qset.contains(ref.getAccessionId()))
        {
-        qset.add(refs[r].getAccessionId());
+        qset.add(ref.getAccessionId());
        }
      }
      Enumeration<String> e = queries.keys();
@@ -205,15 +205,12 @@ public class ASequenceFetcher
                  for (int is = 0; is < seqs.length; is++)
                  {
                    rseqs.addElement(seqs[is]);
-                  DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is]
+                  List<DBRefEntry> frefs = DBRefUtils.searchRefs(seqs[is]
                            .getDBRefs(), new DBRefEntry(db, null, null));
-                  if (frefs != null)
+                  for (DBRefEntry dbr : frefs)
                    {
-                    for (DBRefEntry dbr : frefs)
-                    {
-                      queriesFound.add(dbr.getAccessionId());
-                      queriesMade.remove(dbr.getAccessionId());
-                    }
+                    queriesFound.add(dbr.getAccessionId());
+                    queriesMade.remove(dbr.getAccessionId());
                    }
                    seqs[is] = null;
                  }
diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java

index bbc23e5..31f9728 100644 (file)
--- a/test/jalview/analysis/CrossRefTest.java
+++ b/test/jalview/analysis/CrossRefTest.java
@@ -21,10 +21,29 @@
  package jalview.analysis;
  
  import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
  import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
  
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
  import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
  
+import java.util.ArrayList;
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
  import org.testng.annotations.Test;
  
  public class CrossRefTest
@@ -40,27 +59,370 @@ public class CrossRefTest
      DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
      DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
      DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
+    // ENSEMBL is a source of either dna or protein sequence data
+    DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
      DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
-        ref6, ref7, ref8 };
+        ref6, ref7, ref8, ref9 };
  
      /*
       * Just the DNA refs:
       */
-    DBRefEntry[] found = CrossRef.findXDbRefs(false, refs);
-    assertEquals(3, found.length);
+    DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
+    assertEquals(4, found.length);
      assertSame(ref5, found[0]);
      assertSame(ref6, found[1]);
      assertSame(ref7, found[2]);
+    assertSame(ref9, found[3]);
  
      /*
       * Just the protein refs:
       */
-    found = CrossRef.findXDbRefs(true, refs);
-    assertEquals(4, found.length);
+    found = DBRefUtils.selectDbRefs(false, refs);
+    assertEquals(5, found.length);
      assertSame(ref1, found[0]);
      assertSame(ref2, found[1]);
      assertSame(ref3, found[2]);
      assertSame(ref4, found[3]);
+    assertSame(ref9, found[4]);
+  }
+
+  /**
+   * Test the method that finds a sequence's "product" xref source databases,
+   * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
+   * sequences which share a dbref with the sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSourcesForSequence_proteinToDna()
+  {
+    SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
+    List<String> sources = new ArrayList<String>();
+    AlignmentI al = new Alignment(new SequenceI[] {});
+
+    /*
+     * first with no dbrefs to search
+     */
+    CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
+    assertTrue(sources.isEmpty());
+
+    /*
+     * add some dbrefs to sequence
+     */
+    // protein db is not a candidate for findXrefSources
+    seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+    // dna coding databatases are
+    seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+    // a second EMBL xref should not result in a duplicate
+    seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
+    seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+    seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+    seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
+    seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
+    CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
+    assertEquals(4, sources.size());
+    assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]",
+            sources.toString());
+
+    /*
+     * add a sequence to the alignment which has a dbref to UNIPROT|A1234
+     * and others to dna coding databases
+     */
+    sources.clear();
+    seq.setDBRefs(null);
+    seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+    seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+    SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
+    seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+    seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+    seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+    // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
+    al.addSequence(seq2);
+    CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
+    assertEquals(3, sources.size());
+    assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where only an indirect
+   * xref is found - not on the nucleotide sequence but on a peptide sequence in
+   * the alignment which which it shares a nucleotide dbref
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_indirectDbrefToProtein()
+  {
+    /*
+     * Alignment setup:
+     *   - nucleotide dbref  EMBL|AF039662
+     *   - peptide    dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
+     */
+    SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+    uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+    /*
+     * Find UNIPROT xrefs for nucleotide 
+     * - it has no UNIPROT dbref of its own
+     * - but peptide with matching nucleotide dbref does, so is returned
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+    Alignment xrefs = CrossRef.findXrefSequences(
+            new SequenceI[] { emblSeq }, true, "UNIPROT", al);
+    assertEquals(1, xrefs.getHeight());
+    assertSame(uniprotSeq, xrefs.getSequenceAt(0));
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where only an indirect
+   * xref is found - not on the peptide sequence but on a nucleotide sequence in
+   * the alignment which which it shares a protein dbref
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_indirectDbrefToNucleotide()
+  {
+    /*
+     * Alignment setup:
+     *   - peptide    dbref  UNIPROT|Q9ZTS2
+     *   - nucleotide dbref  EMBL|AF039662, UNIPROT|Q9ZTS2
+     */
+    SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+    uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+  
+    /*
+     * find EMBL xrefs for peptide sequence - it has no direct
+     * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
+     */
+    /*
+     * Find EMBL xrefs for peptide 
+     * - it has no EMBL dbref of its own
+     * - but nucleotide with matching peptide dbref does, so is returned
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+    Alignment xrefs = CrossRef.findXrefSequences(
+            new SequenceI[] { uniprotSeq }, false, "EMBL", al);
+    assertEquals(1, xrefs.getHeight());
+    assertSame(emblSeq, xrefs.getSequenceAt(0));
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has no dbref to the desired source, and there are no indirect
+   * references via another sequence in the alignment
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_noDbrefs()
+  {
+    /*
+     * two nucleotide sequences, one with UNIPROT dbref
+     */
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
+  
+    /*
+     * find UNIPROT xrefs for peptide sequence - it has no direct
+     * dbrefs, and the other sequence (which has a UNIPROT dbref) is not 
+     * equatable to it, so no results found
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
+    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna2 },
+            true, "UNIPROT", al);
+    assertNull(xrefs);
+  }
+
+  /**
+   * Tests for the method that searches an alignment (with one sequence
+   * excluded) for protein/nucleotide sequences with a given cross-reference
+   */
+  @Test(groups = { "Functional" })
+  public void testSearchDataset()
+  {
+    /*
+     * nucleotide sequence with UNIPROT AND EMBL dbref
+     * peptide sequence with UNIPROT dbref
+     */
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
+    pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
+
+    List<SequenceI> result = new ArrayList<SequenceI>();
+
+    /*
+     * first search for a dbref nowhere on the alignment:
+     */
+    DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
+    boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null,
+            true, true);
+    assertFalse(found);
+    assertTrue(result.isEmpty());
+
+    // TODO we are setting direct=true here but it is set to
+    // false in Jalview code...
+
+    /*
+     * search for a protein sequence with dbref UNIPROT:Q9ZTS2
+     */
+    dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+    found = CrossRef.searchDataset(dna1, dbref, al, result, null, true,
+            true);
+    assertTrue(found);
+    assertEquals(1, result.size());
+    assertSame(pep1, result.get(0));
+
+    /*
+     * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
+     */
+    result.clear();
+    dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+    found = CrossRef.searchDataset(pep1, dbref, al, result, null, true,
+            false);
+    assertTrue(found);
+    assertEquals(1, result.size());
+    assertSame(dna1, result.get(0));
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has a dbref with a mapping to a sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_fromDbRefMap()
+  {
+    /*
+     * two peptide sequences each with a DBRef and SequenceFeature
+     */
+    SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
+    pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+    pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
+            "group"));
+    SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+    pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+    pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
+            12f, "group2"));
+
+    /*
+     * nucleotide sequence (to go in the alignment)
+     */
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+
+    /*
+     * add DBRefEntry's to dna1 with mappings from dna to both peptides
+     */
+    MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+            3, 1);
+    Mapping map = new Mapping(pep1, mapList);
+    DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+    dna1.addDBRef(dbRef1);
+    mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
+    map = new Mapping(pep2, mapList);
+    DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
+    dna1.addDBRef(dbRef2);
+
+    /*
+     * find UNIPROT xrefs for nucleotide sequence - it should pick up 
+     * mapped sequences
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
+            true, "UNIPROT", al);
+    assertEquals(2, xrefs.getHeight());
+
+    /*
+     * cross-refs alignment holds copies of the mapped sequences
+     * including copies of their dbrefs and features
+     */
+    checkCopySequence(pep1, xrefs.getSequenceAt(0));
+    checkCopySequence(pep2, xrefs.getSequenceAt(1));
+  }
+
+  /**
+   * Helper method to assert seq1 looks like a copy of seq2
+   * 
+   * @param seq1
+   * @param seq2
+   */
+  private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+  {
+    assertNotSame(seq1, seq2);
+    assertEquals(seq1.getName(), seq2.getName());
+    assertEquals(seq1.getStart(), seq2.getStart());
+    assertEquals(seq1.getEnd(), seq2.getEnd());
+    assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
+
+    /*
+     * compare dbrefs
+     */
+    assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
+    // check one to verify a copy, not the same object
+    if (seq1.getDBRefs().length > 0)
+    {
+      assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
+    }
+
+    /*
+     * compare features
+     */
+    assertArrayEquals(seq1.getSequenceFeatures(),
+            seq2.getSequenceFeatures());
+    if (seq1.getSequenceFeatures().length > 0)
+    {
+      assertNotSame(seq1.getSequenceFeatures()[0],
+              seq2.getSequenceFeatures()[0]);
+    }
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has a dbref with no mapping, triggering a fetch from database
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_withFetch()
+  {
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+    final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+    final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+  
+    SequenceFetcher mockFetcher = new SequenceFetcher()
+    {
+
+      @Override
+      public boolean isFetchable(String source)
+      {
+        return true;
+      }
+
+      @Override
+      public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+      {
+        return new SequenceI[] { pep1, pep2 };
+      }
+    };
+    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+    /*
+     * find UNIPROT xrefs for nucleotide sequence
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
+            true, "UNIPROT", al);
+    assertEquals(2, xrefs.getHeight());
+    assertSame(pep1, xrefs.getSequenceAt(0));
+    assertSame(pep2, xrefs.getSequenceAt(1));
+  }
+
+  @AfterClass
+  public void tearDown()
+  {
+    SequenceFetcherFactory.setSequenceFetcher(null);
    }
  
  }
diff --git a/test/jalview/analysis/CrossRefsTest.java b/test/jalview/analysis/CrossRefsTest.java

new file mode 100644 (file)

index 0000000..cdcb184
--- /dev/null
+++ b/test/jalview/analysis/CrossRefsTest.java
@@ -0,0 +1,298 @@
+package jalview.analysis;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
+
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class CrossRefsTest
+{
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has a dbref with a mapping to a sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_fromDbRefMap()
+  {
+    /*
+     * two peptide sequences each with a DBRef and SequenceFeature
+     */
+    SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
+    pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+    pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
+            "group"));
+    SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+    pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+    pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
+            12f, "group2"));
+  
+    /*
+     * nucleotide sequence (to go in the alignment)
+     */
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+  
+    /*
+     * add DBRefEntry's to dna1 with mappings from dna to both peptides
+     */
+    MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+            3, 1);
+    Mapping map = new Mapping(pep1, mapList);
+    DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+    dna1.addDBRef(dbRef1);
+    mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
+    map = new Mapping(pep2, mapList);
+    DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
+    dna1.addDBRef(dbRef2);
+  
+    /*
+     * find UNIPROT xrefs for nucleotide sequence - it should pick up 
+     * mapped sequences
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+    AlignmentI xrefs = CrossRefs.findXrefSequences(
+            new SequenceI[] { dna1 },
+            true, "UNIPROT", al);
+    assertEquals(2, xrefs.getHeight());
+  
+    /*
+     * cross-refs alignment holds copies of the mapped sequences
+     * including copies of their dbrefs and features
+     */
+    checkCopySequence(pep1, xrefs.getSequenceAt(0));
+    checkCopySequence(pep2, xrefs.getSequenceAt(1));
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where only an indirect
+   * xref is found - not on the peptide sequence but on a nucleotide sequence in
+   * the alignment which which it shares a protein dbref
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_indirectDbrefToNucleotide()
+  {
+    /*
+     * Alignment setup:
+     *   - peptide    dbref  UNIPROT|Q9ZTS2
+     *   - nucleotide dbref  EMBL|AF039662, UNIPROT|Q9ZTS2
+     */
+    SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+    uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+  
+    /*
+     * Find EMBL xrefs for peptide 
+     * - it has no EMBL dbref of its own
+     * - but nucleotide with matching peptide dbref does, so is returned
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+    AlignmentI xrefs = CrossRefs.findXrefSequences(
+            new SequenceI[] { uniprotSeq }, false, "EMBL", al);
+    assertEquals(1, xrefs.getHeight());
+    assertSame(emblSeq, xrefs.getSequenceAt(0));
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where only an indirect
+   * xref is found - not on the nucleotide sequence but on a peptide sequence in
+   * the alignment which which it shares a nucleotide dbref
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_indirectDbrefToProtein()
+  {
+    /*
+     * Alignment setup:
+     *   - nucleotide dbref  EMBL|AF039662
+     *   - peptide    dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
+     */
+    SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+    uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+    uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+  
+    /*
+     * Find UNIPROT xrefs for nucleotide 
+     * - it has no UNIPROT dbref of its own
+     * - but peptide with matching nucleotide dbref does, so is returned
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+    AlignmentI xrefs = CrossRefs.findXrefSequences(
+            new SequenceI[] { emblSeq }, true, "UNIPROT", al);
+    assertEquals(1, xrefs.getHeight());
+    assertSame(uniprotSeq, xrefs.getSequenceAt(0));
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has no dbref to the desired source, and there are no indirect
+   * references via another sequence in the alignment
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_noDbrefs()
+  {
+    /*
+     * two nucleotide sequences, one with UNIPROT dbref
+     */
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
+  
+    /*
+     * find UNIPROT xrefs for peptide sequence - it has no direct
+     * dbrefs, and the other sequence (which has a UNIPROT dbref) is not 
+     * equatable to it, so no results found
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
+    AlignmentI xrefs = CrossRefs.findXrefSequences(
+            new SequenceI[] { dna2 },
+            true, "UNIPROT", al);
+    assertNull(xrefs);
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has a dbref with no mapping, triggering a fetch from database
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_withFetch()
+  {
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+    final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+    final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+  
+    SequenceFetcher mockFetcher = new SequenceFetcher()
+    {
+  
+      @Override
+      public boolean isFetchable(String source)
+      {
+        return true;
+      }
+  
+      @Override
+      public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+      {
+        return new SequenceI[] { pep1, pep2 };
+      }
+    };
+    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+  
+    /*
+     * find UNIPROT xrefs for nucleotide sequence
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+    AlignmentI xrefs = CrossRefs.findXrefSequences(
+            new SequenceI[] { dna1 },
+            true, "UNIPROT", al);
+    assertEquals(2, xrefs.getHeight());
+    assertSame(pep1, xrefs.getSequenceAt(0));
+    assertSame(pep2, xrefs.getSequenceAt(1));
+  }
+
+  /**
+   * Helper method to assert seq1 looks like a copy of seq2
+   * 
+   * @param seq1
+   * @param seq2
+   */
+  private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+  {
+    assertNotSame(seq1, seq2);
+    assertEquals(seq1.getName(), seq2.getName());
+    assertEquals(seq1.getStart(), seq2.getStart());
+    assertEquals(seq1.getEnd(), seq2.getEnd());
+    assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
+  
+    /*
+     * compare dbrefs
+     */
+    assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
+    // check one to verify a copy, not the same object
+    if (seq1.getDBRefs().length > 0)
+    {
+      assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
+    }
+  
+    /*
+     * compare features
+     */
+    assertArrayEquals(seq1.getSequenceFeatures(),
+            seq2.getSequenceFeatures());
+    if (seq1.getSequenceFeatures().length > 0)
+    {
+      assertNotSame(seq1.getSequenceFeatures()[0],
+              seq2.getSequenceFeatures()[0]);
+    }
+  }
+
+  /**
+   * Test for finding 'product' sequences for the case where the selected
+   * sequence has two dbrefs with no mapping, triggering a fetch from database.
+   * 
+   * @see http://issues.jalview.org/browse/JAL-2029
+   */
+  @Test(groups = { "Functional" })
+  public void testFindXrefSequences_withFetchMultipleRefs()
+  {
+    /*
+     * EMBL|X07547 has a 
+     */
+    SequenceI dna1 = new Sequence("X07547", "GGGGCAGCACAAGAAC");
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "B0BCM4"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P0CE20"));
+    final SequenceI pep1 = new Sequence("B0BCM4", "MGKGIL");
+    final SequenceI pep2 = new Sequence("P0CE20", "MGKGIL");
+  
+    SequenceFetcher mockFetcher = new SequenceFetcher()
+    {
+      int call = 0;
+
+      @Override
+      public boolean isFetchable(String source)
+      {
+        return true;
+      }
+      @Override
+      public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+      {
+        // pending Mockito with its thenReturn(pep1).thenReturn(pep2) syntax!
+        return new SequenceI[] { call++ == 0 ? pep1 : pep2 };
+      }
+    };
+    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+  
+    /*
+     * find UNIPROT xrefs for nucleotide sequence
+     */
+    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+    AlignmentI xrefs = CrossRefs.findXrefSequences(
+            new SequenceI[] { dna1 },
+            true, "UNIPROT", al);
+    assertEquals(2, xrefs.getHeight());
+    assertSame(pep1, xrefs.getSequenceAt(0));
+    assertSame(pep2, xrefs.getSequenceAt(1));
+  }
+
+}
diff --git a/test/jalview/util/DBRefUtilsTest.java b/test/jalview/util/DBRefUtilsTest.java

index c5e8ef5..96935ce 100644 (file)
--- a/test/jalview/util/DBRefUtilsTest.java
+++ b/test/jalview/util/DBRefUtilsTest.java
@@ -33,6 +33,8 @@ import jalview.datamodel.PDBEntry;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceI;
  
+import java.util.List;
+
  import org.testng.annotations.Test;
  
  public class DBRefUtilsTest
@@ -191,12 +193,13 @@ public class DBRefUtilsTest
      ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
          1 }, 1, 1)));
  
-    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+    List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+        ref1,
          ref2, ref3, ref4, ref5 }, target);
-    assertEquals(3, matches.length);
-    assertSame(ref1, matches[0]);
-    assertSame(ref2, matches[1]);
-    assertSame(ref5, matches[2]);
+    assertEquals(3, matches.size());
+    assertSame(ref1, matches.get(0));
+    assertSame(ref2, matches.get(1));
+    assertSame(ref5, matches.get(2));
    }
  
    /**
@@ -224,11 +227,12 @@ public class DBRefUtilsTest
              new int[] { 1, 1 }, 2, 2));
      ref3.setMap(map3);
  
-    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+    List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+        ref1,
          ref2, ref3 }, target);
-    assertEquals(2, matches.length);
-    assertSame(ref1, matches[0]);
-    assertSame(ref2, matches[1]);
+    assertEquals(2, matches.size());
+    assertSame(ref1, matches.get(0));
+    assertSame(ref2, matches.get(1));
    }
  
    /**
@@ -249,11 +253,42 @@ public class DBRefUtilsTest
      ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
          1 }, 1, 1)));
    
-    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
-        ref2, ref3, ref4, ref5 }, "A1234");
-    assertEquals(3, matches.length);
-    assertSame(ref1, matches[0]);
-    assertSame(ref2, matches[1]);
-    assertSame(ref5, matches[2]);
+    DBRefEntry[] dbrefs = new DBRefEntry[] { ref1,
+        ref2, ref3, ref4, ref5 };
+    List<DBRefEntry> matches = DBRefUtils.searchRefs(dbrefs, "A1234");
+    assertEquals(3, matches.size());
+    assertSame(ref1, matches.get(0));
+    assertSame(ref2, matches.get(1));
+    assertSame(ref5, matches.get(2));
+  }
+
+  /**
+   * Test the method that searches for matches references - case when we are
+   * matching a reference with null (any) accession id
+   */
+  @Test(groups = { "Functional" })
+  public void testSearchRefs_wildcardAccessionid()
+  {
+    DBRefEntry target = new DBRefEntry("EMBL", "2", null);
+  
+    DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches
+    // constructor changes embl to EMBL
+    DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1235"); // matches
+    // constructor does not upper-case accession id
+    DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1236"); // matches
+    DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match
+    // ref5 matches although it has a mapping - ignored
+    DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1237");
+    ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+        1 }, 1, 1)));
+  
+    List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+        ref1,
+        ref2, ref3, ref4, ref5 }, target);
+    assertEquals(4, matches.size());
+    assertSame(ref1, matches.get(0));
+    assertSame(ref2, matches.get(1));
+    assertSame(ref3, matches.get(2));
+    assertSame(ref5, matches.get(3));
    }
  }
diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java

index a54ce8b..76ca69b 100644 (file)
--- a/test/jalview/ws/SequenceFetcherTest.java
+++ b/test/jalview/ws/SequenceFetcherTest.java
@@ -1,5 +1,6 @@
  package jalview.ws;
  
+import jalview.analysis.CrossRef;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.DBRefSource;
@@ -105,18 +106,18 @@ public class SequenceFetcherTest
            {
              boolean dna = sp.isDnaCoding();
              // try and find products
-            String types[] = jalview.analysis.CrossRef
-                    .findSequenceXrefTypes(dna, al.getSequencesArray());
+            List<String> types = CrossRef.findXrefSourcesForSequences(dna,
+                    al.getSequencesArray(), null);
              if (types != null)
              {
                System.out.println("Xref Types for: "
                        + (dna ? "dna" : "prot"));
-              for (int t = 0; t < types.length; t++)
+              for (String source : types)
                {
-                System.out.println("Type: " + types[t]);
+                System.out.println("Type: " + source);
                  SequenceI[] prod = jalview.analysis.CrossRef
                          .findXrefSequences(al.getSequencesArray(), dna,
-                                types[t], null)
+                                source, null)
                          .getSequencesArray();
                  System.out.println("Found "
                          + ((prod == null) ? "no" : "" + prod.length)
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Thu, 9 Jun 2016 13:53:10 +0000 (14:53 +0100)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Thu, 9 Jun 2016 13:53:10 +0000 (14:53 +0100)
src/jalview/analysis/CrossRef.java		patch \| blob \| history
src/jalview/analysis/CrossRefs.java	[new file with mode: 0644]	patch \| blob
src/jalview/gui/AlignFrame.java		patch \| blob \| history
src/jalview/gui/SequenceFetcher.java		patch \| blob \| history
src/jalview/util/Comparison.java		patch \| blob \| history
src/jalview/util/DBRefUtils.java		patch \| blob \| history
src/jalview/ws/SequenceFetcherFactory.java	[new file with mode: 0644]	patch \| blob
src/jalview/ws/seqfetcher/ASequenceFetcher.java		patch \| blob \| history
test/jalview/analysis/CrossRefTest.java		patch \| blob \| history
test/jalview/analysis/CrossRefsTest.java	[new file with mode: 0644]	patch \| blob
test/jalview/util/DBRefUtilsTest.java		patch \| blob \| history
test/jalview/ws/SequenceFetcherTest.java		patch \| blob \| history