JAL-1693 make exon alignment for get-xref splitframe (with CDS xref)

author gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 13 Apr 2015 15:22:35 +0000 (16:22 +0100)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 13 Apr 2015 15:22:35 +0000 (16:22 +0100)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 13 Apr 2015 15:22:35 +0000 (16:22 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 13 Apr 2015 15:22:35 +0000 (16:22 +0100)
diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties

index 027a9ce..7544a6a 100644 (file)
--- a/resources/lang/Messages.properties
+++ b/resources/lang/Messages.properties
@@ -697,7 +697,7 @@ label.translate_cDNA = Translate as cDNA
  label.linked_view_title = Linked cDNA and protein view
  label.align = Align
  label.extract_scores = Extract Scores
-label.get_cross_refs = Get Cross References
+label.get_cross_refs = Get Cross-References
  label.sort_alignment_new_tree = Sort Alignment With New Tree
  label.add_sequences = Add Sequences
  label.new_window = New Window
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index b78afeb..a811d84 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -35,16 +35,22 @@ import java.util.TreeMap;
  
  import jalview.datamodel.AlignedCodon;
  import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentAnnotation;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.FeatureProperties;
  import jalview.datamodel.Mapping;
  import jalview.datamodel.SearchResults;
  import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceGroup;
  import jalview.datamodel.SequenceI;
  import jalview.schemes.ResidueProperties;
+import jalview.util.DBRefUtils;
  import jalview.util.MapList;
+import jalview.util.MappingUtils;
  
  /**
   * grab bag of useful alignment manipulation operations Expect these to be
@@ -1258,4 +1264,124 @@ public class AlignmentUtils
      }
      return false;
    }
+
+  /**
+   * Constructs an alignment consisting of the mapped exon regions in the given
+   * nucleotide sequences, and updates mappings to match.
+   * 
+   * @param dna
+   *          aligned dna sequences
+   * @param mappings
+   *          from dna to protein; these are replaced with new mappings
+   * @return an alignment whose sequences are the exon-only parts of the dna
+   *         sequences that have a mapping (never null)
+   */
+  public static AlignmentI makeExonAlignment(SequenceI[] dna,
+          Set<AlignedCodonFrame> mappings)
+  {
+    Set<AlignedCodonFrame> newMappings = new HashSet<AlignedCodonFrame>();
+    List<SequenceI> exonSequences = new ArrayList<SequenceI>();
+    
+    for (SequenceI dnaSeq : dna)
+    {
+      final SequenceI ds = dnaSeq.getDatasetSequence();
+      List<AlignedCodonFrame> seqMappings = MappingUtils
+              .findMappingsForSequence(ds, mappings);
+      if (!seqMappings.isEmpty())
+      {
+        /*
+         * We assume here that only one protein mapping is expected per dna
+         * sequence. Mapping to multiple protein sequences is conceivable but
+         * undefined. Splitting a mapping to one protein sequence across
+         * multiple mappings is possible but pathological. Need closer
+         * constraints on the contents of AlignedCodonFrame.
+         */
+        AlignedCodonFrame newMapping = new AlignedCodonFrame();
+        final SequenceI exonSequence = makeExonSequence(ds,
+                seqMappings.get(0), newMapping);
+        exonSequences.add(exonSequence);
+        newMappings.add(newMapping);
+      }
+    }
+    AlignmentI al = new Alignment(
+            exonSequences.toArray(new SequenceI[exonSequences.size()]));
+    al.setDataset(null);
+
+    /*
+     * Replace the old mappings with the new ones
+     */
+    mappings.clear();
+    mappings.addAll(newMappings);
+
+    return al;
+  }
+
+  /**
+   * Helper method to make an exon-only sequence and populate its mapping to
+   * protein
+   * <p>
+   * For example, if ggCCaTTcGAg has mappings [3, 4, 6, 7, 9, 10] to protein
+   * then generate a sequence CCTTGA with mapping [1, 6] to the same protein
+   * residues
+   * 
+   * @param dnaSeq
+   *          a dna dataset sequence
+   * @param acf
+   *          holds the current mapping of the sequence to protein
+   * @param newMapping
+   *          the new mapping to populate, from the exon-only sequence
+   * @return the exon-only sequence (with its dataset sequence created)
+   */
+  protected static SequenceI makeExonSequence(SequenceI dnaSeq,
+          AlignedCodonFrame acf, AlignedCodonFrame newMapping)
+  {
+    Mapping mapping = acf.getMappingForSequence(dnaSeq);
+    final char[] dna = dnaSeq.getSequence();
+    StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
+
+    /*
+     * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc }
+     */
+    List<int[]> exonRanges = mapping.getMap().getFromRanges();
+    for (int[] range : exonRanges)
+    {
+      for (int pos = range[0]; pos <= range[1]; pos++)
+      {
+        newSequence.append(dna[pos - 1]);
+      }
+    }
+
+    SequenceI exon = new Sequence(dnaSeq.getName(), newSequence.toString());
+
+    /*
+     * Locate any xrefs to CDS database on the protein product and attach to the
+     * CDS sequence. Also add as a sub-token of the sequence name.
+     */
+    // default to "CDS" if we can't locate an actual gene id
+    String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL);
+    DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(
+            mapping.getTo().getDBRef(), DBRefSource.CODINGDBS);
+    if (cdsRefs != null)
+    {
+      for (DBRefEntry cdsRef : cdsRefs)
+      {
+        exon.addDBRef(new DBRefEntry(cdsRef));
+        cdsAccId = cdsRef.getAccessionId();
+      }
+    }
+    exon.setName(exon.getName() + "|" + cdsAccId);
+    exon.createDatasetSequence();
+
+    /*
+     * Build new mappings - from the same protein regions, but now to contiguous
+     * exons
+     */
+    List<int[]> exonRange = new ArrayList<int[]>();
+    exonRange.add(new int[]
+    { 1, newSequence.length() });
+    MapList map = new MapList(exonRange, mapping.getMap().getToRanges(), 3, 1);
+    newMapping.addMap(exon.getDatasetSequence(), mapping.getTo(), map);
+
+    return exon;
+  }
  }
diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java

index 47bd7bc..7238239 100644 (file)
--- a/src/jalview/analysis/CrossRef.java
+++ b/src/jalview/analysis/CrossRef.java
@@ -20,6 +20,10 @@
   */
  package jalview.analysis;
  
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
  import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentI;
@@ -27,14 +31,10 @@ import jalview.datamodel.DBRefEntry;
  import jalview.datamodel.DBRefSource;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceI;
+import jalview.util.DBRefUtils;
  import jalview.ws.SequenceFetcher;
  import jalview.ws.seqfetcher.ASequenceFetcher;
  
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Vector;
-
  /**
   * Functions for cross-referencing sequence databases. user must first specify
   * if cross-referencing from protein or dna (set dna==true)
@@ -45,39 +45,22 @@ import java.util.Vector;
  public class CrossRef
  {
    /**
-   * get the DNA or protein references for a protein or dna sequence
+   * Select just the DNA or protein references for a protein or dna sequence
     * 
-   * @param dna
-   * @param rfs
+   * @param fromDna
+   *          if true, select references from DNA (i.e. Protein databases), else
+   *          DNA database references
+   * @param refs
+   *          a set of references to select from
     * @return
     */
-  public static DBRefEntry[] findXDbRefs(boolean dna, DBRefEntry[] rfs)
+  public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs)
    {
-    if (dna)
-    {
-      rfs = jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS);
-    }
-    else
-    {
-      rfs = jalview.util.DBRefUtils.selectRefs(rfs,
-              DBRefSource.DNACODINGDBS); // could attempt to find other cross
-      // refs and return here - ie PDB xrefs
-      // (not dna, not protein seq)
-    }
-    return rfs;
-  }
-
-  public static Hashtable classifyDbRefs(DBRefEntry[] rfs)
-  {
-    Hashtable classes = new Hashtable();
-    classes.put(DBRefSource.PROTEINDBS,
-            jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS));
-    classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils
-            .selectRefs(rfs, DBRefSource.DNACODINGDBS));
-    classes.put(DBRefSource.DOMAINDBS,
-            jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.DOMAINDBS));
-    // classes.put(OTHER, )
-    return classes;
+    return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS
+            : DBRefSource.DNACODINGDBS);
+    // could attempt to find other cross
+    // refs here - ie PDB xrefs
+    // (not dna, not protein seq)
    }
  
    /**
@@ -104,12 +87,11 @@ public class CrossRef
            SequenceI[] seqs, AlignmentI dataset)
    {
      String[] dbrefs = null;
-    Vector refs = new Vector();
+    List<String> refs = new ArrayList<String>();
      for (int s = 0; s < seqs.length; s++)
      {
        if (seqs[s] != null)
        {
-
          SequenceI dss = seqs[s];
          while (dss.getDatasetSequence() != null)
          {
@@ -120,7 +102,7 @@ public class CrossRef
          {
            if (!refs.contains(rfs[r].getSource()))
            {
-            refs.addElement(rfs[r].getSource());
+            refs.add(rfs[r].getSource());
            }
          }
          if (dataset != null)
@@ -128,19 +110,17 @@ public class CrossRef
            // search for references to this sequence's direct references.
            DBRefEntry[] lrfs = CrossRef
                    .findXDbRefs(!dna, seqs[s].getDBRef());
-          Vector rseqs = new Vector();
+          List<SequenceI> rseqs = new ArrayList<SequenceI>();
            CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,
                    null); // don't need to specify codon frame for mapping here
-          Enumeration lr = rseqs.elements();
-          while (lr.hasMoreElements())
+          for (SequenceI rs : rseqs)
            {
-            SequenceI rs = (SequenceI) lr.nextElement();
-            DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
+            DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef()); // not used??
              for (int r = 0; rfs != null && r < rfs.length; r++)
              {
                if (!refs.contains(rfs[r].getSource()))
                {
-                refs.addElement(rfs[r].getSource());
+                refs.add(rfs[r].getSource());
                }
              }
            }
@@ -150,7 +130,7 @@ public class CrossRef
      if (refs.size() > 0)
      {
        dbrefs = new String[refs.size()];
-      refs.copyInto(dbrefs);
+      refs.toArray(dbrefs);
      }
      return dbrefs;
    }
@@ -228,7 +208,7 @@ public class CrossRef
    public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
            String source, AlignmentI dataset)
    {
-    Vector rseqs = new Vector();
+    List<SequenceI> rseqs = new ArrayList<SequenceI>();
      Alignment ral = null;
      AlignedCodonFrame cf = new AlignedCodonFrame(); // nominal width
      for (int s = 0; s < seqs.length; s++)
@@ -243,14 +223,8 @@ public class CrossRef
        if ((xrfs == null || xrfs.length == 0) && dataset != null)
        {
          System.out.println("Attempting to find ds Xrefs refs.");
-        DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less
-        // ambiguous
-        // would
-        // be a
-        // 'find
-        // primary
-        // dbRefEntry'
-        // method.
+        DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef());
+        // less ambiguous would be a 'find primary dbRefEntry' method.
          // filter for desired source xref here
          found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
                  rseqs, cf);
@@ -265,8 +239,8 @@ public class CrossRef
          {
            if (xrfs[r].getMap().getTo() != null)
            {
-            Sequence rsq = new Sequence(xrfs[r].getMap().getTo());
-            rseqs.addElement(rsq);
+            SequenceI rsq = new Sequence(xrfs[r].getMap().getTo());
+            rseqs.add(rsq);
              if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r]
                      .getMap().getMap().getToRatio())
              {
@@ -401,7 +375,7 @@ public class CrossRef
                    }
                  }
                  retrieved[rs].updatePDBIds();
-                rseqs.addElement(retrieved[rs]);
+                rseqs.add(retrieved[rs]);
                }
              }
            }
@@ -411,7 +385,7 @@ public class CrossRef
      if (rseqs.size() > 0)
      {
        SequenceI[] rsqs = new SequenceI[rseqs.size()];
-      rseqs.copyInto(rsqs);
+      rseqs.toArray(rsqs);
        ral = new Alignment(rsqs);
        if (cf != null && cf.getProtMappings() != null)
        {
@@ -433,7 +407,8 @@ public class CrossRef
     * @return true if matches were found.
     */
    private static boolean searchDatasetXrefs(SequenceI sequenceI,
-          boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs,
+          boolean dna, DBRefEntry[] lrfs, AlignmentI dataset,
+          List<SequenceI> rseqs,
            AlignedCodonFrame cf)
    {
      boolean found = false;
@@ -465,7 +440,7 @@ public class CrossRef
     * @return true if one or more unique sequences were found and added
     */
    public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
-          AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf)
+          AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf)
    {
      return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);
    }
@@ -486,7 +461,7 @@ public class CrossRef
     * @return true if relationship found and sequence added.
     */
    public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
-          AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf,
+          AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf,
            boolean direct, boolean dna)
    {
      boolean found = false;
@@ -540,10 +515,9 @@ public class CrossRef
              {
                if (!rseqs.contains(nxt))
                {
-                rseqs.addElement(nxt);
-                boolean foundmap = cf != null; // don't search if we aren't
-                                               // given
-                // a codon map object
+                rseqs.add(nxt);
+                boolean foundmap = cf != null;
+                // don't search if we aren't given a codon map object
                  for (int r = 0; foundmap && r < cands.length; r++)
                  {
                    if (cands[r].hasMap())
diff --git a/src/jalview/analysis/SeqsetUtils.java b/src/jalview/analysis/SeqsetUtils.java

index 27ad577..2ede9ed 100755 (executable)
--- a/src/jalview/analysis/SeqsetUtils.java
+++ b/src/jalview/analysis/SeqsetUtils.java
@@ -20,9 +20,14 @@
   */
  package jalview.analysis;
  
-import java.util.*;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Vector;
  
-import jalview.datamodel.*;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
  
  public class SeqsetUtils
  {
@@ -56,7 +61,7 @@ public class SeqsetUtils
      }
      sqinfo.put("SeqFeatures", sfeat);
      sqinfo.put("PdbId", (seq.getPDBId() != null) ? seq.getPDBId()
-            : new Vector());
+            : new Vector<PDBEntry>());
      sqinfo.put("datasetSequence",
              (seq.getDatasetSequence() != null) ? seq.getDatasetSequence()
                      : new Sequence("THISISAPLACEHOLDER", ""));
@@ -84,7 +89,7 @@ public class SeqsetUtils
      Integer start = (Integer) sqinfo.get("Start");
      Integer end = (Integer) sqinfo.get("End");
      Vector sfeatures = (Vector) sqinfo.get("SeqFeatures");
-    Vector pdbid = (Vector) sqinfo.get("PdbId");
+    Vector<PDBEntry> pdbid = (Vector<PDBEntry>) sqinfo.get("PdbId");
      String description = (String) sqinfo.get("Description");
      Sequence seqds = (Sequence) sqinfo.get("datasetSequence");
      if (oldname == null)
diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java

index 482df7f..81046f1 100755 (executable)
--- a/src/jalview/datamodel/Alignment.java
+++ b/src/jalview/datamodel/Alignment.java
@@ -20,10 +20,6 @@
   */
  package jalview.datamodel;
  
-import jalview.analysis.AlignmentUtils;
-import jalview.io.FastaFile;
-import jalview.util.MessageManager;
-
  import java.util.ArrayList;
  import java.util.Enumeration;
  import java.util.HashSet;
@@ -34,6 +30,10 @@ import java.util.Map;
  import java.util.Set;
  import java.util.Vector;
  
+import jalview.analysis.AlignmentUtils;
+import jalview.io.FastaFile;
+import jalview.util.MessageManager;
+
  /**
   * Data structure to hold and manipulate a multiple sequence alignment
   */
@@ -1664,8 +1664,8 @@ public class Alignment implements AlignmentI
     * identically. If this is nucleotide and the other is protein, make 3 gaps
     * for each gap in the protein sequences. If this is protein and the other is
     * nucleotide, insert a gap for each 3 gaps (or part thereof) between
-   * nucleotide bases. Does nothing if alignment of protein from cDNA is
-   * requested (not yet implemented).
+   * nucleotide bases. If this is protein and the other is nucleotide, the
+   * protein is gapped to match the relative ordering of codons in the nucleotide.
     * 
     * Parameters control whether gaps in exon (mapped) and intron (unmapped)
     * regions are preserved. Gaps that connect introns to exons are treated
@@ -1697,6 +1697,9 @@ public class Alignment implements AlignmentI
              { thisGapChar, thisGapChar, thisGapChar }) : String
              .valueOf(thisGapChar);
  
+    // TODO handle intron regions? Needs a 'holistic' alignment of dna,
+    // not just sequence by sequence. But how to 'gap' intron regions?
+
      /*
       * Get mappings from 'that' alignment's sequences to this.
       */
diff --git a/src/jalview/datamodel/FeatureProperties.java b/src/jalview/datamodel/FeatureProperties.java

index b940eb1..d25eb96 100644 (file)
--- a/src/jalview/datamodel/FeatureProperties.java
+++ b/src/jalview/datamodel/FeatureProperties.java
@@ -29,6 +29,8 @@ package jalview.datamodel;
  public class FeatureProperties
  {
  
+  private static final String EMBL_CODING_FEATURE = "CDS";
+
    public static final String EXONPOS = "exon number";
  
    public static final String EXONPRODUCT = "product";
@@ -43,9 +45,29 @@ public class FeatureProperties
     */
    public static boolean isCodingFeature(String dbrefsource, String type)
    {
-    return ((dbrefsource == null
-            || dbrefsource.equalsIgnoreCase(DBRefSource.EMBL) || dbrefsource
-              .equalsIgnoreCase(DBRefSource.EMBLCDS)) && type
-            .equalsIgnoreCase("CDS"));
+    if (type.equalsIgnoreCase(EMBL_CODING_FEATURE))
+    {
+      return (dbrefsource == null
+              || dbrefsource.equalsIgnoreCase(DBRefSource.EMBL) || dbrefsource
+                .equalsIgnoreCase(DBRefSource.EMBLCDS));
+    }
+    return false;
+  }
+
+  /**
+   * Returns the coding feature name for a database source. Currently just
+   * hard-coded to return CDS for EMBL/EMBLCDS, else null.
+   * 
+   * @param dbrefsource
+   * @return
+   */
+  public static String getCodingFeature(String dbrefsource)
+  {
+    if (DBRefSource.EMBL.equalsIgnoreCase(dbrefsource)
+            || DBRefSource.EMBLCDS.equalsIgnoreCase(dbrefsource))
+    {
+      return EMBL_CODING_FEATURE;
+    }
+    return null;
    }
  }
diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java

index 5c1fba5..65d8179 100755 (executable)
--- a/src/jalview/datamodel/Sequence.java
+++ b/src/jalview/datamodel/Sequence.java
@@ -20,9 +20,6 @@
   */
  package jalview.datamodel;
  
-import jalview.analysis.AlignSeq;
-import jalview.util.StringUtils;
-
  import java.util.ArrayList;
  import java.util.Enumeration;
  import java.util.List;
@@ -30,6 +27,9 @@ import java.util.Vector;
  
  import fr.orsay.lri.varna.models.rna.RNA;
  
+import jalview.analysis.AlignSeq;
+import jalview.util.StringUtils;
+
  /**
   * 
   * Implements the SequenceI interface for a char[] based sequence object.
@@ -51,7 +51,7 @@ public class Sequence implements SequenceI
  
    int end;
  
-  Vector pdbIds;
+  Vector<PDBEntry> pdbIds;
  
    String vamsasId;
  
@@ -353,7 +353,7 @@ public class Sequence implements SequenceI
    {
      if (pdbIds == null)
      {
-      pdbIds = new Vector();
+      pdbIds = new Vector<PDBEntry>();
      }
      if (!pdbIds.contains(entry))
      {
diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java

index a9a7589..04f3588 100755 (executable)
--- a/src/jalview/datamodel/SequenceI.java
+++ b/src/jalview/datamodel/SequenceI.java
@@ -265,14 +265,14 @@ public interface SequenceI
     * @param id
     *          DOCUMENT ME!
     */
-  public void setPDBId(Vector ids);
+  public void setPDBId(Vector<PDBEntry> ids);
  
    /**
-   * DOCUMENT ME!
+   * Returns the vector of PDB entries for this sequence
     * 
     * @return DOCUMENT ME!
     */
-  public Vector getPDBId();
+  public Vector<PDBEntry> getPDBId();
  
    /**
     * add entry to the vector of PDBIds, if it isn't in the list already
diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java

index 8163f05..9765a1a 100755 (executable)
--- a/src/jalview/util/DBRefUtils.java
+++ b/src/jalview/util/DBRefUtils.java
@@ -20,18 +20,39 @@
   */
  package jalview.util;
  
-import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.PDBEntry;
-import jalview.datamodel.SequenceI;
-
  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.Hashtable;
+import java.util.List;
  import java.util.Map;
-import java.util.Vector;
+
+import com.stevesoft.pat.Regex;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
  
  public class DBRefUtils
  {
+  private static Map<String, String> canonicalSourceNameLookup = new HashMap<String, String>();
+
+  private static Map<String, String> dasCoordinateSystemsLookup = new HashMap<String, String>();
+
+  static
+  {
+    // TODO load these from a resource file?
+    canonicalSourceNameLookup.put("uniprotkb/swiss-prot",
+            DBRefSource.UNIPROT);
+    canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT);
+    canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
+
+    dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
+    dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
+    dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL);
+    // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS);
+  }
+
    /**
     * Utilities for handling DBRef objects and their collections.
     */
@@ -89,37 +110,20 @@ public class DBRefUtils
     * @return boolean true if Source DBRefEntry is compatible with DAS
     *         CoordinateSystem name
     */
-  public static Hashtable DasCoordinateSystemsLookup = null;
  
    public static boolean isDasCoordinateSystem(String string,
            DBRefEntry dBRefEntry)
    {
-    if (DasCoordinateSystemsLookup == null)
+    if (string == null || dBRefEntry == null)
      {
-      // TODO: Make a DasCoordinateSystemsLookup properties resource
-      // Initialise
-      DasCoordinateSystemsLookup = new Hashtable();
-      DasCoordinateSystemsLookup.put("pdbresnum",
-              jalview.datamodel.DBRefSource.PDB);
-      DasCoordinateSystemsLookup.put("uniprot",
-              jalview.datamodel.DBRefSource.UNIPROT);
-      DasCoordinateSystemsLookup.put("EMBL",
-              jalview.datamodel.DBRefSource.EMBL);
-      // DasCoordinateSystemsLookup.put("EMBL",
-      // jalview.datamodel.DBRefSource.EMBLCDS);
+      return false;
      }
-
-    String coordsys = (String) DasCoordinateSystemsLookup.get(string
+    String coordsys = dasCoordinateSystemsLookup.get(string
              .toLowerCase());
-    if (coordsys != null)
-    {
-      return coordsys.equals(dBRefEntry.getSource());
-    }
-    return false;
+    return coordsys == null ? false : coordsys.equals(dBRefEntry
+            .getSource());
    }
  
-  public static Hashtable CanonicalSourceNameLookup = null;
-
    /**
     * look up source in an internal list of database reference sources and return
     * the canonical jalview name for the source, or the original string if it has
@@ -131,34 +135,28 @@ public class DBRefUtils
     */
    public static String getCanonicalName(String source)
    {
-    if (CanonicalSourceNameLookup == null)
+    if (source == null)
      {
-      CanonicalSourceNameLookup = new Hashtable();
-      CanonicalSourceNameLookup.put("uniprotkb/swiss-prot",
-              jalview.datamodel.DBRefSource.UNIPROT);
-      CanonicalSourceNameLookup.put("uniprotkb/trembl",
-              jalview.datamodel.DBRefSource.UNIPROT);
-      CanonicalSourceNameLookup.put("pdb",
-              jalview.datamodel.DBRefSource.PDB);
+      return null;
      }
-    String canonical = (String) CanonicalSourceNameLookup.get(source
+    String canonical = canonicalSourceNameLookup.get(source
              .toLowerCase());
-    if (canonical == null)
-    {
-      return source;
-    }
-    return canonical;
+    return canonical == null ? source : canonical;
    }
  
    /**
-   * find RefEntry corresponding to a particular pattern the equals method of
-   * each entry is used, from String attributes right down to Mapping
-   * attributes.
+   * Returns an array of those references that match the given entry, or null if
+   * no matches. Currently uses a comparator which matches if
+   * <ul>
+   * <li>database sources are the same</li>
+   * <li>accession ids are the same</li>
+   * <li>both have no mapping, or the mappings are the same</li>
+   * </ul>
     * 
     * @param ref
     *          Set of references to search
     * @param entry
-   *          pattern to collect - null any entry for wildcard match
+   *          pattern to match
     * @return
     */
    public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry)
@@ -167,32 +165,36 @@ public class DBRefUtils
              matchDbAndIdAndEitherMapOrEquivalentMapList);
    }
  
-  public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry,
+  /**
+   * Returns an array of those references that match the given entry, according
+   * to the given comparator. Returns null if no matches.
+   * 
+   * @param refs
+   *          an array of database references to search
+   * @param entry
+   *          an entry to compare against
+   * @param comparator
+   * @return
+   */
+  static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry,
            DbRefComp comparator)
    {
-    if (ref == null || entry == null)
+    if (refs == null || entry == null)
      {
        return null;
      }
-    Vector rfs = new Vector();
-    for (int i = 0; i < ref.length; i++)
+    List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
+    for (int i = 0; i < refs.length; i++)
      {
-      if (comparator.matches(entry, ref[i]))
+      if (comparator.matches(entry, refs[i]))
        {
-        rfs.addElement(ref[i]);
+        rfs.add(refs[i]);
        }
      }
-    // TODO Auto-generated method stub
-    if (rfs.size() > 0)
-    {
-      DBRefEntry[] rf = new DBRefEntry[rfs.size()];
-      rfs.copyInto(rf);
-      return rf;
-    }
-    return null;
+    return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]);
    }
  
-  public interface DbRefComp
+  interface DbRefComp
    {
      public boolean matches(DBRefEntry refa, DBRefEntry refb);
    }
@@ -402,14 +404,17 @@ public class DBRefUtils
    };
  
    /**
-   * used by file parsers to generate DBRefs from annotation within file (eg
-   * stockholm)
+   * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
+   * database is PDB.
+   * <p>
+   * Used by file parsers to generate DBRefs from annotation within file (eg
+   * Stockholm)
     * 
     * @param dbname
     * @param version
     * @param acn
     * @param seq
-   *          where to anotate with reference
+   *          where to annotate with reference
     * @return parsed version of entry that was added to seq (if any)
     */
    public static DBRefEntry parseToDbRef(SequenceI seq, String dbname,
@@ -418,12 +423,14 @@ public class DBRefUtils
      DBRefEntry ref = null;
      if (dbname != null)
      {
-      String locsrc = jalview.util.DBRefUtils.getCanonicalName(dbname);
-      if (locsrc.equals(jalview.datamodel.DBRefSource.PDB))
+      String locsrc = DBRefUtils.getCanonicalName(dbname);
+      if (locsrc.equals(DBRefSource.PDB))
        {
-        // check for chaincode and mapping
-        // PFAM style stockhom PDB citation
-        com.stevesoft.pat.Regex r = new com.stevesoft.pat.Regex(
+        /*
+         * Check for PFAM style Stockholm PDB accession id citation e.g.
+         * "1WRI A; 7-80;"
+         */
+        Regex r = new com.stevesoft.pat.Regex(
                  "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
          if (r.search(acn.trim()))
          {
@@ -433,8 +440,8 @@ public class DBRefUtils
            {
              chaincode = " ";
            }
-          String mapstart = r.stringMatched(3);
-          String mapend = r.stringMatched(4);
+          // String mapstart = r.stringMatched(3);
+          // String mapend = r.stringMatched(4);
            if (chaincode.equals(" "))
            {
              chaincode = "_";
diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java

index d204b99..f0e5de0 100644 (file)
--- a/src/jalview/ws/dbsources/Uniprot.java
+++ b/src/jalview/ws/dbsources/Uniprot.java
@@ -222,7 +222,7 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
      {
        UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
        Enumeration e = entry.getDbReference().elements();
-      Vector onlyPdbEntries = new Vector();
+      Vector<PDBEntry> onlyPdbEntries = new Vector();
        Vector dbxrefs = new Vector();
        while (e.hasMoreElements())
        {
diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java

index 71b1bcb..98d77d4 100644 (file)
--- a/test/jalview/analysis/AlignmentUtilsTests.java
+++ b/test/jalview/analysis/AlignmentUtilsTests.java
@@ -29,8 +29,10 @@ import java.io.IOException;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collections;
+import java.util.HashSet;
  import java.util.List;
  import java.util.Map;
+import java.util.Set;
  
  import org.junit.Test;
  
@@ -41,11 +43,14 @@ import jalview.datamodel.AlignmentI;
  import jalview.datamodel.Annotation;
  import jalview.datamodel.DBRefEntry;
  import jalview.datamodel.Mapping;
+import jalview.datamodel.SearchResults;
+import jalview.datamodel.SearchResults.Match;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceI;
  import jalview.io.AppletFormatAdapter;
  import jalview.io.FormatAdapter;
  import jalview.util.MapList;
+import jalview.util.MappingUtils;
  
  public class AlignmentUtilsTests 
  {
@@ -869,4 +874,137 @@ public class AlignmentUtilsTests
      assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
      assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
    }
+
+  /**
+   * Test the method that extracts the exon-only part of a DNA alignment.
+   */
+  @Test
+  public void testMakeExonAlignment()
+  {
+    SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+    SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+    SequenceI pep1 = new Sequence("pep1", "GF");
+    SequenceI pep2 = new Sequence("pep2", "GFP");
+    dna1.createDatasetSequence();
+    dna2.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep2.createDatasetSequence();
+
+    Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+    MapList map = new MapList(new int[]
+    { 4, 6, 10, 12 }, new int[]
+    { 1, 2 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    mappings.add(acf);
+    map = new MapList(new int[]
+    { 1, 3, 7, 9, 13, 15 }, new int[]
+    { 1, 3 }, 3, 1);
+    acf = new AlignedCodonFrame();
+    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+    mappings.add(acf);
+    
+    AlignmentI exons = AlignmentUtils.makeExonAlignment(new SequenceI[]
+    { dna1, dna2 }, mappings);
+    assertEquals(2, exons.getSequences().size());
+    assertEquals("GGGTTT", exons.getSequenceAt(0).getSequenceAsString());
+    assertEquals("GGGTTTCCC", exons.getSequenceAt(1).getSequenceAsString());
+
+    /*
+     * Verify updated mappings
+     */
+    assertEquals(2, mappings.size());
+
+    /*
+     * Mapping from pep1 to GGGTTT in first new exon sequence
+     */
+    List<AlignedCodonFrame> pep1Mapping = MappingUtils
+            .findMappingsForSequence(pep1, mappings);
+    assertEquals(1, pep1Mapping.size());
+    // map G to GGG
+    SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
+    assertEquals(1, sr.getResults().size());
+    Match m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(1, m.getStart());
+    assertEquals(3, m.getEnd());
+    // map F to TTT
+    sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(4, m.getStart());
+    assertEquals(6, m.getEnd());
+
+    /*
+     * Mapping from pep2 to GGGTTTCCC in second new exon sequence
+     */
+    List<AlignedCodonFrame> pep2Mapping = MappingUtils
+            .findMappingsForSequence(pep2, mappings);
+    assertEquals(1, pep2Mapping.size());
+    // map G to GGG
+    sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
+    assertEquals(1, sr.getResults().size());
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(1, m.getStart());
+    assertEquals(3, m.getEnd());
+    // map F to TTT
+    sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(4, m.getStart());
+    assertEquals(6, m.getEnd());
+    // map P to CCC
+    sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(7, m.getStart());
+    assertEquals(9, m.getEnd());
+  }
+
+  /**
+   * Test the method that makes an exon-only sequence from a DNA sequence and
+   * its product mapping. Test includes the expected case that the DNA sequence
+   * already has a protein product (Uniprot translation) which in turn has an
+   * x-ref to the EMBLCDS record.
+   */
+  @Test
+  public void testMakeExonSequence()
+  {
+    SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+    SequenceI pep1 = new Sequence("pep1", "GF");
+    dna1.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep1.getDatasetSequence().addDBRef(
+            new DBRefEntry("EMBLCDS", "2", "A12345"));
+
+    /*
+     * Make the mapping from dna to protein. The protein sequence has a DBRef to
+     * EMBLCDS|A12345.
+     */
+    Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+    MapList map = new MapList(new int[]
+    { 4, 6, 10, 12 }, new int[]
+    { 1, 2 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    mappings.add(acf);
+
+    AlignedCodonFrame newMapping = new AlignedCodonFrame();
+    SequenceI exon = AlignmentUtils.makeExonSequence(dna1, acf, newMapping);
+
+    assertEquals("GGGTTT", exon.getSequenceAsString());
+    assertEquals("dna1|A12345", exon.getName());
+    assertEquals(1, exon.getDBRef().length);
+    DBRefEntry cdsRef = exon.getDBRef()[0];
+    assertEquals("EMBLCDS", cdsRef.getSource());
+    assertEquals("2", cdsRef.getVersion());
+    assertEquals("A12345", cdsRef.getAccessionId());
+
+  }
  }
diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java

new file mode 100644 (file)

index 0000000..35606f0
--- /dev/null
+++ b/test/jalview/analysis/CrossRefTest.java
@@ -0,0 +1,46 @@
+package jalview.analysis;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+import org.junit.Test;
+
+import jalview.datamodel.DBRefEntry;
+
+public class CrossRefTest
+{
+  @Test
+  public void testFindXDbRefs()
+  {
+    DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
+    DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
+    DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
+    DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
+    DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
+    DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
+    DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
+    DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
+    DBRefEntry[] refs = new DBRefEntry[]
+    { ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8 };
+
+    /*
+     * Just the DNA refs:
+     */
+    DBRefEntry[] found = CrossRef.findXDbRefs(false, refs);
+    assertEquals(3, found.length);
+    assertSame(ref5, found[0]);
+    assertSame(ref6, found[1]);
+    assertSame(ref7, found[2]);
+
+    /*
+     * Just the protein refs:
+     */
+    found = CrossRef.findXDbRefs(true, refs);
+    assertEquals(4, found.length);
+    assertSame(ref1, found[0]);
+    assertSame(ref2, found[1]);
+    assertSame(ref3, found[2]);
+    assertSame(ref4, found[3]);
+  }
+
+}
diff --git a/test/jalview/util/DBRefUtilsTest.java b/test/jalview/util/DBRefUtilsTest.java

new file mode 100644 (file)

index 0000000..e606665
--- /dev/null
+++ b/test/jalview/util/DBRefUtilsTest.java
@@ -0,0 +1,222 @@
+package jalview.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+public class DBRefUtilsTest
+{
+
+  /**
+   * Test the method that selects DBRefEntry items whose source is in a supplied
+   * list
+   */
+  @Test
+  public void testSelectRefs()
+  {
+    assertNull(DBRefUtils.selectRefs(null, null));
+    assertNull(DBRefUtils.selectRefs(null, DBRefSource.CODINGDBS));
+
+    DBRefEntry ref1 = new DBRefEntry("EMBL", "1.2", "A12345");
+    DBRefEntry ref2 = new DBRefEntry("UNIPROT", "1.2", "A12346");
+    // Source is converted to upper-case by this constructor!
+    DBRefEntry ref3 = new DBRefEntry("Uniprot", "1.2", "A12347");
+    DBRefEntry[] dbrefs = new DBRefEntry[]
+    { ref1, ref2, ref3 };
+    String[] sources = new String[]
+    { "EMBL", "UNIPROT" };
+
+    DBRefEntry[] selected = DBRefUtils.selectRefs(dbrefs, sources);
+    assertEquals(3, selected.length);
+    assertSame(ref1, selected[0]);
+    assertSame(ref2, selected[1]);
+    assertSame(ref3, selected[2]);
+
+    sources = new String[]
+    { "EMBL" };
+    selected = DBRefUtils.selectRefs(dbrefs, sources);
+    assertEquals(1, selected.length);
+    assertSame(ref1, selected[0]);
+
+    sources = new String[]
+    { "UNIPROT" };
+    selected = DBRefUtils.selectRefs(dbrefs, sources);
+    assertEquals(2, selected.length);
+    assertSame(ref2, selected[0]);
+    assertSame(ref3, selected[1]);
+
+    sources = new String[]
+    { "Uniprot", "EMBLCDS" };
+    selected = DBRefUtils.selectRefs(dbrefs, sources);
+    assertNull(selected);
+  }
+
+  /**
+   * Test the method that converts (currently three) database names to a
+   * canonical name (not case-sensitive)
+   */
+  @Test
+  public void testGetCanonicalName()
+  {
+    assertNull(DBRefUtils.getCanonicalName(null));
+    assertEquals("", DBRefUtils.getCanonicalName(""));
+    assertEquals("PDB", DBRefUtils.getCanonicalName("pdb"));
+    assertEquals("PDB", DBRefUtils.getCanonicalName("Pdb"));
+    assertEquals("UNIPROT",
+            DBRefUtils.getCanonicalName("uniprotkb/swiss-prot"));
+    assertEquals("UNIPROT", DBRefUtils.getCanonicalName("uniprotkb/trembl"));
+    assertEquals("UNIPROT",
+            DBRefUtils.getCanonicalName("UNIPROTKB/SWISS-PROT"));
+    assertEquals("UNIPROT", DBRefUtils.getCanonicalName("UNIPROTKB/TREMBL"));
+    assertEquals("UNIPROTKB/SWISS-CHEESE",
+            DBRefUtils.getCanonicalName("UNIPROTKB/SWISS-CHEESE"));
+  }
+
+  @Test
+  public void testIsDasCoordinateSystem()
+  {
+    assertFalse(DBRefUtils.isDasCoordinateSystem(null, null));
+    assertFalse(DBRefUtils.isDasCoordinateSystem("pdbresnum", null));
+    assertFalse(DBRefUtils.isDasCoordinateSystem(null, new DBRefEntry(
+            "PDB", "v1", "a1")));
+
+    assertTrue(DBRefUtils.isDasCoordinateSystem("pdbresnum",
+            new DBRefEntry("PDB", "v1", "a1")));
+    assertTrue(DBRefUtils.isDasCoordinateSystem("PDBRESNUM",
+            new DBRefEntry("PDB", "v1", "a1")));
+    // "pdb" is converted to upper-case in DBRefEntry constructor
+    assertTrue(DBRefUtils.isDasCoordinateSystem("pdbresnum",
+            new DBRefEntry("pdb", "v1", "a1")));
+    assertFalse(DBRefUtils.isDasCoordinateSystem("pdb", new DBRefEntry(
+            "pdb", "v1", "a1")));
+
+    assertTrue(DBRefUtils.isDasCoordinateSystem("UNIPROT", new DBRefEntry(
+            "Uniprot", "v1", "a1")));
+    assertTrue(DBRefUtils.isDasCoordinateSystem("Uniprot", new DBRefEntry(
+            "UNIPROT", "v1", "a1")));
+    assertFalse(DBRefUtils.isDasCoordinateSystem("UNIPROTKB",
+            new DBRefEntry(
+            "pdb", "v1", "a1")));
+
+    assertTrue(DBRefUtils.isDasCoordinateSystem("EMBL", new DBRefEntry(
+            "EMBL", "v1", "a1")));
+    assertTrue(DBRefUtils.isDasCoordinateSystem("embl", new DBRefEntry(
+            "embl", "v1", "a1")));
+  }
+
+  /**
+   * Test 'parsing' a DBRef - non PDB case
+   */
+  @Test
+  public void testParseToDbRef()
+  {
+    SequenceI seq = new Sequence("Seq1", "ABCD");
+    DBRefEntry ref = DBRefUtils.parseToDbRef(seq, "EMBL", "1.2", "a7890");
+    DBRefEntry[] refs = seq.getDBRef();
+    assertEquals(1, refs.length);
+    assertSame(ref, refs[0]);
+    assertEquals("EMBL", ref.getSource());
+    assertEquals("1.2", ref.getVersion());
+    assertEquals("a7890", ref.getAccessionId());
+    assertNull(seq.getPDBId());
+  }
+
+  /**
+   * Test 'parsing' a DBRef - Stockholm PDB format
+   */
+  @Test
+  public void testParseToDbRef_PDB()
+  {
+    SequenceI seq = new Sequence("Seq1", "ABCD");
+    DBRefEntry ref = DBRefUtils.parseToDbRef(seq, "pdb", "1.2",
+            "1WRI A; 7-80;");
+    DBRefEntry[] refs = seq.getDBRef();
+    assertEquals(1, refs.length);
+    assertSame(ref, refs[0]);
+    assertEquals("PDB", ref.getSource());
+    assertEquals("1.2", ref.getVersion());
+    // DBRef id is pdbId + chain code
+    assertEquals("1WRIA", ref.getAccessionId());
+    assertEquals(1, seq.getPDBId().size());
+    PDBEntry pdbRef = seq.getPDBId().get(0);
+    assertEquals("1WRI", pdbRef.getId());
+    assertNull(pdbRef.getFile());
+    assertEquals("A", pdbRef.getProperty().get("CHAIN"));
+    assertNull(pdbRef.getType());
+  }
+
+  /**
+   * Test the method that searches for matching references - case when we are
+   * matching a reference with no mappings
+   */
+  @Test
+  public void testSearchRefs_noMapping()
+  {
+    DBRefEntry target = new DBRefEntry("EMBL", "2", "A1234");
+
+    DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches
+    // constructor changes embl to EMBL
+    DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1234"); // matches
+    // constructor does not upper-case accession id
+    DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "a1234"); // no match
+    DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match
+    // ref5 matches although it has a mapping - the mapping is ignored
+    DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1234");
+    ref5.setMap(new Mapping(new MapList(new int[]
+    { 1, 1 }, new int[]
+    { 1, 1 }, 1, 1)));
+
+    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[]
+    { ref1, ref2, ref3, ref4, ref5 }, target);
+    assertEquals(3, matches.length);
+    assertSame(ref1, matches[0]);
+    assertSame(ref2, matches[1]);
+    assertSame(ref5, matches[2]);
+  }
+
+  /**
+   * Test the method that searches for matching references - case when we are
+   * matching a reference with a mapping
+   */
+  @Test
+  public void testSearchRefs_withMapping()
+  {
+    DBRefEntry target = new DBRefEntry("EMBL", "2", "A1234");
+    final Mapping map1 = new Mapping(new MapList(new int[]
+    { 1, 1 }, new int[]
+    { 1, 1 }, 1, 1));
+    target.setMap(map1);
+
+    // these all match target iff mappings match
+    DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // no map: matches
+    DBRefEntry ref2 = new DBRefEntry("EMBL", "1", "A1234"); // =map: matches
+    final Mapping map2 = new Mapping(new MapList(new int[]
+    { 1, 1 }, new int[]
+    { 1, 1 }, 1, 1));
+    ref2.setMap(map2);
+
+    // different map: no match
+    DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1234");
+    final Mapping map3 = new Mapping(new MapList(new int[]
+    { 1, 1 }, new int[]
+    { 1, 1 }, 2, 2));
+    ref3.setMap(map3);
+
+    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[]
+    { ref1, ref2, ref3 }, target);
+    assertEquals(2, matches.length);
+    assertSame(ref1, matches[0]);
+    assertSame(ref2, matches[1]);
+  }
+}
diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java

index 4e144fc..c86259b 100644 (file)
--- a/test/jalview/util/MappingUtilsTest.java
+++ b/test/jalview/util/MappingUtilsTest.java
@@ -90,7 +90,7 @@ public class MappingUtilsTest
     * Simple test of mapping with introns involved.
     */
    @Test
-  public void testBuildSearchResults_withIntro()
+  public void testBuildSearchResults_withIntron()
    {
      final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT");
      seq1.createDatasetSequence();
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 13 Apr 2015 15:22:35 +0000 (16:22 +0100)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 13 Apr 2015 15:22:35 +0000 (16:22 +0100)
resources/lang/Messages.properties		patch \| blob \| history
src/jalview/analysis/AlignmentUtils.java		patch \| blob \| history
src/jalview/analysis/CrossRef.java		patch \| blob \| history
src/jalview/analysis/SeqsetUtils.java		patch \| blob \| history
src/jalview/datamodel/Alignment.java		patch \| blob \| history
src/jalview/datamodel/FeatureProperties.java		patch \| blob \| history
src/jalview/datamodel/Sequence.java		patch \| blob \| history
src/jalview/datamodel/SequenceI.java		patch \| blob \| history
src/jalview/util/DBRefUtils.java		patch \| blob \| history
src/jalview/ws/dbsources/Uniprot.java		patch \| blob \| history
test/jalview/analysis/AlignmentUtilsTests.java		patch \| blob \| history
test/jalview/analysis/CrossRefTest.java	[new file with mode: 0644]	patch \| blob
test/jalview/util/DBRefUtilsTest.java	[new file with mode: 0644]	patch \| blob
test/jalview/util/MappingUtilsTest.java		patch \| blob \| history