Merge branch 'mungo_develop' into features/JAL-653_JAL-1766_htslib_refseqsupport
author gmungoc <g.m.carstairs@dundee.ac.uk>
Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
29 files changed:
resources/lang/Messages.properties
src/jalview/analysis/CrossRef.java
src/jalview/datamodel/AlignedCodonFrame.java
src/jalview/ext/ensembl/EnsemblCdna.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/ext/ensembl/EnsemblLookup.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblProtein.java
src/jalview/ext/ensembl/EnsemblRestClient.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
src/jalview/ext/ensembl/EnsemblSymbol.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblXref.java
src/jalview/ext/ensembl/Species.java [new file with mode: 0644]
src/jalview/gui/AlignFrame.java
src/jalview/gui/SequenceFetcher.java
src/jalview/gui/SplitFrame.java
src/jalview/io/AlignFile.java
src/jalview/io/gff/SequenceOntologyLite.java
src/jalview/util/DBRefUtils.java
src/jalview/util/MappingUtils.java
test/jalview/ext/ensembl/EnsemblCdnaTest.java
test/jalview/ext/ensembl/EnsemblCdsTest.java
test/jalview/ext/ensembl/EnsemblGeneTest.java
test/jalview/ext/ensembl/EnsemblProteinTest.java
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
test/jalview/util/DBRefUtilsTest.java
test/jalview/util/MappingUtilsTest.java
test/jalview/ws/SequenceFetcherTest.java
test/jalview/ws/seqfetcher/DbRefFetcherTest.java

index 5ce5f46..4ab8732 100644 (file)
@@ -218,6 +218,8 @@ label.above_identity_threshold = Above Identity Threshold
 label.show_sequence_features = Show Sequence Features
 label.nucleotide = Nucleotide
 label.protein = Protein
+label.nucleotides = Nucleotides
+label.proteins = Proteins
 label.to_new_alignment = To New Alignment
 label.to_this_alignment = Add To This Alignment
 label.apply_colour_to_all_groups = Apply Colour To All Groups
index d45750e..2f6076a 100644 (file)
@@ -219,30 +219,23 @@ public class CrossRef
 
   /**
    * 
-   * @param dna
-   * @param seqs
-   * @return
-   */
-  public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
-          String source)
-  {
-    return findXrefSequences(seqs, dna, source, null);
-  }
-
-  /**
-   * 
    * @param seqs
    *          sequences whose xrefs are being retrieved
    * @param dna
    *          true if sequences are nucleotide
    * @param source
-   * @param dataset
-   *          alignment to search for product sequences.
+   * @param al
+   *          alignment to search for cross-referenced sequences (and possibly
+   *          add to)
+   * @param addedPeers
+   *          a list of sequences to add to if 'peers' to the original sequences
+   *          are found e.g. alternative protein products for a protein's gene
    * @return products (as dataset sequences)
    */
   public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
-          String source, AlignmentI dataset)
+          String source, AlignmentI al, List<SequenceI> addedPeers)
   {
+    AlignmentI dataset = al.getDataset() == null ? al : al.getDataset();
     List<SequenceI> rseqs = new ArrayList<SequenceI>();
     AlignedCodonFrame cf = new AlignedCodonFrame();
     for (SequenceI seq : seqs)
@@ -389,10 +382,12 @@ public class CrossRef
                           int sf = map.getMap().getToLowest();
                           int st = map.getMap().getToHighest();
                           SequenceI mappedrg = ms.getSubSequence(sf, st);
-                          SequenceI loc = dss.getSubSequence(sf, st);
+                          // SequenceI loc = dss.getSubSequence(sf, st);
                           if (mappedrg.getLength() > 0
-                                  && mappedrg.getSequenceAsString().equals(
-                                          loc.getSequenceAsString()))
+                                  && ms.getSequenceAsString().equals(
+                                          dss.getSequenceAsString()))
+                          // && mappedrg.getSequenceAsString().equals(
+                          // loc.getSequenceAsString()))
                           {
                             String msg = "Mapping updated from "
                                     + ms.getName()
@@ -414,8 +409,8 @@ public class CrossRef
                               for (SequenceFeature feat : sfs)
                               {
                                 /* 
-                                 * we override the equality test here (but not
-                                 * elsewhere) to ignore Parent attribute
+                                 * we override SequenceFeature.equals here (but
+                                 * not elsewhere) to ignore Parent attribute
                                  * TODO not quite working yet!
                                  */
                                 if (!copiedFeatures
@@ -430,6 +425,12 @@ public class CrossRef
                             cf.addMap(retrieved[rs].getDatasetSequence(),
                                     dss, map.getMap());
                           }
+                          else
+                          {
+                            addedPeers.add(map.getTo());
+                            cf.addMap(retrieved[rs].getDatasetSequence(),
+                                    map.getTo(), map.getMap());
+                          }
                         } catch (Exception e)
                         {
                           System.err
@@ -452,9 +453,7 @@ public class CrossRef
     Alignment ral = null;
     if (rseqs.size() > 0)
     {
-      SequenceI[] rsqs = new SequenceI[rseqs.size()];
-      rseqs.toArray(rsqs);
-      ral = new Alignment(rsqs);
+      ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
       if (cf != null && !cf.isEmpty())
       {
         ral.addCodonFrame(cf);
index 5dfd434..3fc8c28 100644 (file)
@@ -321,7 +321,8 @@ public class AlignedCodonFrame
       {
         for (SequenceI sourceAligned : al.getSequences())
         {
-          if (ssm.mapping.to == sourceAligned.getDatasetSequence())
+          if (ssm.mapping.to == sourceAligned.getDatasetSequence()
+                  || ssm.mapping.to == sourceAligned)
           {
             return sourceAligned;
           }
index f60125b..467fc6d 100644 (file)
@@ -10,9 +10,13 @@ import com.stevesoft.pat.Regex;
 
 public class EnsemblCdna extends EnsemblSeqProxy
 {
-  // TODO modify to accept other species e.g. ENSMUSPnnn
+  /*
+   * accepts ENST or ENSG with 11 digits
+   * or ENSMUST or similar for other species
+   * or CCDSnnnnn.nn with at least 3 digits
+   */
   private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENST|ENSG|CCDS)[0-9.]{3,}$");
+          "(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
   
   /*
    * fetch exon features on genomic sequence (to identify the cdna regions)
index 73649b4..10841bd 100644 (file)
@@ -7,8 +7,8 @@ import jalview.datamodel.SequenceI;
 import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
 import jalview.util.MapList;
+import jalview.util.StringUtils;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -24,9 +24,11 @@ public class EnsemblGene extends EnsemblSeqProxy
 {
   private static final String GENE_PREFIX = "gene:";
 
-  // TODO modify to accept other species e.g. ENSMUSGnnn
-  private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENSG|ENST)[0-9]{11}$");
+  /*
+   * accepts anything as we will attempt lookup of gene or 
+   * transcript id or gene name
+   */
+  private static final Regex ACCESSION_REGEX = new Regex(".*");
 
   private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
       EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
@@ -52,8 +54,15 @@ public class EnsemblGene extends EnsemblSeqProxy
   }
 
   /**
-   * Builds an alignment of all transcripts for the requested gene:
+   * Returns an alignment containing the gene(s) for the given gene or
+   * transcript identifier, or external identifier (e.g. Uniprot id). If given a
+   * gene name or external identifier, returns any related gene sequences found
+   * for model organisms. If only a single gene is queried for, then its
+   * transcripts are also retrieved and added to the alignment. <br>
+   * Method:
    * <ul>
+   * <li>resolves a transcript identifier by looking up its parent gene id</li>
+   * <li>resolves an external identifier by looking up xref-ed gene ids</li>
    * <li>fetches the gene sequence</li>
    * <li>fetches features on the sequence</li>
    * <li>identifies "transcript" features whose Parent is the requested gene</li>
@@ -65,81 +74,81 @@ public class EnsemblGene extends EnsemblSeqProxy
    * <li>aligns each transcript against the gene sequence based on the position
    * mappings</li>
    * </ul>
+   * 
+   * @param query
+   *          one or more identifiers separated by a space
+   * @return an alignment containing one or more genes, and possibly
+   *         transcripts, or null
    */
   @Override
   public AlignmentI getSequenceRecords(String query) throws Exception
   {
-    List<String> transcriptsWanted = null;
+    // todo: tidy up handling of one or multiple accession ids
+    String[] queries = query.split(getAccessionSeparator());
 
+    /*
+     * if given a transcript id, look up its gene parent
+     */
     if (isTranscriptIdentifier(query))
     {
-      transcriptsWanted = Arrays.asList(query
-              .split(getAccessionSeparator()));
-      query = getGeneForTranscript(query);
+      // we are assuming all transcripts have the same gene parent here
+      query = new EnsemblLookup().getParent(queries[0]);
       if (query == null)
       {
         return null;
       }
     }
 
+    /*
+     * if given a gene name or other external identifier, look up and
+     * fetch the corresponding gene for all model organisms
+     */
+    if (!isGeneIdentifier(query))
+    {
+      List<String> geneIds = new EnsemblSymbol().getIds(query);
+      if (geneIds.isEmpty())
+      {
+        return null;
+      }
+      String theIds = StringUtils.listToDelimitedString(geneIds,
+              getAccessionSeparator());
+      return getSequenceRecords(theIds);
+    }
+
     AlignmentI al = super.getSequenceRecords(query);
-    if (al.getHeight() > 0)
+
+    /*
+     * if we retrieved a single gene, get its transcripts as well
+     */
+    if (al.getHeight() == 1)
     {
-      getTranscripts(al, query, transcriptsWanted);
+      getTranscripts(al, query);
     }
 
     return al;
   }
 
   /**
-   * Gets the parent gene identifier for a given transcript identifier, by
-   * retrieving 'transcript' features overlapping the transcript, and finding
-   * the Parent property of the feature whose id is the given identifier.
+   * Returns the first Ensembl gene identifier found for a gene name by calling
+   * the xrefs symbol REST service for model organisms, or null if none found.
    * 
    * @param query
    * @return
    */
-  protected String getGeneForTranscript(String transcriptId)
+  protected String getGeneIdentifiersForName(String query)
   {
-    String geneId = null;
-
-    /*
-     * reduce multiple transcripts (e.g. from Uniprot x-ref) to the first
-     * one only as representative (they should all have the same gene)
-     */
-    transcriptId = transcriptId.split(getAccessionSeparator())[0];
-
-    try
+    List<String> ids = new EnsemblSymbol().getIds(query);
+    if (ids != null)
     {
-      EnsemblFeatureType[] geneFeature = new EnsemblFeatureType[] { EnsemblFeatureType.transcript };
-      AlignmentI al = new EnsemblFeatures().getSequenceRecords(
-              transcriptId, geneFeature);
-      if (al != null && al.getHeight() > 0)
+      for (String id : ids)
       {
-        SequenceFeature[] sfs = al.getSequenceAt(0).getSequenceFeatures();
-        if (sfs != null)
+        if (isGeneIdentifier(id))
         {
-          for (SequenceFeature sf : sfs)
-          {
-            if (transcriptId.equals(getTranscriptId(sf)))
-            {
-              String parent = (String) sf.getValue(PARENT);
-              if (parent != null && parent.startsWith(GENE_PREFIX))
-              {
-                geneId = parent.substring(5);
-              }
-              break;
-            }
-          }
+          return id;
         }
       }
-      return geneId;
-    } catch (IOException e)
-    {
-      System.err.println("Error retrieving gene id for " + transcriptId
-              + ": " + e.getMessage());
-      return null;
     }
+    return null;
   }
 
   /**
@@ -149,17 +158,14 @@ public class EnsemblGene extends EnsemblSeqProxy
    * 
    * @param al
    * @param accId
-   * @param transcriptsWanted
-   *          optional list of transcript ids to filter by
    * @throws Exception
    */
-  protected void getTranscripts(AlignmentI al, String accId,
-          List<String> transcriptsWanted)
+  protected void getTranscripts(AlignmentI al, String accId)
           throws Exception
   {
     SequenceI gene = al.getSequenceAt(0);
     List<SequenceFeature> transcriptFeatures = getTranscriptFeatures(accId,
-            gene, transcriptsWanted);
+            gene);
 
     for (SequenceFeature transcriptFeature : transcriptFeatures)
     {
@@ -250,6 +256,11 @@ public class EnsemblGene extends EnsemblSeqProxy
             transcript.getDatasetSequence(), mapping, parentId);
 
     /*
+     * fetch and save cross-references
+     */
+    super.getCrossReferences(transcript);
+
+    /*
      * and finally fetch the protein product and save as a cross-reference
      */
     new EnsemblCdna().addProteinProduct(transcript);
@@ -274,12 +285,10 @@ public class EnsemblGene extends EnsemblSeqProxy
    * 
    * @param accId
    * @param geneSequence
-   * @param transcriptsWanted
-   *          optional list of ids to filter on
    * @return
    */
   protected List<SequenceFeature> getTranscriptFeatures(String accId,
-          SequenceI geneSequence, List<String> transcriptsWanted)
+          SequenceI geneSequence)
   {
     List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
 
@@ -292,14 +301,6 @@ public class EnsemblGene extends EnsemblSeqProxy
       {
         if (isTranscript(sf.getType()))
         {
-          if (transcriptsWanted != null)
-          {
-            String transcriptId = (String) sf.getValue("transcript_id");
-            if (!transcriptsWanted.contains(transcriptId))
-            {
-              // continue;
-            }
-          }
           String parent = (String) sf.getValue(PARENT);
           if (parentIdentifier.equals(parent))
           {
diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java
new file mode 100644 (file)
index 0000000..cd792b5
--- /dev/null
@@ -0,0 +1,135 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
+
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblLookup extends EnsemblRestClient
+{
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL";
+  }
+
+  @Override
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    return null;
+  }
+
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    String identifier = ids.get(0);
+    return getUrl(identifier);
+  }
+
+  /**
+   * @param identifier
+   * @return
+   */
+  protected URL getUrl(String identifier)
+  {
+    String url = ENSEMBL_REST + "/lookup/id/" + identifier
+            + "?content-type=application/json";
+    try
+    {
+      return new URL(url);
+    } catch (MalformedURLException e)
+    {
+      return null;
+    }
+  }
+
+  @Override
+  protected boolean useGetRequest()
+  {
+    return true;
+  }
+
+  @Override
+  protected String getRequestMimeType(boolean multipleIds)
+  {
+    return "application/json";
+  }
+
+  @Override
+  protected String getResponseMimeType()
+  {
+    return "application/json";
+  }
+
+  /**
+   * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
+   * given identifier, or null if not found
+   * 
+   * @param identifier
+   * @return
+   */
+  public String getParent(String identifier)
+  {
+    List<String> ids = Arrays.asList(new String[] { identifier });
+  
+    BufferedReader br = null;
+    try
+    {
+      URL url = getUrl(identifier);
+      if (url != null)
+      {
+        br = getHttpResponse(url, ids);
+      }
+      return (parseResponse(br));
+    } catch (IOException e)
+    {
+      // ignore
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+  /**
+   * Parses "Parent" from the JSON response and returns the value, or null if
+   * not found
+   * 
+   * @param br
+   * @return
+   * @throws IOException
+   */
+  protected String parseResponse(BufferedReader br) throws IOException
+  {
+    String parent = null;
+    JSONParser jp = new JSONParser();
+    try
+    {
+      JSONObject val = (JSONObject) jp.parse(br);
+      parent = val.get("Parent").toString();
+    } catch (ParseException e)
+    {
+      // ignore
+    }
+    return parent;
+  }
+
+}
index 29c7eda..fb79ccf 100644 (file)
@@ -10,9 +10,13 @@ import com.stevesoft.pat.Regex;
 
 public class EnsemblProtein extends EnsemblSeqProxy
 {
-  // TODO modify to accept other species e.g. ENSMUSPnnn
+  /*
+   * accepts ENSP with 11 digits
+   * or ENSMUSP or similar for other species
+   * or CCDSnnnnn.nn with at least 3 digits
+   */
   private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENSP|CCDS)[0-9.]{3,}$");
+          "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
 
   private static final List<String> CROSSREFS = Arrays.asList(new String[] {
       "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
index 215eb7a..297f71b 100644 (file)
@@ -14,6 +14,8 @@ import java.util.List;
 
 import javax.ws.rs.HttpMethod;
 
+import com.stevesoft.pat.Regex;
+
 /**
  * Base class for Ensembl REST service clients
  * 
@@ -31,12 +33,28 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
 
   private final static long RETEST_INTERVAL = 10000L; // 10 seconds
 
+  private static final Regex TRANSCRIPT_REGEX = new Regex(
+            "(ENS)([A-Z]{3}|)T[0-9]{11}$");
+
+  private static final Regex GENE_REGEX = new Regex(
+            "(ENS)([A-Z]{3}|)G[0-9]{11}$");
+
   private static boolean ensemblRestAvailable = false;
 
   private static long lastCheck = -1;
 
   protected volatile boolean inProgress = false;
 
+  public static boolean isTranscriptIdentifier(String query)
+  {
+    return query == null ? false : TRANSCRIPT_REGEX.search(query);
+  }
+
+  public static boolean isGeneIdentifier(String query)
+  {
+    return query == null ? false : GENE_REGEX.search(query);
+  }
+
   @Override
   public boolean queryInProgress()
   {
index a2be17b..77263ff 100644 (file)
@@ -30,8 +30,6 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map.Entry;
 
-import com.stevesoft.pat.Regex;
-
 /**
  * Base class for Ensembl sequence fetchers
  * 
@@ -39,12 +37,8 @@ import com.stevesoft.pat.Regex;
  */
 public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
-  // TODO modify to accept other species e.g. ENSMUSTnnn
-  private static final Regex TRANSCRIPT_REGEX = new Regex(
-          "(ENST)[0-9]{11}$");
-
   private static final List<String> CROSS_REFERENCES = Arrays
-          .asList(new String[] { "CCDS" });
+          .asList(new String[] { "CCDS", "Uniprot/SWISSPROT" });
 
   protected static final String CONSEQUENCE_TYPE = "consequence_type";
 
@@ -163,6 +157,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       }
     }
 
+    if (alignment == null)
+    {
+      return null;
+    }
+
     /*
      * fetch and transfer genomic sequence features,
      * fetch protein product and add as cross-reference
@@ -344,7 +343,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     int mappedDnaLength = getCdsRanges(dnaSeq, ranges);
 
     int proteinLength = proteinSeq.getLength();
-    List<int[]> proteinRange = new ArrayList<int[]>();
     int proteinStart = 1;
 
     /*
@@ -356,15 +354,20 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       proteinStart = 2;
       proteinLength--;
     }
-    proteinRange.add(new int[] { proteinStart, proteinLength });
+    List<int[]> proteinRange = new ArrayList<int[]>();
 
     /*
      * dna length should map to protein (or protein plus stop codon)
      */
     int codesForResidues = mappedDnaLength / 3;
-    if (codesForResidues == proteinLength
-            || codesForResidues == (proteinLength + 1))
+    if (codesForResidues == (proteinLength + 1))
+    {
+      MappingUtils.unmapStopCodon(ranges, mappedDnaLength);
+      codesForResidues--;
+    }
+    if (codesForResidues == proteinLength)
     {
+      proteinRange.add(new int[] { proteinStart, proteinLength });
       return new MapList(ranges, proteinRange, 3, 1);
     }
     return null;
@@ -389,14 +392,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     {
       return 0;
     }
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
     int mappedDnaLength = 0;
     for (SequenceFeature sf : sfs)
     {
       /*
        * process a CDS feature (or a sub-type of CDS)
        */
-      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-              SequenceOntologyI.CDS))
+      if (so.isA(sf.getType(), SequenceOntologyI.CDS))
       {
         int phase = 0;
         try {
@@ -411,7 +414,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
          */
         int begin = sf.getBegin();
         int end = sf.getEnd();
-        if (ranges.isEmpty() && phase > 0)
+        if (ranges.isEmpty())
         {
           begin += phase;
           if (begin > end)
@@ -1129,9 +1132,4 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
             || SequenceOntologyFactory.getInstance().isA(featureType,
                     SequenceOntologyI.TRANSCRIPT);
   }
-
-  public static boolean isTranscriptIdentifier(String query)
-  {
-    return query == null ? false : TRANSCRIPT_REGEX.search(query);
-  }
 }
index 67c5e63..9a4952e 100644 (file)
@@ -12,9 +12,13 @@ import com.stevesoft.pat.Regex;
  */
 abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
 {
-  // TODO modify to accept other species e.g. ENSMUSTnnn
+  /*
+   * accepts ENSG/T/E/P with 11 digits
+   * or ENSMUSP or similar for other species
+   * or CCDSnnnnn.nn with at least 3 digits
+   */
   private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENSP|ENST|ENSG|CCDS)[0-9.]{3,}$");
+          "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
 
   /*
    * possible values for the 'feature' parameter of the /overlap REST service
diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java
new file mode 100644 (file)
index 0000000..5b3baa1
--- /dev/null
@@ -0,0 +1,121 @@
+package jalview.ext.ensembl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblSymbol extends EnsemblXref
+{
+  /**
+   * Returns the first "id" value in gene identifier format from the JSON
+   * response, or null if none found
+   * 
+   * @param br
+   * @return
+   * @throws IOException
+   */
+  protected String parseResponse(BufferedReader br)
+          throws IOException
+  {
+    JSONParser jp = new JSONParser();
+    String result = null;
+    try
+    {
+      JSONArray responses = (JSONArray) jp.parse(br);
+      Iterator rvals = responses.iterator();
+      while (rvals.hasNext())
+      {
+        JSONObject val = (JSONObject) rvals.next();
+        String id = val.get("id").toString();
+        if (id != null && isGeneIdentifier(id))
+        {
+          result = id;
+          break;
+        }
+      }
+    } catch (ParseException e)
+    {
+      // ignore
+    }
+    return result;
+  }
+
+  protected URL getUrl(String id, Species species)
+  {
+    String url = ENSEMBL_REST + "/xrefs/symbol/" + species.toString() + "/"
+            + id
+            + "?content-type=application/json";
+    try
+    {
+      return new URL(url);
+    } catch (MalformedURLException e)
+    {
+      return null;
+    }
+  }
+
+  /**
+   * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves any gene ids
+   * for the given identifier, for any known model organisms
+   * 
+   * @param identifier
+   * @return
+   */
+  public List<String> getIds(String identifier)
+  {
+    List<String> result = new ArrayList<String>();
+    List<String> ids = new ArrayList<String>();
+    ids.add(identifier);
+  
+    String[] queries = identifier.split(getAccessionSeparator());
+    BufferedReader br = null;
+    try
+    {
+      for (String query : queries)
+      {
+        for (Species taxon : Species.values())
+        {
+          if (taxon.isModelOrganism())
+          {
+            URL url = getUrl(query, taxon);
+            if (url != null)
+            {
+              br = getHttpResponse(url, ids);
+            }
+            String geneId = parseResponse(br);
+            if (geneId != null)
+            {
+              result.add(geneId);
+            }
+          }
+        }
+      }
+    } catch (IOException e)
+    {
+      // ignore
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+    return result;
+  }
+
+}
index d4c5b18..514e44a 100644 (file)
@@ -22,7 +22,7 @@ import org.json.simple.parser.ParseException;
  * service
  * 
  * @author gmcarstairs
- *
+ * @see <a href="http://rest.ensembl.org/documentation/info/xref_id">Ensembl REST documentation: xref_id</a>
  */
 class EnsemblXref extends EnsemblRestClient
 {
@@ -42,8 +42,7 @@ class EnsemblXref extends EnsemblRestClient
   @Override
   protected URL getUrl(List<String> ids) throws MalformedURLException
   {
-    // TODO Auto-generated method stub
-    return null;
+    return getUrl(ids.get(0));
   }
 
   @Override
diff --git a/src/jalview/ext/ensembl/Species.java b/src/jalview/ext/ensembl/Species.java
new file mode 100644 (file)
index 0000000..d8a00a5
--- /dev/null
@@ -0,0 +1,32 @@
+package jalview.ext.ensembl;
+
+/**
+ * Selected species identifiers used by Ensembl
+ * 
+ * @author gmcarstairs
+ * @see <a href="http://rest.ensembl.org/info/species?content-type=text/xml">Ensembl REST info/species</a>
+ */
+enum Species
+{
+  /*
+   * using any suitably readable alias as the enum name; these are all
+   * valid species parameters to Ensembl REST services where applicable
+   */
+  human(true), mouse(true), s_cerevisiae(true), cow(false), pig(false),
+  rat(true), celegans(true), sheep(false), horse(false), gorilla(false),
+  rabbit(false), gibbon(false), dog(false), orangutan(false),
+  xenopus(true), chimpanzee(false), cat(false), zebrafish(true), chicken(
+          true), dmelanogaster(true);
+
+  boolean modelOrganism;
+
+  private Species(boolean model)
+  {
+    this.modelOrganism = model;
+  }
+
+  boolean isModelOrganism()
+  {
+    return modelOrganism;
+  }
+}
index 85f7d19..c93b84b 100644 (file)
@@ -904,7 +904,9 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
     rnahelicesColour.setEnabled(av.getAlignment().hasRNAStructure());
     rnahelicesColour
             .setSelected(av.getGlobalColourScheme() instanceof jalview.schemes.RNAHelicesColour);
-    setShowProductsEnabled();
+
+    showProducts.setEnabled(canShowProducts());
+
     updateEditMenuBar();
   }
 
@@ -4652,67 +4654,27 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
     }
   }
 
-  /*
-   * public void vamsasStore_actionPerformed(ActionEvent e) { JalviewFileChooser
-   * chooser = new JalviewFileChooser(jalview.bin.Cache.
-   * getProperty("LAST_DIRECTORY"));
-   * 
-   * chooser.setFileView(new JalviewFileView()); chooser.setDialogTitle("Export
-   * to Vamsas file"); chooser.setToolTipText("Export");
-   * 
-   * int value = chooser.showSaveDialog(this);
-   * 
-   * if (value == JalviewFileChooser.APPROVE_OPTION) {
-   * jalview.io.VamsasDatastore vs = new jalview.io.VamsasDatastore(viewport);
-   * //vs.store(chooser.getSelectedFile().getAbsolutePath() ); vs.storeJalview(
-   * chooser.getSelectedFile().getAbsolutePath(), this); } }
-   */
   /**
-   * prototype of an automatically enabled/disabled analysis function
+   * Searches selected sequences for xRef products and builds the Show
+   * Cross-References menu (formerly called Show Products)
    * 
+   * @return true if the Show Cross-References menu should be enabled
    */
-  protected void setShowProductsEnabled()
+  public boolean canShowProducts()
   {
     SequenceI[] selection = viewport.getSequenceSelection();
-    if (canShowProducts(selection, viewport.getSelectionGroup() != null,
-            viewport.getAlignment().getDataset()))
-    {
-      showProducts.setEnabled(true);
-
-    }
-    else
-    {
-      showProducts.setEnabled(false);
-    }
-  }
-
-  /**
-   * search selection for sequence xRef products and build the show products
-   * menu.
-   * 
-   * @param selection
-   * @param dataset
-   * @return true if showProducts menu should be enabled.
-   */
-  public boolean canShowProducts(SequenceI[] selection,
-          boolean isRegionSelection, Alignment dataset)
-  {
+    AlignmentI dataset = viewport.getAlignment().getDataset();
     boolean showp = false;
     try
     {
       showProducts.removeAll();
       final boolean dna = viewport.getAlignment().isNucleotide();
-      final Alignment ds = dataset;
       String[] ptypes = (selection == null || selection.length == 0) ? null
               : CrossRef.findSequenceXrefTypes(dna, selection, dataset);
-      // Object[] prods =
-      // CrossRef.buildXProductsList(viewport.getAlignment().isNucleotide(),
-      // selection, dataset, true);
-      final SequenceI[] sel = selection;
+
       for (int t = 0; ptypes != null && t < ptypes.length; t++)
       {
         showp = true;
-        final boolean isRegSel = isRegionSelection;
         final AlignFrame af = this;
         final String source = ptypes[t];
         JMenuItem xtype = new JMenuItem(ptypes[t]);
@@ -4722,9 +4684,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
           @Override
           public void actionPerformed(ActionEvent e)
           {
-            // TODO: new thread for this call with vis-delay
-            af.showProductsFor(af.viewport.getSequenceSelection(),
-                    isRegSel, dna, source);
+            showProductsFor(af.viewport.getSequenceSelection(), dna, source);
           }
 
         });
@@ -4735,15 +4695,15 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
     } catch (Exception e)
     {
       jalview.bin.Cache.log
-              .warn("canTranslate threw an exception - please report to help@jalview.org",
+              .warn("canShowProducts threw an exception - please report to help@jalview.org",
                       e);
       return false;
     }
     return showp;
   }
 
-  protected void showProductsFor(final SequenceI[] sel,
-          final boolean isRegSel, final boolean dna, final String source)
+  protected void showProductsFor(final SequenceI[] sel, final boolean dna,
+          final String source)
   {
     Runnable foo = new Runnable()
     {
@@ -4757,27 +4717,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                 new Object[] { source }), sttime);
         try
         {
-          // update our local dataset reference
-          Alignment ds = AlignFrame.this.getViewport().getAlignment()
-                  .getDataset();
-          Alignment prods = CrossRef
-                  .findXrefSequences(sel, dna, source, ds);
-          if (prods != null)
+          /*
+           * 'peer' sequences are any to add to this alignment, for example
+           * alternative protein products for my protein's gene
+           */
+          List<SequenceI> addedPeers = new ArrayList<SequenceI>();
+          AlignmentI alignment = AlignFrame.this.getViewport().getAlignment();
+          Alignment xrefs = CrossRef.findXrefSequences(sel, dna, source,
+                  alignment, addedPeers);
+          if (xrefs != null)
           {
-            SequenceI[] sprods = new SequenceI[prods.getHeight()];
-            for (int s = 0; s < sprods.length; s++)
-            {
-              sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
-              if (ds.getSequences() == null
-                      || !ds.getSequences().contains(
-                              sprods[s].getDatasetSequence()))
-              {
-                ds.addSequence(sprods[s].getDatasetSequence());
-              }
-              sprods[s].updatePDBIds();
-            }
-            Alignment al = new Alignment(sprods);
-            al.setDataset(ds);
+            Alignment al = makeCrossReferencesAlignment(
+                    alignment.getDataset(), xrefs);
 
             /*
              * Copy dna-to-protein mappings to new alignment
@@ -4785,16 +4736,17 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
             // TODO 1: no mappings are set up for EMBL product
             // TODO 2: if they were, should add them to protein alignment, not
             // dna
-            List<AlignedCodonFrame> cf = prods.getCodonFrames();
-            for (AlignedCodonFrame acf : cf)
-            {
-              al.addCodonFrame(acf);
-            }
+            // List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
+            // for (AlignedCodonFrame acf : cf)
+            // {
+            // al.addCodonFrame(acf);
+            // }
             AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH,
                     DEFAULT_HEIGHT);
-            String newtitle = "" + (dna ? "Proteins" : "Nucleotides")
-                    + " for " + (isRegSel ? "selected region of " : "")
-                    + getTitle();
+            String newtitle = String.format("%s %s %s",
+                    MessageManager.getString(dna ? "label.proteins"
+                            : "label.nucleotides"), MessageManager
+                            .getString("label.for"), getTitle());
             newFrame.setTitle(newtitle);
 
             boolean asSplitFrame = Cache.getDefault(
@@ -4808,25 +4760,50 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
               AlignmentI copyAlignment = null;
               final SequenceI[] sequenceSelection = AlignFrame.this.viewport
                       .getSequenceSelection();
+              List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
               if (dna)
               {
                 copyAlignment = AlignmentUtils.makeCdsAlignment(
                         sequenceSelection, cf);
                 al.getCodonFrames().clear();
                 al.getCodonFrames().addAll(cf);
-                final StructureSelectionManager ssm = StructureSelectionManager
-                        .getStructureSelectionManager(Desktop.instance);
-                ssm.registerMappings(cf);
               }
               else
               {
                 copyAlignment = new Alignment(new Alignment(
                         sequenceSelection));
+                copyAlignment.getCodonFrames().addAll(cf);
               }
+              StructureSelectionManager ssm = StructureSelectionManager
+                      .getStructureSelectionManager(Desktop.instance);
+              ssm.registerMappings(cf);
+
+              /*
+               * add in any extra 'peer' sequences discovered
+               * (e.g. alternative protein products)
+               */
+              for (SequenceI peer : addedPeers)
+              {
+                copyAlignment.addSequence(peer);
+              }
+
+              /*
+               * align protein to dna
+               */
+              // TODO needs debugging
+              // if (dna)
+              // {
+              // al.alignAs(copyAlignment);
+              // }
+              // else
+              // {
+              // copyAlignment.alignAs(al);
+              // }
+
               AlignFrame copyThis = new AlignFrame(copyAlignment,
                       AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
               copyThis.setTitle(AlignFrame.this.getTitle());
-              // SplitFrame with dna above, protein below
+
               boolean showSequenceFeatures = viewport
                       .isShowSequenceFeatures();
               newFrame.setShowSeqFeatures(showSequenceFeatures);
@@ -4849,6 +4826,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
               String linkedTitle = MessageManager
                       .getString("label.linked_view_title");
               Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+              sf.adjustDivider();
             }
             else
             {
@@ -4878,6 +4856,32 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                 new Object[] { source }), sttime);
       }
 
+      /**
+       * Builds a new alignment from the given cross-reference products. A
+       * sequence is derived from each sequence of prods; its dataset sequence
+       * is added to the given dataset unless it is already held there.
+       *
+       * @param dataset
+       *          the dataset that receives the products' dataset sequences,
+       *          and is set as the dataset of the returned alignment
+       * @param prods
+       *          the cross-referenced sequences
+       * @return a new alignment holding the derived sequences
+       */
+      protected Alignment makeCrossReferencesAlignment(
+Alignment dataset,
+              Alignment prods)
+      {
+        SequenceI[] sprods = new SequenceI[prods.getHeight()];
+        for (int s = 0; s < sprods.length; s++)
+        {
+          sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
+          // only add the dataset sequence if the dataset does not yet hold it
+          if (dataset.getSequences() == null
+                  || !dataset.getSequences().contains(
+                          sprods[s].getDatasetSequence()))
+          {
+            dataset.addSequence(sprods[s].getDatasetSequence());
+          }
+          sprods[s].updatePDBIds();
+        }
+        Alignment al = new Alignment(sprods);
+        al.setDataset(dataset);
+        return al;
+      }
+
     };
     Thread frunner = new Thread(foo);
     frunner.start();
index 742a109..fc6fb0d 100755 (executable)
@@ -642,13 +642,14 @@ public class SequenceFetcher extends JPanel implements Runnable
                 boolean rfound = false;
                 for (int r = 0; r < rs.length; r++)
                 {
-                  if (rs[r] != null
-                          && (found = DBRefUtils.searchRefs(
-                                  rs[r].getDBRefs(), dbr)) != null
-                          && found.length > 0)
+                  if (rs[r] != null)
                   {
-                    rfound = true;
-                    rs[r] = null;
+                    found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
+                    if (found != null && found.length > 0)
+                    {
+                      rfound = true;
+                      rs[r] = null;
+                    }
                   }
                 }
                 if (!rfound)
index 083c7ec..617224f 100644 (file)
@@ -61,6 +61,14 @@ import javax.swing.event.InternalFrameEvent;
  */
 public class SplitFrame extends GSplitFrame implements SplitContainerI
 {
+  /*
+   * pixels added to the top/bottom frame sizes when estimating the size of
+   * the SplitFrame, per platform ('tbc' = value still to be confirmed)
+   */
+  private static final int WINDOWS_INSETS_WIDTH = 28; // tbc
+
+  private static final int MAC_INSETS_WIDTH = 28;
+
+  private static final int WINDOWS_INSETS_HEIGHT = 50; // tbc
+
+  private static final int MAC_INSETS_HEIGHT = 50;
+  // pixels subtracted from the Desktop height when capping the frame height
+  private static final int DESKTOP_DECORATORS_HEIGHT = 65;
   private static final long serialVersionUID = 1L;
 
   public SplitFrame(GAlignFrame top, GAlignFrame bottom)
@@ -86,8 +94,10 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
      * estimate width and height of SplitFrame; this.getInsets() doesn't seem to
      * give the full additional size (a few pixels short)
      */
-    int widthFudge = Platform.isAMac() ? 28 : 28; // Windows tbc
-    int heightFudge = Platform.isAMac() ? 50 : 50; // tbc
+    int widthFudge = Platform.isAMac() ? MAC_INSETS_WIDTH
+            : WINDOWS_INSETS_WIDTH;
+    int heightFudge = Platform.isAMac() ? MAC_INSETS_HEIGHT
+            : WINDOWS_INSETS_HEIGHT;
     int width = ((AlignFrame) getTopFrame()).getWidth() + widthFudge;
     int height = ((AlignFrame) getTopFrame()).getHeight()
             + ((AlignFrame) getBottomFrame()).getHeight() + DIVIDER_SIZE
@@ -118,7 +128,8 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
   {
     // allow about 65 pixels for Desktop decorators on Windows
 
-    int newHeight = Math.min(height, Desktop.instance.getHeight() - 65);
+    int newHeight = Math.min(height, Desktop.instance.getHeight()
+            - DESKTOP_DECORATORS_HEIGHT);
     if (newHeight != height)
     {
       int oldDividerLocation = getDividerLocation();
@@ -182,6 +193,40 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
   }
 
   /**
+   * Adjust the divider for a sensible split of the real estate (for example,
+   * when many transcripts are shown with a single protein). This should only be
+   * called after the split pane has been laid out (made visible) so it has a
+   * height.
+   */
+  protected void adjustDivider()
+  {
+    final AlignViewport topViewport = ((AlignFrame) getTopFrame()).viewport;
+    final AlignViewport bottomViewport = ((AlignFrame) getBottomFrame()).viewport;
+    final AlignmentI topAlignment = topViewport.getAlignment();
+    final AlignmentI bottomAlignment = bottomViewport.getAlignment();
+    boolean topAnnotations = topViewport.isShowAnnotation();
+    boolean bottomAnnotations = bottomViewport.isShowAnnotation();
+    int topCount = topAlignment.getHeight();
+    int bottomCount = bottomAlignment.getHeight();
+    int topCharHeight = topViewport.getViewStyle().getCharHeight();
+    int bottomCharHeight = bottomViewport.getViewStyle().getCharHeight();
+
+    /*
+     * estimate the height of each frame's content, then the top frame's
+     * share of the total: topFrameContent / (topFrameContent +
+     * bottomFrameContent)
+     */
+    int insets = Platform.isAMac() ? MAC_INSETS_HEIGHT
+            : WINDOWS_INSETS_HEIGHT;
+    // allow 3 'rows' for scale, scrollbar, status bar
+    int topHeight = insets + (3 + topCount) * topCharHeight
+            + (topAnnotations ? topViewport.calcPanelHeight() : 0);
+    int bottomHeight = insets + (3 + bottomCount) * bottomCharHeight
+            + (bottomAnnotations ? bottomViewport.calcPanelHeight() : 0);
+    // cast to double to avoid integer division (which would give 0)
+    double ratio = ((double) topHeight) / (topHeight + bottomHeight);
+
+    setRelativeDividerLocation(ratio);
+  }
+
+  /**
    * Add a listener to tidy up when the frame is closed.
    */
   protected void addCloseFrameListener()
index 2b8f127..984eff6 100755 (executable)
@@ -355,24 +355,12 @@ public abstract class AlignFile extends FileParse
       String desc = id.substring(space + 1);
       seq.setDescription(desc);
 
-      if (desc.startsWith("chromosome"))
-      {
-        /*
-         * parse Ensembl style gene description e.g.
-         * chromosome:GRCh38:7:140696688:140721955:1
-         */
-        String[] tokens = desc.split(":");
-        if (tokens.length > 3)
-        {
-          try
-          {
-            seq.setStart(Integer.parseInt(tokens[3]));
-          } catch (NumberFormatException e)
-          {
-            // ignore
-          }
-        }
-      }
+      /*
+       * it is tempting to parse Ensembl style gene description e.g.
+       * chromosome:GRCh38:7:140696688:140721955:1 and set the
+       * start position of the sequence, but this causes much confusion
+       * for reverse strand feature locations
+       */
     }
     else
     {
index d2e6654..b3f8161 100644 (file)
@@ -37,6 +37,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "snRNA_gene", "gene" },
     { "miRNA_gene", "gene" },
     { "lincRNA_gene", "gene" },
+    { "rRNA_gene", "gene" },
     
     /*
      * transcript sub-types:
@@ -49,6 +50,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "snRNA", "transcript" },
     { "miRNA", "transcript" },
     { "lincRNA", "transcript" },
+    { "rRNA", "transcript" },
     // there are many more sub-types of ncRNA...
     
     /*
index e7053ed..424d40b 100755 (executable)
@@ -169,6 +169,25 @@ public class DBRefUtils
   }
 
   /**
+   * Returns an array of those references whose accession id is the same as
+   * the given accession id. Only the accession id is compared (using the
+   * matchId comparator); database source, version and mapping are ignored.
+   *
+   * @param ref
+   *          Set of references to search
+   * @param accId
+   *          the accession id to match
+   * @return the matching references, as returned by the three-argument
+   *         searchRefs method this delegates to
+   */
+  public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+  {
+    return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+  }
+
+  /**
    * Returns an array of those references that match the given entry, according
    * to the given comparator. Returns null if no matches.
    * 
@@ -397,6 +416,23 @@ public class DBRefUtils
   };
 
   /**
+   * Comparator that matches two references if both have a non-null accession
+   * id and the two ids are equal (compared with equals()). Nothing else about
+   * the references is compared.
+   */
+  public static DbRefComp matchId = new DbRefComp()
+  {
+    @Override
+    public boolean matches(DBRefEntry refa, DBRefEntry refb)
+    {
+      if (refa.getAccessionId() != null && refb.getAccessionId() != null
+              && refb.getAccessionId().equals(refa.getAccessionId()))
+      {
+        return true;
+      }
+      return false;
+    }
+  };
+
+  /**
    * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
    * database is PDB.
    * <p>
index 1bbfc73..267e871 100644 (file)
@@ -768,4 +768,55 @@ public final class MappingUtils
     }
     return result;
   }
+
+  /**
+   * Removes the last 3 mapped positions (for example a trailing stop codon)
+   * from the given ranges, which are modified in place. Each range is a
+   * [start, end] pair, with end less than start for a reverse strand range. A
+   * range holding only part of the last 3 positions is shortened; ranges
+   * lying wholly within them are removed from the list. If exactly 3
+   * positions are mapped, all ranges are removed. Nothing is changed if
+   * mappedLength is less than 3.
+   *
+   * @param ranges
+   *          the mapped ranges, in mapping order
+   * @param mappedLength
+   *          the number of mapped positions, of which the last 3 are to be
+   *          unmapped
+   */
+  public static void unmapStopCodon(List<int[]> ranges,
+          int mappedLength)
+  {
+    if (mappedLength < 3)
+    {
+      return;
+    }
+    int targetLength = mappedLength - 3;
+    if (targetLength == 0)
+    {
+      /*
+       * nothing is left once the last 3 positions are removed; without this
+       * check the first range would be 'shortened' to [start, start - sense],
+       * which reads as a 2-position range of the opposite sense
+       */
+      ranges.clear();
+      return;
+    }
+    boolean done = false;
+    int mapped = 0;
+    Iterator<int[]> it = ranges.iterator();
+    while (!done && it.hasNext())
+    {
+      int[] range = it.next();
+      int length = Math.abs(range[1] - range[0]) + 1;
+      if (mapped + length == targetLength)
+      {
+        // this range completes the positions to keep
+        done = true;
+      }
+      else if (mapped + length < targetLength)
+      {
+        // keep the whole of this range and carry on
+        mapped += length;
+        continue;
+      }
+      else
+      {
+        /*
+         * need just a bit of this range
+         */
+        int needed = targetLength - mapped;
+        int sense = range[1] >= range[0] ? 1 : -1;
+        range[1] = range[0] + (sense * (needed - 1));
+        done = true;
+      }
+    }
+    /*
+     * remove any trailing ranges
+     */
+    while (it.hasNext())
+    {
+      it.next();
+      it.remove();
+    }
+  }
 }
index 2d99a52..90c38d4 100644 (file)
@@ -14,6 +14,7 @@ import jalview.util.MapList;
 
 import java.util.List;
 
+import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -234,4 +235,17 @@ public class EnsemblCdnaTest
     sf.setType("CDS");
     assertFalse(testee.identifiesSequence(sf, accId));
   }
+
+  /**
+   * Checks which accession id formats an EnsemblCdna fetcher accepts as valid
+   * and which it rejects
+   */
+  @Test(groups = "Functional")
+  public void testIsValidReference() throws Exception
+  {
+    EnsemblSequenceFetcher esq = new EnsemblCdna();
+    Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+    Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+    Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+    Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
+    // one digit short:
+    Assert.assertFalse(esq.isValidReference("ENST0000288602"));
+    // non-human species having a 3 character identifier included:
+    Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+  }
 }
index fb17845..183f933 100644 (file)
@@ -13,6 +13,7 @@ import jalview.util.MapList;
 
 import java.util.List;
 
+import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -151,4 +152,17 @@ public class EnsemblCdsTest
     assertFalse(testee.identifiesSequence(sf, accId));
   }
 
+  /**
+   * Checks which accession id formats an EnsemblCds fetcher accepts as valid
+   * and which it rejects
+   */
+  @Test(groups = "Functional")
+  public void testIsValidReference() throws Exception
+  {
+    EnsemblSequenceFetcher esq = new EnsemblCds();
+    Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+    Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+    Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+    Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
+    // one digit short:
+    Assert.assertFalse(esq.isValidReference("ENST0000288602"));
+    // non-human species have a 3 character identifier included:
+    Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+  }
+
 }
index a262c1e..d1c7e2f 100644 (file)
@@ -12,7 +12,6 @@ import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyLite;
 import jalview.util.MapList;
 
-import java.util.Arrays;
 import java.util.List;
 
 import org.testng.annotations.AfterClass;
@@ -160,21 +159,11 @@ public class EnsemblGeneTest
      * with no filter
      */
     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
-            genomic, null);
+            genomic);
     assertEquals(3, features.size());
     assertSame(sf1, features.get(0));
     assertSame(sf2, features.get(1));
     assertSame(sf3, features.get(2));
-
-    /*
-     * with filter
-     */
-    List<String> ids = Arrays.asList(new String[] { "transcript2",
-        "transcript3" });
-    features = testee.getTranscriptFeatures(geneId, genomic, ids);
-    assertEquals(2, features.size());
-    assertSame(sf2, features.get(0));
-    assertSame(sf3, features.get(1));
   }
 
   /**
index c5db0a8..e6f6683 100644 (file)
@@ -16,6 +16,8 @@ public class EnsemblProteinTest
     Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
     Assert.assertFalse(esq.isValidReference("ENST00000288602"));
     Assert.assertFalse(esq.isValidReference("ENSG00000288602"));
+    // non-human species having a 3 character identifier included:
+    Assert.assertTrue(esq.isValidReference("ENSMUSP00000099398"));
   }
 
   @Test(groups = "Functional")
index 73d2858..7ef8dd7 100644 (file)
@@ -368,8 +368,22 @@ public class EnsemblSeqProxyTest
     assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(""));
     assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345"));
     assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345"));
+    assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENSMUST00000012345"));
     assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345"));
     assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456"));
     assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234"));
   }
+
+  /**
+   * Checks EnsemblSeqProxy.isGeneIdentifier against null, empty, transcript,
+   * gene (with and without a species code), lower case, and too long / too
+   * short identifiers
+   */
+  @Test(groups = "Functional")
+  public void testIsGeneIdentifier()
+  {
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier(null));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier(""));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENST00000012345"));
+    assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSG00000012345"));
+    assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSMUSG00000012345"));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ensg00000012345"));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG000000123456"));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG0000001234"));
+  }
 }
\ No newline at end of file
index e1eb2a6..371bb91 100644 (file)
@@ -230,4 +230,30 @@ public class DBRefUtilsTest
     assertSame(ref1, matches[0]);
     assertSame(ref2, matches[1]);
   }
+
+  /**
+   * Test the method that searches for matching references based on accession id
+   * only
+   */
+  @Test(groups = { "Functional" })
+  public void testSearchRefs_accessionid()
+  {
+  
+    DBRefEntry ref1 = new DBRefEntry("Uniprot", "1", "A1234"); // matches
+    DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1234"); // matches
+    // constructor does not upper-case accession id
+    DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "a1234"); // no match
+    DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1235"); // no match
+    // ref5 matches although it has a mapping - ignored
+    DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1234");
+    ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+        1 }, 1, 1)));
+  
+    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+        ref2, ref3, ref4, ref5 }, "A1234");
+    assertEquals(3, matches.length);
+    assertSame(ref1, matches[0]);
+    assertSame(ref2, matches[1]);
+    assertSame(ref5, matches[2]);
+  }
 }
index 7100381..095ab1b 100644 (file)
@@ -23,6 +23,7 @@ package jalview.util;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
 import jalview.api.AlignViewportI;
 import jalview.commands.EditCommand;
@@ -855,4 +856,59 @@ public class MappingUtilsTest
     assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
     assertEquals("[5, 10]", Arrays.toString(hidden.get(1)));
   }
+
+  /**
+   * Tests for the method that removes the trailing stop codon from a mapping
+   * range i.e. the last 3 positions (whether split or not)
+   */
+  @Test(groups = { "Functional" })
+  public void testUnmapStopCodon()
+  {
+    List<int[]> ranges = new ArrayList<int[]>();
+
+    // simple case, forward strand:
+    ranges.add(new int[] { 1, 3 });
+    ranges.add(new int[] { 9, 14 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(2, ranges.size());
+    assertArrayEquals(new int[] { 1, 3 }, ranges.get(0));
+    assertArrayEquals(new int[] { 9, 11 }, ranges.get(1));
+
+    // split stop codon, forward strand:
+    ranges.clear();
+    ranges.add(new int[] { 1, 8 });
+    ranges.add(new int[] { 10, 10 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(1, ranges.size());
+    assertArrayEquals(new int[] { 1, 6 }, ranges.get(0));
+
+    // very split stop codon, forward strand:
+    ranges.clear();
+    ranges.add(new int[] { 1, 1 });
+    ranges.add(new int[] { 3, 4 });
+    ranges.add(new int[] { 6, 6 });
+    ranges.add(new int[] { 8, 8 });
+    ranges.add(new int[] { 10, 10 });
+    MappingUtils.unmapStopCodon(ranges, 6);
+    assertEquals(2, ranges.size());
+    assertArrayEquals(new int[] { 1, 1 }, ranges.get(0));
+    assertArrayEquals(new int[] { 3, 4 }, ranges.get(1));
+
+    // simple case, reverse strand:
+    ranges.clear();
+    ranges.add(new int[] { 12, 10 });
+    ranges.add(new int[] { 6, 1 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(2, ranges.size());
+    assertArrayEquals(new int[] { 12, 10 }, ranges.get(0));
+    assertArrayEquals(new int[] { 6, 4 }, ranges.get(1));
+
+    // split stop codon, reverse strand:
+    ranges.clear();
+    ranges.add(new int[] { 12, 6 });
+    ranges.add(new int[] { 4, 3 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(1, ranges.size());
+    assertArrayEquals(new int[] { 12, 7 }, ranges.get(0));
+  }
 }
index 7a9b553..d7058d0 100644 (file)
@@ -7,6 +7,7 @@ import jalview.datamodel.SequenceI;
 import jalview.ws.seqfetcher.ASequenceFetcher;
 import jalview.ws.seqfetcher.DbSourceProxy;
 
+import java.util.ArrayList;
 import java.util.Enumeration;
 import java.util.List;
 import java.util.Vector;
@@ -116,7 +117,8 @@ public class SequenceFetcherTest
                 System.out.println("Type: " + types[t]);
                 SequenceI[] prod = jalview.analysis.CrossRef
                         .findXrefSequences(al.getSequencesArray(), dna,
-                                types[t]).getSequencesArray();
+                                types[t], null, new ArrayList<SequenceI>())
+                        .getSequencesArray();
                 System.out.println("Found "
                         + ((prod == null) ? "no" : "" + prod.length)
                         + " products");
@@ -199,7 +201,7 @@ public class SequenceFetcherTest
           // sequences.
           SequenceI[] seqs = al.getSequencesArray();
           Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
-                  seqs, dna, null, ds);
+                  seqs, dna, null, ds, new ArrayList<SequenceI>());
           System.out.println("Found "
                   + ((prodal == null) ? "no" : "" + prodal.getHeight())
                   + " products");
index fae5778..b9e209f 100644 (file)
@@ -179,8 +179,8 @@ public class DbRefFetcherTest
     assertEquals("Expected local reference map to be 3 nucleotides", dr[0]
             .getMap().getWidth(), 3);
     AlignmentI sprods = CrossRef.findXrefSequences(
-            alsq.getSequencesArray(), true, dr[0].getSource(),
-            alsq.getDataset());
+            alsq.getSequencesArray(), true, dr[0].getSource(), alsq,
+            new ArrayList<SequenceI>());
     assertNotNull(
             "Couldn't recover cross reference sequence from dataset. Was it ever added ?",
             sprods);