Merge branch 'mungo_develop' into features/JAL-653_JAL-1766_htslib_refseqsupport

author gmungoc <g.m.carstairs@dundee.ac.uk>

Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties

index 5ce5f46..4ab8732 100644 (file)
--- a/resources/lang/Messages.properties
+++ b/resources/lang/Messages.properties
@@ -218,6 +218,8 @@ label.above_identity_threshold = Above Identity Threshold
  label.show_sequence_features = Show Sequence Features
  label.nucleotide = Nucleotide
  label.protein = Protein
+label.nucleotides = Nucleotides
+label.proteins = Proteins
  label.to_new_alignment = To New Alignment
  label.to_this_alignment = Add To This Alignment
  label.apply_colour_to_all_groups = Apply Colour To All Groups
diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java

index d45750e..2f6076a 100644 (file)
--- a/src/jalview/analysis/CrossRef.java
+++ b/src/jalview/analysis/CrossRef.java
@@ -219,30 +219,23 @@ public class CrossRef
  
    /**
     * 
-   * @param dna
-   * @param seqs
-   * @return
-   */
-  public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
-          String source)
-  {
-    return findXrefSequences(seqs, dna, source, null);
-  }
-
-  /**
-   * 
     * @param seqs
     *          sequences whose xrefs are being retrieved
     * @param dna
     *          true if sequences are nucleotide
     * @param source
-   * @param dataset
-   *          alignment to search for product sequences.
+   * @param al
+   *          alignment to search for cross-referenced sequences (and possibly
+   *          add to)
+   * @param addedPeers
+   *          a list of sequences to add to if 'peers' to the original sequences
+   *          are found e.g. alternative protein products for a protein's gene
     * @return products (as dataset sequences)
     */
    public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
-          String source, AlignmentI dataset)
+          String source, AlignmentI al, List<SequenceI> addedPeers)
    {
+    AlignmentI dataset = al.getDataset() == null ? al : al.getDataset();
      List<SequenceI> rseqs = new ArrayList<SequenceI>();
      AlignedCodonFrame cf = new AlignedCodonFrame();
      for (SequenceI seq : seqs)
@@ -389,10 +382,12 @@ public class CrossRef
                            int sf = map.getMap().getToLowest();
                            int st = map.getMap().getToHighest();
                            SequenceI mappedrg = ms.getSubSequence(sf, st);
-                          SequenceI loc = dss.getSubSequence(sf, st);
+                          // SequenceI loc = dss.getSubSequence(sf, st);
                            if (mappedrg.getLength() > 0
-                                  && mappedrg.getSequenceAsString().equals(
-                                          loc.getSequenceAsString()))
+                                  && ms.getSequenceAsString().equals(
+                                          dss.getSequenceAsString()))
+                          // && mappedrg.getSequenceAsString().equals(
+                          // loc.getSequenceAsString()))
                            {
                              String msg = "Mapping updated from "
                                      + ms.getName()
@@ -414,8 +409,8 @@ public class CrossRef
                                for (SequenceFeature feat : sfs)
                                {
                                  /* 
-                                 * we override the equality test here (but not
-                                 * elsewhere) to ignore Parent attribute
+                                 * we override SequenceFeature.equals here (but
+                                 * not elsewhere) to ignore Parent attribute
                                   * TODO not quite working yet!
                                   */
                                  if (!copiedFeatures
@@ -430,6 +425,12 @@ public class CrossRef
                              cf.addMap(retrieved[rs].getDatasetSequence(),
                                      dss, map.getMap());
                            }
+                          else
+                          {
+                            addedPeers.add(map.getTo());
+                            cf.addMap(retrieved[rs].getDatasetSequence(),
+                                    map.getTo(), map.getMap());
+                          }
                          } catch (Exception e)
                          {
                            System.err
@@ -452,9 +453,7 @@ public class CrossRef
      Alignment ral = null;
      if (rseqs.size() > 0)
      {
-      SequenceI[] rsqs = new SequenceI[rseqs.size()];
-      rseqs.toArray(rsqs);
-      ral = new Alignment(rsqs);
+      ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
        if (cf != null && !cf.isEmpty())
        {
          ral.addCodonFrame(cf);
diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java

index 5dfd434..3fc8c28 100644 (file)
--- a/src/jalview/datamodel/AlignedCodonFrame.java
+++ b/src/jalview/datamodel/AlignedCodonFrame.java
@@ -321,7 +321,8 @@ public class AlignedCodonFrame
        {
          for (SequenceI sourceAligned : al.getSequences())
          {
-          if (ssm.mapping.to == sourceAligned.getDatasetSequence())
+          if (ssm.mapping.to == sourceAligned.getDatasetSequence()
+                  || ssm.mapping.to == sourceAligned)
            {
              return sourceAligned;
            }
diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java

index f60125b..467fc6d 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblCdna.java
+++ b/src/jalview/ext/ensembl/EnsemblCdna.java
@@ -10,9 +10,13 @@ import com.stevesoft.pat.Regex;
  
  public class EnsemblCdna extends EnsemblSeqProxy
  {
-  // TODO modify to accept other species e.g. ENSMUSPnnn
+  /*
+   * accepts ENST or ENSG with 11 digits
+   * or ENSMUST or similar for other species
+   * or CCDSnnnnn.nn with at least 3 digits
+   */
    private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENST|ENSG|CCDS)[0-9.]{3,}$");
+          "(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
    
    /*
     * fetch exon features on genomic sequence (to identify the cdna regions)
diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java

index 73649b4..10841bd 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblGene.java
+++ b/src/jalview/ext/ensembl/EnsemblGene.java
@@ -7,8 +7,8 @@ import jalview.datamodel.SequenceI;
  import jalview.io.gff.SequenceOntologyFactory;
  import jalview.io.gff.SequenceOntologyI;
  import jalview.util.MapList;
+import jalview.util.StringUtils;
  
-import java.io.IOException;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.List;
@@ -24,9 +24,11 @@ public class EnsemblGene extends EnsemblSeqProxy
  {
    private static final String GENE_PREFIX = "gene:";
  
-  // TODO modify to accept other species e.g. ENSMUSGnnn
-  private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENSG|ENST)[0-9]{11}$");
+  /*
+   * accepts anything as we will attempt lookup of gene or 
+   * transcript id or gene name
+   */
+  private static final Regex ACCESSION_REGEX = new Regex(".*");
  
    private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
        EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
@@ -52,8 +54,15 @@ public class EnsemblGene extends EnsemblSeqProxy
    }
  
    /**
-   * Builds an alignment of all transcripts for the requested gene:
+   * Returns an alignment containing the gene(s) for the given gene or
+   * transcript identifier, or external identifier (e.g. Uniprot id). If given a
+   * gene name or external identifier, returns any related gene sequences found
+   * for model organisms. If only a single gene is queried for, then its
+   * transcripts are also retrieved and added to the alignment. <br>
+   * Method:
     * <ul>
+   * <li>resolves a transcript identifier by looking up its parent gene id</li>
+   * <li>resolves an external identifier by looking up xref-ed gene ids</li>
     * <li>fetches the gene sequence</li>
     * <li>fetches features on the sequence</li>
     * <li>identifies "transcript" features whose Parent is the requested gene</li>
@@ -65,81 +74,81 @@ public class EnsemblGene extends EnsemblSeqProxy
     * <li>aligns each transcript against the gene sequence based on the position
     * mappings</li>
     * </ul>
+   * 
+   * @param query
+   *          one or more identifiers separated by a space
+   * @return an alignment containing one or more genes, and possibly
+   *         transcripts, or null
     */
    @Override
    public AlignmentI getSequenceRecords(String query) throws Exception
    {
-    List<String> transcriptsWanted = null;
+    // todo: tidy up handling of one or multiple accession ids
+    String[] queries = query.split(getAccessionSeparator());
  
+    /*
+     * if given a transcript id, look up its gene parent
+     */
      if (isTranscriptIdentifier(query))
      {
-      transcriptsWanted = Arrays.asList(query
-              .split(getAccessionSeparator()));
-      query = getGeneForTranscript(query);
+      // we are assuming all transcripts have the same gene parent here
+      query = new EnsemblLookup().getParent(queries[0]);
        if (query == null)
        {
          return null;
        }
      }
  
+    /*
+     * if given a gene or other external name, lookup and fetch 
+     * the corresponding gene for all model organisms 
+     */
+    if (!isGeneIdentifier(query))
+    {
+      List<String> geneIds = new EnsemblSymbol().getIds(query);
+      if (geneIds.isEmpty())
+      {
+        return null;
+      }
+      String theIds = StringUtils.listToDelimitedString(geneIds,
+              getAccessionSeparator());
+      return getSequenceRecords(theIds);
+    }
+
      AlignmentI al = super.getSequenceRecords(query);
-    if (al.getHeight() > 0)
+
+    /*
+     * if we retrieved a single gene, get its transcripts as well
+     */
+    if (al.getHeight() == 1)
      {
-      getTranscripts(al, query, transcriptsWanted);
+      getTranscripts(al, query);
      }
  
      return al;
    }
  
    /**
-   * Gets the parent gene identifier for a given transcript identifier, by
-   * retrieving 'transcript' features overlapping the transcript, and finding
-   * the Parent property of the feature whose id is the given identifier.
+   * Attempts to get Ensembl stable identifiers for model organisms for a gene
+   * name by calling the xrefs symbol REST service to resolve the gene name.
     * 
     * @param query
     * @return
     */
-  protected String getGeneForTranscript(String transcriptId)
+  protected String getGeneIdentifiersForName(String query)
    {
-    String geneId = null;
-
-    /*
-     * reduce multiple transcripts (e.g. from Uniprot x-ref) to the first
-     * one only as representative (they should all have the same gene)
-     */
-    transcriptId = transcriptId.split(getAccessionSeparator())[0];
-
-    try
+    List<String> ids = new EnsemblSymbol().getIds(query);
+    if (ids != null)
      {
-      EnsemblFeatureType[] geneFeature = new EnsemblFeatureType[] { EnsemblFeatureType.transcript };
-      AlignmentI al = new EnsemblFeatures().getSequenceRecords(
-              transcriptId, geneFeature);
-      if (al != null && al.getHeight() > 0)
+      for (String id : ids)
        {
-        SequenceFeature[] sfs = al.getSequenceAt(0).getSequenceFeatures();
-        if (sfs != null)
+        if (isGeneIdentifier(id))
          {
-          for (SequenceFeature sf : sfs)
-          {
-            if (transcriptId.equals(getTranscriptId(sf)))
-            {
-              String parent = (String) sf.getValue(PARENT);
-              if (parent != null && parent.startsWith(GENE_PREFIX))
-              {
-                geneId = parent.substring(5);
-              }
-              break;
-            }
-          }
+          return id;
          }
        }
-      return geneId;
-    } catch (IOException e)
-    {
-      System.err.println("Error retrieving gene id for " + transcriptId
-              + ": " + e.getMessage());
-      return null;
      }
+    return null;
    }
  
    /**
@@ -149,17 +158,14 @@ public class EnsemblGene extends EnsemblSeqProxy
     * 
     * @param al
     * @param accId
-   * @param transcriptsWanted
-   *          optional list of transcript ids to filter by
     * @throws Exception
     */
-  protected void getTranscripts(AlignmentI al, String accId,
-          List<String> transcriptsWanted)
+  protected void getTranscripts(AlignmentI al, String accId)
            throws Exception
    {
      SequenceI gene = al.getSequenceAt(0);
      List<SequenceFeature> transcriptFeatures = getTranscriptFeatures(accId,
-            gene, transcriptsWanted);
+            gene);
  
      for (SequenceFeature transcriptFeature : transcriptFeatures)
      {
@@ -250,6 +256,11 @@ public class EnsemblGene extends EnsemblSeqProxy
              transcript.getDatasetSequence(), mapping, parentId);
  
      /*
+     * fetch and save cross-references
+     */
+    super.getCrossReferences(transcript);
+
+    /*
       * and finally fetch the protein product and save as a cross-reference
       */
      new EnsemblCdna().addProteinProduct(transcript);
@@ -274,12 +285,10 @@ public class EnsemblGene extends EnsemblSeqProxy
     * 
     * @param accId
     * @param geneSequence
-   * @param transcriptsWanted
-   *          optional list of ids to filter on
     * @return
     */
    protected List<SequenceFeature> getTranscriptFeatures(String accId,
-          SequenceI geneSequence, List<String> transcriptsWanted)
+          SequenceI geneSequence)
    {
      List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
  
@@ -292,14 +301,6 @@ public class EnsemblGene extends EnsemblSeqProxy
        {
          if (isTranscript(sf.getType()))
          {
-          if (transcriptsWanted != null)
-          {
-            String transcriptId = (String) sf.getValue("transcript_id");
-            if (!transcriptsWanted.contains(transcriptId))
-            {
-              // continue;
-            }
-          }
            String parent = (String) sf.getValue(PARENT);
            if (parentIdentifier.equals(parent))
            {
diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java

new file mode 100644 (file)

index 0000000..cd792b5
--- /dev/null
+++ b/src/jalview/ext/ensembl/EnsemblLookup.java
@@ -0,0 +1,157 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
+
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+/**
+ * A client for the Ensembl REST 'lookup/id' service, used to find the 'Parent'
+ * of an identifier (see getParent)
+ */
+public class EnsemblLookup extends EnsemblRestClient
+{
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL";
+  }
+
+  /**
+   * Not implemented for this client; always returns null
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    return null;
+  }
+
+  /**
+   * Returns the lookup URL for the first identifier in the list; any other
+   * identifiers are ignored
+   */
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    String identifier = ids.get(0);
+    return getUrl(identifier);
+  }
+
+  /**
+   * Builds the lookup/id REST URL, requesting a JSON response
+   *
+   * @param identifier
+   * @return the URL, or null if it is malformed
+   */
+  protected URL getUrl(String identifier)
+  {
+    String url = ENSEMBL_REST + "/lookup/id/" + identifier
+            + "?content-type=application/json";
+    try
+    {
+      return new URL(url);
+    } catch (MalformedURLException e)
+    {
+      return null;
+    }
+  }
+
+  @Override
+  protected boolean useGetRequest()
+  {
+    return true;
+  }
+
+  @Override
+  protected String getRequestMimeType(boolean multipleIds)
+  {
+    return "application/json";
+  }
+
+  @Override
+  protected String getResponseMimeType()
+  {
+    return "application/json";
+  }
+
+  /**
+   * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
+   * given identifier, or null if not found (including when the request fails
+   * or its response cannot be parsed)
+   *
+   * @param identifier
+   * @return the 'Parent' value, or null
+   */
+  public String getParent(String identifier)
+  {
+    URL url = getUrl(identifier);
+    if (url == null)
+    {
+      // no valid URL, so there is no response to parse
+      return null;
+    }
+
+    List<String> ids = Arrays.asList(new String[] { identifier });
+    BufferedReader br = null;
+    try
+    {
+      br = getHttpResponse(url, ids);
+      return parseResponse(br);
+    } catch (IOException e)
+    {
+      // best effort: a failed request is reported as 'not found'
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+  /**
+   * Parses "Parent" from the JSON response and returns the value, or null if
+   * not found (no reader, response is not a JSON object, or has no "Parent")
+   *
+   * @param br
+   * @return the "Parent" value as a string, or null
+   * @throws IOException
+   */
+  protected String parseResponse(BufferedReader br) throws IOException
+  {
+    if (br == null)
+    {
+      return null;
+    }
+    try
+    {
+      Object parsed = new JSONParser().parse(br);
+      if (parsed instanceof JSONObject)
+      {
+        Object parent = ((JSONObject) parsed).get("Parent");
+        return parent == null ? null : parent.toString();
+      }
+    } catch (ParseException e)
+    {
+      // unparseable response: treat as 'not found'
+    }
+    return null;
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java

index 29c7eda..fb79ccf 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblProtein.java
+++ b/src/jalview/ext/ensembl/EnsemblProtein.java
@@ -10,9 +10,13 @@ import com.stevesoft.pat.Regex;
  
  public class EnsemblProtein extends EnsemblSeqProxy
  {
-  // TODO modify to accept other species e.g. ENSMUSPnnn
+  /*
+   * accepts ENSP with 11 digits
+   * or ENSMUSP or similar for other species
+   * or CCDSnnnnn.nn with at least 3 digits
+   */
    private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENSP|CCDS)[0-9.]{3,}$");
+          "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
  
    private static final List<String> CROSSREFS = Arrays.asList(new String[] {
        "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java

index 215eb7a..297f71b 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblRestClient.java
+++ b/src/jalview/ext/ensembl/EnsemblRestClient.java
@@ -14,6 +14,8 @@ import java.util.List;
  
  import javax.ws.rs.HttpMethod;
  
+import com.stevesoft.pat.Regex;
+
  /**
   * Base class for Ensembl REST service clients
   * 
@@ -31,12 +33,28 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
  
    private final static long RETEST_INTERVAL = 10000L; // 10 seconds
  
+  private static final Regex TRANSCRIPT_REGEX = new Regex(
+            "(ENS)([A-Z]{3}|)T[0-9]{11}$");
+
+  private static final Regex GENE_REGEX = new Regex(
+            "(ENS)([A-Z]{3}|)G[0-9]{11}$");
+
    private static boolean ensemblRestAvailable = false;
  
    private static long lastCheck = -1;
  
    protected volatile boolean inProgress = false;
  
+  public static boolean isTranscriptIdentifier(String query)
+  {
+    return query != null && TRANSCRIPT_REGEX.search(query);
+  }
+
+  public static boolean isGeneIdentifier(String query)
+  {
+    return query != null && GENE_REGEX.search(query);
+  }
+
    @Override
    public boolean queryInProgress()
    {
diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java

index a2be17b..77263ff 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java
+++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
@@ -30,8 +30,6 @@ import java.util.LinkedHashMap;
  import java.util.List;
  import java.util.Map.Entry;
  
-import com.stevesoft.pat.Regex;
-
  /**
   * Base class for Ensembl sequence fetchers
   * 
@@ -39,12 +37,8 @@ import com.stevesoft.pat.Regex;
   */
  public abstract class EnsemblSeqProxy extends EnsemblRestClient
  {
-  // TODO modify to accept other species e.g. ENSMUSTnnn
-  private static final Regex TRANSCRIPT_REGEX = new Regex(
-          "(ENST)[0-9]{11}$");
-
    private static final List<String> CROSS_REFERENCES = Arrays
-          .asList(new String[] { "CCDS" });
+          .asList(new String[] { "CCDS", "Uniprot/SWISSPROT" });
  
    protected static final String CONSEQUENCE_TYPE = "consequence_type";
  
@@ -163,6 +157,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
        }
      }
  
+    if (alignment == null)
+    {
+      return null;
+    }
+
      /*
       * fetch and transfer genomic sequence features,
       * fetch protein product and add as cross-reference
@@ -344,7 +343,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      int mappedDnaLength = getCdsRanges(dnaSeq, ranges);
  
      int proteinLength = proteinSeq.getLength();
-    List<int[]> proteinRange = new ArrayList<int[]>();
      int proteinStart = 1;
  
      /*
@@ -356,15 +354,20 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
        proteinStart = 2;
        proteinLength--;
      }
-    proteinRange.add(new int[] { proteinStart, proteinLength });
+    List<int[]> proteinRange = new ArrayList<int[]>();
  
      /*
       * dna length should map to protein (or protein plus stop codon)
       */
      int codesForResidues = mappedDnaLength / 3;
-    if (codesForResidues == proteinLength
-            || codesForResidues == (proteinLength + 1))
+    if (codesForResidues == (proteinLength + 1))
+    {
+      MappingUtils.unmapStopCodon(ranges, mappedDnaLength);
+      codesForResidues--;
+    }
+    if (codesForResidues == proteinLength)
      {
+      proteinRange.add(new int[] { proteinStart, proteinLength });
        return new MapList(ranges, proteinRange, 3, 1);
      }
      return null;
@@ -389,14 +392,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      {
        return 0;
      }
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
      int mappedDnaLength = 0;
      for (SequenceFeature sf : sfs)
      {
        /*
         * process a CDS feature (or a sub-type of CDS)
         */
-      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-              SequenceOntologyI.CDS))
+      if (so.isA(sf.getType(), SequenceOntologyI.CDS))
        {
          int phase = 0;
          try {
@@ -411,7 +414,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
           */
          int begin = sf.getBegin();
          int end = sf.getEnd();
-        if (ranges.isEmpty() && phase > 0)
+        if (ranges.isEmpty())
          {
            begin += phase;
            if (begin > end)
@@ -1129,9 +1132,4 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
              || SequenceOntologyFactory.getInstance().isA(featureType,
                      SequenceOntologyI.TRANSCRIPT);
    }
-
-  public static boolean isTranscriptIdentifier(String query)
-  {
-    return query == null ? false : TRANSCRIPT_REGEX.search(query);
-  }
  }
diff --git a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java

index 67c5e63..9a4952e 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
+++ b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
@@ -12,9 +12,13 @@ import com.stevesoft.pat.Regex;
   */
  abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
  {
-  // TODO modify to accept other species e.g. ENSMUSTnnn
+  /*
+   * accepts ENSG/T/E/P with 11 digits
+   * or ENSMUSP or similar for other species
+   * or CCDSnnnnn.nn with at least 3 digits
+   */
    private static final Regex ACCESSION_REGEX = new Regex(
-          "(ENSP|ENST|ENSG|CCDS)[0-9.]{3,}$");
+          "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
  
    /*
     * possible values for the 'feature' parameter of the /overlap REST service
diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java

new file mode 100644 (file)

index 0000000..5b3baa1
--- /dev/null
+++ b/src/jalview/ext/ensembl/EnsemblSymbol.java
@@ -0,0 +1,160 @@
+package jalview.ext.ensembl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+/**
+ * A client for the Ensembl REST 'xrefs/symbol' service, used to resolve a gene
+ * name or other symbol to Ensembl gene identifiers
+ */
+public class EnsemblSymbol extends EnsemblXref
+{
+  /**
+   * Returns the first "id" value in gene identifier format from the JSON
+   * response, or null if none found (or there is no response to read)
+   *
+   * @param br
+   * @return the first gene identifier found, or null
+   * @throws IOException
+   */
+  protected String parseResponse(BufferedReader br)
+          throws IOException
+  {
+    if (br == null)
+    {
+      return null;
+    }
+    try
+    {
+      Object parsed = new JSONParser().parse(br);
+      if (!(parsed instanceof JSONArray))
+      {
+        return null;
+      }
+      Iterator<?> rvals = ((JSONArray) parsed).iterator();
+      while (rvals.hasNext())
+      {
+        Object rval = rvals.next();
+        if (rval instanceof JSONObject)
+        {
+          // "id" may be absent, so null check before converting to string
+          Object id = ((JSONObject) rval).get("id");
+          if (id != null && isGeneIdentifier(id.toString()))
+          {
+            return id.toString();
+          }
+        }
+      }
+    } catch (ParseException e)
+    {
+      // unparseable response: treat as 'no gene id found'
+    }
+    return null;
+  }
+
+  /**
+   * Builds the xrefs/symbol REST URL for the symbol and species, requesting a
+   * JSON response
+   *
+   * @param id
+   * @param species
+   * @return the URL, or null if it is malformed
+   */
+  protected URL getUrl(String id, Species species)
+  {
+    String url = ENSEMBL_REST + "/xrefs/symbol/" + species.toString() + "/"
+            + id
+            + "?content-type=application/json";
+    try
+    {
+      return new URL(url);
+    } catch (MalformedURLException e)
+    {
+      return null;
+    }
+  }
+
+  /**
+   * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves any gene ids
+   * for the given identifier, for any known model organisms. If a request
+   * fails, the ids found up to that point are returned.
+   *
+   * @param identifier
+   *          one or more symbols, delimited by the accession separator
+   * @return a (possibly empty) list of gene ids
+   */
+  public List<String> getIds(String identifier)
+  {
+    List<String> result = new ArrayList<String>();
+    List<String> ids = new ArrayList<String>();
+    ids.add(identifier);
+
+    String[] queries = identifier.split(getAccessionSeparator());
+    try
+    {
+      for (String query : queries)
+      {
+        for (Species taxon : Species.values())
+        {
+          if (taxon.isModelOrganism())
+          {
+            String geneId = fetchGeneId(query, taxon, ids);
+            if (geneId != null)
+            {
+              result.add(geneId);
+            }
+          }
+        }
+      }
+    } catch (IOException e)
+    {
+      // best effort: stop querying and return what was found so far
+    }
+    return result;
+  }
+
+  /**
+   * Queries one symbol for one species, closing the response reader before
+   * returning so that a reader is not left open for each request made
+   *
+   * @return the gene id found, or null if none (or no valid URL)
+   * @throws IOException
+   */
+  private String fetchGeneId(String query, Species taxon, List<String> ids)
+          throws IOException
+  {
+    URL url = getUrl(query, taxon);
+    if (url == null)
+    {
+      return null;
+    }
+    BufferedReader br = getHttpResponse(url, ids);
+    try
+    {
+      return parseResponse(br);
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java

index d4c5b18..514e44a 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblXref.java
+++ b/src/jalview/ext/ensembl/EnsemblXref.java
@@ -22,7 +22,7 @@ import org.json.simple.parser.ParseException;
   * service
   * 
   * @author gmcarstairs
- *
+ * @see http://rest.ensembl.org/documentation/info/xref_id
   */
  class EnsemblXref extends EnsemblRestClient
  {
@@ -42,8 +42,7 @@ class EnsemblXref extends EnsemblRestClient
    @Override
    protected URL getUrl(List<String> ids) throws MalformedURLException
    {
-    // TODO Auto-generated method stub
-    return null;
+    return getUrl(ids.get(0));
    }
  
    @Override
diff --git a/src/jalview/ext/ensembl/Species.java b/src/jalview/ext/ensembl/Species.java

new file mode 100644 (file)

index 0000000..d8a00a5
--- /dev/null
+++ b/src/jalview/ext/ensembl/Species.java
@@ -0,0 +1,32 @@
+package jalview.ext.ensembl;
+
+/**
+ * Species aliases recognised by Ensembl, each flagged as model organism or not
+ *
+ * @author gmcarstairs
+ * @see http://rest.ensembl.org/info/species?content-type=text/xml
+ */
+enum Species
+{
+  /*
+   * the enum name is any suitably readable alias; all of these are valid
+   * species parameters to Ensembl REST services where applicable
+   */
+  human(true), mouse(true), s_cerevisiae(true), cow(false), pig(false),
+  rat(true), celegans(true), sheep(false), horse(false), gorilla(false),
+  rabbit(false), gibbon(false), dog(false), orangutan(false),
+  xenopus(true), chimpanzee(false), cat(false), zebrafish(true),
+  chicken(true), dmelanogaster(true);
+
+  boolean modelOrganism;
+
+  private Species(boolean isModel)
+  {
+    modelOrganism = isModel;
+  }
+
+  boolean isModelOrganism()
+  {
+    return modelOrganism;
+  }
+}
diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java

index 85f7d19..c93b84b 100644 (file)
--- a/src/jalview/gui/AlignFrame.java
+++ b/src/jalview/gui/AlignFrame.java
@@ -904,7 +904,9 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
      rnahelicesColour.setEnabled(av.getAlignment().hasRNAStructure());
      rnahelicesColour
              .setSelected(av.getGlobalColourScheme() instanceof jalview.schemes.RNAHelicesColour);
-    setShowProductsEnabled();
+
+    showProducts.setEnabled(canShowProducts());
+
      updateEditMenuBar();
    }
  
@@ -4652,67 +4654,27 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
      }
    }
  
-  /*
-   * public void vamsasStore_actionPerformed(ActionEvent e) { JalviewFileChooser
-   * chooser = new JalviewFileChooser(jalview.bin.Cache.
-   * getProperty("LAST_DIRECTORY"));
-   * 
-   * chooser.setFileView(new JalviewFileView()); chooser.setDialogTitle("Export
-   * to Vamsas file"); chooser.setToolTipText("Export");
-   * 
-   * int value = chooser.showSaveDialog(this);
-   * 
-   * if (value == JalviewFileChooser.APPROVE_OPTION) {
-   * jalview.io.VamsasDatastore vs = new jalview.io.VamsasDatastore(viewport);
-   * //vs.store(chooser.getSelectedFile().getAbsolutePath() ); vs.storeJalview(
-   * chooser.getSelectedFile().getAbsolutePath(), this); } }
-   */
    /**
-   * prototype of an automatically enabled/disabled analysis function
+   * Searches selected sequences for xRef products and builds the Show
+   * Cross-References menu (formerly called Show Products)
     * 
+   * @return true if Show Cross-references menu should be enabled.
     */
-  protected void setShowProductsEnabled()
+  public boolean canShowProducts()
    {
      SequenceI[] selection = viewport.getSequenceSelection();
-    if (canShowProducts(selection, viewport.getSelectionGroup() != null,
-            viewport.getAlignment().getDataset()))
-    {
-      showProducts.setEnabled(true);
-
-    }
-    else
-    {
-      showProducts.setEnabled(false);
-    }
-  }
-
-  /**
-   * search selection for sequence xRef products and build the show products
-   * menu.
-   * 
-   * @param selection
-   * @param dataset
-   * @return true if showProducts menu should be enabled.
-   */
-  public boolean canShowProducts(SequenceI[] selection,
-          boolean isRegionSelection, Alignment dataset)
-  {
+    AlignmentI dataset = viewport.getAlignment().getDataset();
      boolean showp = false;
      try
      {
        showProducts.removeAll();
        final boolean dna = viewport.getAlignment().isNucleotide();
-      final Alignment ds = dataset;
        String[] ptypes = (selection == null || selection.length == 0) ? null
                : CrossRef.findSequenceXrefTypes(dna, selection, dataset);
-      // Object[] prods =
-      // CrossRef.buildXProductsList(viewport.getAlignment().isNucleotide(),
-      // selection, dataset, true);
-      final SequenceI[] sel = selection;
+
        for (int t = 0; ptypes != null && t < ptypes.length; t++)
        {
          showp = true;
-        final boolean isRegSel = isRegionSelection;
          final AlignFrame af = this;
          final String source = ptypes[t];
          JMenuItem xtype = new JMenuItem(ptypes[t]);
@@ -4722,9 +4684,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
            @Override
            public void actionPerformed(ActionEvent e)
            {
-            // TODO: new thread for this call with vis-delay
-            af.showProductsFor(af.viewport.getSequenceSelection(),
-                    isRegSel, dna, source);
+            showProductsFor(af.viewport.getSequenceSelection(), dna, source);
            }
  
          });
@@ -4735,15 +4695,15 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
      } catch (Exception e)
      {
        jalview.bin.Cache.log
-              .warn("canTranslate threw an exception - please report to help@jalview.org",
+              .warn("canShowProducts threw an exception - please report to help@jalview.org",
                        e);
        return false;
      }
      return showp;
    }
  
-  protected void showProductsFor(final SequenceI[] sel,
-          final boolean isRegSel, final boolean dna, final String source)
+  protected void showProductsFor(final SequenceI[] sel, final boolean dna,
+          final String source)
    {
      Runnable foo = new Runnable()
      {
@@ -4757,27 +4717,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                  new Object[] { source }), sttime);
          try
          {
-          // update our local dataset reference
-          Alignment ds = AlignFrame.this.getViewport().getAlignment()
-                  .getDataset();
-          Alignment prods = CrossRef
-                  .findXrefSequences(sel, dna, source, ds);
-          if (prods != null)
+          /*
+           * 'peer' sequences are any to add to this alignment, for example
+           * alternative protein products for my protein's gene
+           */
+          List<SequenceI> addedPeers = new ArrayList<SequenceI>();
+          AlignmentI alignment = AlignFrame.this.getViewport().getAlignment();
+          Alignment xrefs = CrossRef.findXrefSequences(sel, dna, source,
+                  alignment, addedPeers);
+          if (xrefs != null)
            {
-            SequenceI[] sprods = new SequenceI[prods.getHeight()];
-            for (int s = 0; s < sprods.length; s++)
-            {
-              sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
-              if (ds.getSequences() == null
-                      || !ds.getSequences().contains(
-                              sprods[s].getDatasetSequence()))
-              {
-                ds.addSequence(sprods[s].getDatasetSequence());
-              }
-              sprods[s].updatePDBIds();
-            }
-            Alignment al = new Alignment(sprods);
-            al.setDataset(ds);
+            Alignment al = makeCrossReferencesAlignment(
+                    alignment.getDataset(), xrefs);
  
              /*
               * Copy dna-to-protein mappings to new alignment
@@ -4785,16 +4736,17 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
              // TODO 1: no mappings are set up for EMBL product
              // TODO 2: if they were, should add them to protein alignment, not
              // dna
-            List<AlignedCodonFrame> cf = prods.getCodonFrames();
-            for (AlignedCodonFrame acf : cf)
-            {
-              al.addCodonFrame(acf);
-            }
+            // List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
+            // for (AlignedCodonFrame acf : cf)
+            // {
+            // al.addCodonFrame(acf);
+            // }
              AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH,
                      DEFAULT_HEIGHT);
-            String newtitle = "" + (dna ? "Proteins" : "Nucleotides")
-                    + " for " + (isRegSel ? "selected region of " : "")
-                    + getTitle();
+            String newtitle = String.format("%s %s %s",
+                    MessageManager.getString(dna ? "label.proteins"
+                            : "label.nucleotides"), MessageManager
+                            .getString("label.for"), getTitle());
              newFrame.setTitle(newtitle);
  
              boolean asSplitFrame = Cache.getDefault(
@@ -4808,25 +4760,50 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                AlignmentI copyAlignment = null;
                final SequenceI[] sequenceSelection = AlignFrame.this.viewport
                        .getSequenceSelection();
+              List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
                if (dna)
                {
                  copyAlignment = AlignmentUtils.makeCdsAlignment(
                          sequenceSelection, cf);
                  al.getCodonFrames().clear();
                  al.getCodonFrames().addAll(cf);
-                final StructureSelectionManager ssm = StructureSelectionManager
-                        .getStructureSelectionManager(Desktop.instance);
-                ssm.registerMappings(cf);
                }
                else
                {
                  copyAlignment = new Alignment(new Alignment(
                          sequenceSelection));
+                copyAlignment.getCodonFrames().addAll(cf);
                }
+              StructureSelectionManager ssm = StructureSelectionManager
+                      .getStructureSelectionManager(Desktop.instance);
+              ssm.registerMappings(cf);
+
+              /*
+               * add in any extra 'peer' sequences discovered
+               * (e.g. alternative protein products)
+               */
+              for (SequenceI peer : addedPeers)
+              {
+                copyAlignment.addSequence(peer);
+              }
+
+              /*
+               * align protein to dna
+               */
+              // TODO needs debugging
+              // if (dna)
+              // {
+              // al.alignAs(copyAlignment);
+              // }
+              // else
+              // {
+              // copyAlignment.alignAs(al);
+              // }
+
                AlignFrame copyThis = new AlignFrame(copyAlignment,
                        AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
                copyThis.setTitle(AlignFrame.this.getTitle());
-              // SplitFrame with dna above, protein below
+
                boolean showSequenceFeatures = viewport
                        .isShowSequenceFeatures();
                newFrame.setShowSeqFeatures(showSequenceFeatures);
@@ -4849,6 +4826,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                String linkedTitle = MessageManager
                        .getString("label.linked_view_title");
                Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+              sf.adjustDivider();
              }
              else
              {
@@ -4878,6 +4856,32 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                  new Object[] { source }), sttime);
        }
  
+      /**
+       * @param dataset dataset alignment; product dataset sequences are added if absent
+       * @param prods alignment holding the cross-referenced (product) sequences
+       * @return new alignment of sequences derived from prods, with dataset attached
+       */
+      protected Alignment makeCrossReferencesAlignment(
+Alignment dataset,
+              Alignment prods)
+      {
+        SequenceI[] sprods = new SequenceI[prods.getHeight()];
+        for (int s = 0; s < sprods.length; s++)
+        {
+          sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
+          if (dataset.getSequences() == null
+                  || !dataset.getSequences().contains(
+                          sprods[s].getDatasetSequence()))
+          {
+            dataset.addSequence(sprods[s].getDatasetSequence());
+          }
+          sprods[s].updatePDBIds();
+        }
+        Alignment al = new Alignment(sprods);
+        al.setDataset(dataset);
+        return al;
+      }
+
      };
      Thread frunner = new Thread(foo);
      frunner.start();
diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java

index 742a109..fc6fb0d 100755 (executable)
--- a/src/jalview/gui/SequenceFetcher.java
+++ b/src/jalview/gui/SequenceFetcher.java
@@ -642,13 +642,14 @@ public class SequenceFetcher extends JPanel implements Runnable
                  boolean rfound = false;
                  for (int r = 0; r < rs.length; r++)
                  {
-                  if (rs[r] != null
-                          && (found = DBRefUtils.searchRefs(
-                                  rs[r].getDBRefs(), dbr)) != null
-                          && found.length > 0)
+                  if (rs[r] != null)
                    {
-                    rfound = true;
-                    rs[r] = null;
+                    found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
+                    if (found != null && found.length > 0)
+                    {
+                      rfound = true;
+                      rs[r] = null;
+                    }
                    }
                  }
                  if (!rfound)
diff --git a/src/jalview/gui/SplitFrame.java b/src/jalview/gui/SplitFrame.java

index 083c7ec..617224f 100644 (file)
--- a/src/jalview/gui/SplitFrame.java
+++ b/src/jalview/gui/SplitFrame.java
@@ -61,6 +61,14 @@ import javax.swing.event.InternalFrameEvent;
   */
  public class SplitFrame extends GSplitFrame implements SplitContainerI
  {
+  private static final int WINDOWS_INSETS_WIDTH = 28; // tbc
+
+  private static final int MAC_INSETS_WIDTH = 28;
+
+  private static final int WINDOWS_INSETS_HEIGHT = 50; // tbc
+
+  private static final int MAC_INSETS_HEIGHT = 50;
+  private static final int DESKTOP_DECORATORS_HEIGHT = 65;
    private static final long serialVersionUID = 1L;
  
    public SplitFrame(GAlignFrame top, GAlignFrame bottom)
@@ -86,8 +94,10 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
       * estimate width and height of SplitFrame; this.getInsets() doesn't seem to
       * give the full additional size (a few pixels short)
       */
-    int widthFudge = Platform.isAMac() ? 28 : 28; // Windows tbc
-    int heightFudge = Platform.isAMac() ? 50 : 50; // tbc
+    int widthFudge = Platform.isAMac() ? MAC_INSETS_WIDTH
+            : WINDOWS_INSETS_WIDTH;
+    int heightFudge = Platform.isAMac() ? MAC_INSETS_HEIGHT
+            : WINDOWS_INSETS_HEIGHT;
      int width = ((AlignFrame) getTopFrame()).getWidth() + widthFudge;
      int height = ((AlignFrame) getTopFrame()).getHeight()
              + ((AlignFrame) getBottomFrame()).getHeight() + DIVIDER_SIZE
@@ -118,7 +128,8 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
    {
      // allow about 65 pixels for Desktop decorators on Windows
  
-    int newHeight = Math.min(height, Desktop.instance.getHeight() - 65);
+    int newHeight = Math.min(height, Desktop.instance.getHeight()
+            - DESKTOP_DECORATORS_HEIGHT);
      if (newHeight != height)
      {
        int oldDividerLocation = getDividerLocation();
@@ -182,6 +193,40 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
    }
  
    /**
+   * Adjust the divider for a sensible split of the real estate (for example,
+   * when many transcripts are shown with a single protein). This should only be
+   * called after the split pane has been laid out (made visible) so it has a
+   * height.
+   */
+  protected void adjustDivider()
+  {
+    final AlignViewport topViewport = ((AlignFrame) getTopFrame()).viewport;
+    final AlignViewport bottomViewport = ((AlignFrame) getBottomFrame()).viewport;
+    final AlignmentI topAlignment = topViewport.getAlignment();
+    final AlignmentI bottomAlignment = bottomViewport.getAlignment();
+    boolean topAnnotations = topViewport.isShowAnnotation();
+    boolean bottomAnnotations = bottomViewport.isShowAnnotation();
+    int topCount = topAlignment.getHeight();
+    int bottomCount = bottomAlignment.getHeight();
+    int topCharHeight = topViewport.getViewStyle().getCharHeight();
+    int bottomCharHeight = bottomViewport.getViewStyle().getCharHeight();
+
+    /*
+     * estimate ratio of (topFrameContent / bottomFrameContent)
+     */
+    int insets = Platform.isAMac() ? MAC_INSETS_HEIGHT
+            : WINDOWS_INSETS_HEIGHT;
+    // allow 3 'rows' for scale, scrollbar, status bar
+    int topHeight = insets + (3 + topCount) * topCharHeight
+            + (topAnnotations ? topViewport.calcPanelHeight() : 0);
+    int bottomHeight = insets + (3 + bottomCount) * bottomCharHeight
+            + (bottomAnnotations ? bottomViewport.calcPanelHeight() : 0);
+    double ratio = ((double) topHeight) / (topHeight + bottomHeight);
+
+    setRelativeDividerLocation(ratio);
+  }
+
+  /**
     * Add a listener to tidy up when the frame is closed.
     */
    protected void addCloseFrameListener()
diff --git a/src/jalview/io/AlignFile.java b/src/jalview/io/AlignFile.java

index 2b8f127..984eff6 100755 (executable)
--- a/src/jalview/io/AlignFile.java
+++ b/src/jalview/io/AlignFile.java
@@ -355,24 +355,12 @@ public abstract class AlignFile extends FileParse
        String desc = id.substring(space + 1);
        seq.setDescription(desc);
  
-      if (desc.startsWith("chromosome"))
-      {
-        /*
-         * parse Ensembl style gene description e.g.
-         * chromosome:GRCh38:7:140696688:140721955:1
-         */
-        String[] tokens = desc.split(":");
-        if (tokens.length > 3)
-        {
-          try
-          {
-            seq.setStart(Integer.parseInt(tokens[3]));
-          } catch (NumberFormatException e)
-          {
-            // ignore
-          }
-        }
-      }
+      /*
+       * it is tempting to parse Ensembl style gene description e.g.
+       * chromosome:GRCh38:7:140696688:140721955:1 and set the
+       * start position of the sequence, but this causes much confusion
+       * for reverse strand feature locations
+       */
      }
      else
      {
diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java

index d2e6654..b3f8161 100644 (file)
--- a/src/jalview/io/gff/SequenceOntologyLite.java
+++ b/src/jalview/io/gff/SequenceOntologyLite.java
@@ -37,6 +37,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
      { "snRNA_gene", "gene" },
      { "miRNA_gene", "gene" },
      { "lincRNA_gene", "gene" },
+    { "rRNA_gene", "gene" },
      
      /*
       * transcript sub-types:
@@ -49,6 +50,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
      { "snRNA", "transcript" },
      { "miRNA", "transcript" },
      { "lincRNA", "transcript" },
+    { "rRNA", "transcript" },
      // there are many more sub-types of ncRNA...
      
      /*
diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java

index e7053ed..424d40b 100755 (executable)
--- a/src/jalview/util/DBRefUtils.java
+++ b/src/jalview/util/DBRefUtils.java
@@ -169,6 +169,25 @@ public class DBRefUtils
    }
  
    /**
+   * Returns an array of those references whose accession id equals the given
+   * accession id (searched using the matchId comparator):
+   * <ul>
+   * <li>accession ids must be non-null and identical (case-sensitive)</li>
+   * <li>database source, version and any mapping are not compared</li>
+   * </ul>
+   * 
+   * @param ref
+   *          Set of references to search
+   * @param accId
+   *          accession id to match
+   * @return matching references, or null if none (as the delegate documents)
+   */
+  public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+  {
+    return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+  }
+
+  /**
     * Returns an array of those references that match the given entry, according
     * to the given comparator. Returns null if no matches.
     * 
@@ -397,6 +416,23 @@ public class DBRefUtils
    };
  
    /**
+   * Matches if both accession ids are non-null and equal (nothing else compared)
+   */
+  public static DbRefComp matchId = new DbRefComp()
+  {
+    @Override
+    public boolean matches(DBRefEntry refa, DBRefEntry refb)
+    {
+      if (refa.getAccessionId() != null && refb.getAccessionId() != null
+              && refb.getAccessionId().equals(refa.getAccessionId()))
+      {
+        return true;
+      }
+      return false;
+    }
+  };
+
+  /**
     * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
     * database is PDB.
     * <p>
diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java

index 1bbfc73..267e871 100644 (file)
--- a/src/jalview/util/MappingUtils.java
+++ b/src/jalview/util/MappingUtils.java
@@ -768,4 +768,55 @@ public final class MappingUtils
      }
      return result;
    }
+
+  /**
+   * Remove the last 3 mapped positions (the stop codon) from the given ranges
+   * 
+   * @param ranges [start, end] ranges, modified in place (end < start if reverse)
+   * @param mappedLength positions kept = mappedLength - 3; no change if less than 3
+   */
+  public static void unmapStopCodon(List<int[]> ranges,
+          int mappedLength)
+  {
+    if (mappedLength < 3)
+    {
+      return;
+    }
+    int targetLength = mappedLength - 3;
+    boolean done = targetLength == 0; // keep nothing: all ranges removed below
+    int mapped = 0;
+    Iterator<int[]> it = ranges.iterator();
+    while (!done && it.hasNext())
+    {
+      int[] range = it.next();
+      int length = Math.abs(range[1] - range[0]) + 1;
+      if (mapped + length == targetLength)
+      {
+        done = true;
+      }
+      else if (mapped + length < targetLength)
+      {
+        mapped += length;
+        continue;
+      }
+      else
+      {
+        /*
+         * keep only the first 'needed' (>= 1) positions of this range
+         */
+        int needed = targetLength - mapped;
+        int sense = range[1] >= range[0] ? 1 : -1;
+        range[1] = range[0] + (sense * (needed - 1));
+        done = true;
+      }
+    }
+    /*
+     * remove any trailing ranges (all of them if nothing stays mapped)
+     */
+    while (it.hasNext())
+    {
+      it.next();
+      it.remove();
+    }
+  }
  }
diff --git a/test/jalview/ext/ensembl/EnsemblCdnaTest.java b/test/jalview/ext/ensembl/EnsemblCdnaTest.java

index 2d99a52..90c38d4 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblCdnaTest.java
+++ b/test/jalview/ext/ensembl/EnsemblCdnaTest.java
@@ -14,6 +14,7 @@ import jalview.util.MapList;
  
  import java.util.List;
  
+import org.testng.Assert;
  import org.testng.annotations.AfterClass;
  import org.testng.annotations.BeforeClass;
  import org.testng.annotations.Test;
@@ -234,4 +235,17 @@ public class EnsemblCdnaTest
      sf.setType("CDS");
      assertFalse(testee.identifiesSequence(sf, accId));
    }
+
+  @Test(groups = "Functional")
+  public void testIsValidReference() throws Exception
+  {
+    EnsemblSequenceFetcher esq = new EnsemblCdna();
+    Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+    Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+    Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+    Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
+    Assert.assertFalse(esq.isValidReference("ENST0000288602"));
+    // non-human species having a 3 character identifier included:
+    Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+  }
  }
diff --git a/test/jalview/ext/ensembl/EnsemblCdsTest.java b/test/jalview/ext/ensembl/EnsemblCdsTest.java

index fb17845..183f933 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblCdsTest.java
+++ b/test/jalview/ext/ensembl/EnsemblCdsTest.java
@@ -13,6 +13,7 @@ import jalview.util.MapList;
  
  import java.util.List;
  
+import org.testng.Assert;
  import org.testng.annotations.AfterClass;
  import org.testng.annotations.BeforeClass;
  import org.testng.annotations.Test;
@@ -151,4 +152,17 @@ public class EnsemblCdsTest
      assertFalse(testee.identifiesSequence(sf, accId));
    }
  
+  @Test(groups = "Functional")
+  public void testIsValidReference() throws Exception
+  {
+    EnsemblSequenceFetcher esq = new EnsemblCds();
+    Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+    Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+    Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+    Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
+    Assert.assertFalse(esq.isValidReference("ENST0000288602"));
+    // non-human species have a 3 character identifier included:
+    Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+  }
+
  }
diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java

index a262c1e..d1c7e2f 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblGeneTest.java
+++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java
@@ -12,7 +12,6 @@ import jalview.io.gff.SequenceOntologyFactory;
  import jalview.io.gff.SequenceOntologyLite;
  import jalview.util.MapList;
  
-import java.util.Arrays;
  import java.util.List;
  
  import org.testng.annotations.AfterClass;
@@ -160,21 +159,11 @@ public class EnsemblGeneTest
       * with no filter
       */
      List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
-            genomic, null);
+            genomic);
      assertEquals(3, features.size());
      assertSame(sf1, features.get(0));
      assertSame(sf2, features.get(1));
      assertSame(sf3, features.get(2));
-
-    /*
-     * with filter
-     */
-    List<String> ids = Arrays.asList(new String[] { "transcript2",
-        "transcript3" });
-    features = testee.getTranscriptFeatures(geneId, genomic, ids);
-    assertEquals(2, features.size());
-    assertSame(sf2, features.get(0));
-    assertSame(sf3, features.get(1));
    }
  
    /**
diff --git a/test/jalview/ext/ensembl/EnsemblProteinTest.java b/test/jalview/ext/ensembl/EnsemblProteinTest.java

index c5db0a8..e6f6683 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblProteinTest.java
+++ b/test/jalview/ext/ensembl/EnsemblProteinTest.java
@@ -16,6 +16,8 @@ public class EnsemblProteinTest
      Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
      Assert.assertFalse(esq.isValidReference("ENST00000288602"));
      Assert.assertFalse(esq.isValidReference("ENSG00000288602"));
+    // non-human species having a 3 character identifier included:
+    Assert.assertTrue(esq.isValidReference("ENSMUSP00000099398"));
    }
  
    @Test(groups = "Functional")
diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java

index 73d2858..7ef8dd7 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
+++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
@@ -368,8 +368,22 @@ public class EnsemblSeqProxyTest
      assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(""));
      assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345"));
      assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345"));
+    assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENSMUST00000012345"));
      assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345"));
      assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456"));
      assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234"));
    }
+
+  @Test(groups = "Functional")
+  public void testIsGeneIdentifier()
+  {
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier(null));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier(""));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENST00000012345"));
+    assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSG00000012345"));
+    assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSMUSG00000012345"));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ensg00000012345"));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG000000123456"));
+    assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG0000001234"));
+  }
  }
\ No newline at end of file
diff --git a/test/jalview/util/DBRefUtilsTest.java b/test/jalview/util/DBRefUtilsTest.java

index e1eb2a6..371bb91 100644 (file)
--- a/test/jalview/util/DBRefUtilsTest.java
+++ b/test/jalview/util/DBRefUtilsTest.java
@@ -230,4 +230,30 @@ public class DBRefUtilsTest
      assertSame(ref1, matches[0]);
      assertSame(ref2, matches[1]);
    }
+
+  /**
+   * Test the method that searches for matching references based on accession id
+   * only
+   */
+  @Test(groups = { "Functional" })
+  public void testSearchRefs_accessionid()
+  {
+  
+    DBRefEntry ref1 = new DBRefEntry("Uniprot", "1", "A1234"); // matches
+    DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1234"); // matches
+    // constructor does not upper-case accession id
+    DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "a1234"); // no match
+    DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1235"); // no match
+    // ref5 matches although it has a mapping - ignored
+    DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1234");
+    ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+        1 }, 1, 1)));
+  
+    DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+        ref2, ref3, ref4, ref5 }, "A1234");
+    assertEquals(3, matches.length);
+    assertSame(ref1, matches[0]);
+    assertSame(ref2, matches[1]);
+    assertSame(ref5, matches[2]);
+  }
  }
diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java

index 7100381..095ab1b 100644 (file)
--- a/test/jalview/util/MappingUtilsTest.java
+++ b/test/jalview/util/MappingUtilsTest.java
@@ -23,6 +23,7 @@ package jalview.util;
  import static org.testng.AssertJUnit.assertEquals;
  import static org.testng.AssertJUnit.assertSame;
  import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
  
  import jalview.api.AlignViewportI;
  import jalview.commands.EditCommand;
@@ -855,4 +856,59 @@ public class MappingUtilsTest
      assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
      assertEquals("[5, 10]", Arrays.toString(hidden.get(1)));
    }
+
+  /**
+   * Tests for the method that removes the trailing stop codon from a mapping
+   * range i.e. the last 3 positions (whether split or not)
+   */
+  @Test(groups = { "Functional" })
+  public void testUnmapStopCodon()
+  {
+    List<int[]> ranges = new ArrayList<int[]>();
+
+    // simple case, forward strand:
+    ranges.add(new int[] { 1, 3 });
+    ranges.add(new int[] { 9, 14 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(2, ranges.size());
+    assertArrayEquals(new int[] { 1, 3 }, ranges.get(0));
+    assertArrayEquals(new int[] { 9, 11 }, ranges.get(1));
+
+    // split stop codon, forward strand:
+    ranges.clear();
+    ranges.add(new int[] { 1, 8 });
+    ranges.add(new int[] { 10, 10 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(1, ranges.size());
+    assertArrayEquals(new int[] { 1, 6 }, ranges.get(0));
+
+    // very split stop codon, forward strand:
+    ranges.clear();
+    ranges.add(new int[] { 1, 1 });
+    ranges.add(new int[] { 3, 4 });
+    ranges.add(new int[] { 6, 6 });
+    ranges.add(new int[] { 8, 8 });
+    ranges.add(new int[] { 10, 10 });
+    MappingUtils.unmapStopCodon(ranges, 6);
+    assertEquals(2, ranges.size());
+    assertArrayEquals(new int[] { 1, 1 }, ranges.get(0));
+    assertArrayEquals(new int[] { 3, 4 }, ranges.get(1));
+
+    // simple case, reverse strand:
+    ranges.clear();
+    ranges.add(new int[] { 12, 10 });
+    ranges.add(new int[] { 6, 1 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(2, ranges.size());
+    assertArrayEquals(new int[] { 12, 10 }, ranges.get(0));
+    assertArrayEquals(new int[] { 6, 4 }, ranges.get(1));
+
+    // split stop codon, reverse strand:
+    ranges.clear();
+    ranges.add(new int[] { 12, 6 });
+    ranges.add(new int[] { 4, 3 });
+    MappingUtils.unmapStopCodon(ranges, 9);
+    assertEquals(1, ranges.size());
+    assertArrayEquals(new int[] { 12, 7 }, ranges.get(0));
+  }
  }
diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java

index 7a9b553..d7058d0 100644 (file)
--- a/test/jalview/ws/SequenceFetcherTest.java
+++ b/test/jalview/ws/SequenceFetcherTest.java
@@ -7,6 +7,7 @@ import jalview.datamodel.SequenceI;
  import jalview.ws.seqfetcher.ASequenceFetcher;
  import jalview.ws.seqfetcher.DbSourceProxy;
  
+import java.util.ArrayList;
  import java.util.Enumeration;
  import java.util.List;
  import java.util.Vector;
@@ -116,7 +117,8 @@ public class SequenceFetcherTest
                  System.out.println("Type: " + types[t]);
                  SequenceI[] prod = jalview.analysis.CrossRef
                          .findXrefSequences(al.getSequencesArray(), dna,
-                                types[t]).getSequencesArray();
+                                types[t], null, new ArrayList<SequenceI>())
+                        .getSequencesArray();
                  System.out.println("Found "
                          + ((prod == null) ? "no" : "" + prod.length)
                          + " products");
@@ -199,7 +201,7 @@ public class SequenceFetcherTest
            // sequences.
            SequenceI[] seqs = al.getSequencesArray();
            Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
-                  seqs, dna, null, ds);
+                  seqs, dna, null, ds, new ArrayList<SequenceI>());
            System.out.println("Found "
                    + ((prodal == null) ? "no" : "" + prodal.getHeight())
                    + " products");
diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java

index fae5778..b9e209f 100644 (file)
--- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java
+++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java
@@ -179,8 +179,8 @@ public class DbRefFetcherTest
      assertEquals("Expected local reference map to be 3 nucleotides", dr[0]
              .getMap().getWidth(), 3);
      AlignmentI sprods = CrossRef.findXrefSequences(
-            alsq.getSequencesArray(), true, dr[0].getSource(),
-            alsq.getDataset());
+            alsq.getSequencesArray(), true, dr[0].getSource(), alsq,
+            new ArrayList<SequenceI>());
      assertNotNull(
              "Couldn't recover cross reference sequence from dataset. Was it ever added ?",
              sprods);
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Sat, 20 Feb 2016 14:00:13 +0000 (14:00 +0000)
resources/lang/Messages.properties		patch \| blob \| history
src/jalview/analysis/CrossRef.java		patch \| blob \| history
src/jalview/datamodel/AlignedCodonFrame.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblCdna.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblGene.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblLookup.java	[new file with mode: 0644]	patch \| blob
src/jalview/ext/ensembl/EnsemblProtein.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblRestClient.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblSeqProxy.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblSequenceFetcher.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblSymbol.java	[new file with mode: 0644]	patch \| blob
src/jalview/ext/ensembl/EnsemblXref.java		patch \| blob \| history
src/jalview/ext/ensembl/Species.java	[new file with mode: 0644]	patch \| blob
src/jalview/gui/AlignFrame.java		patch \| blob \| history
src/jalview/gui/SequenceFetcher.java		patch \| blob \| history
src/jalview/gui/SplitFrame.java		patch \| blob \| history
src/jalview/io/AlignFile.java		patch \| blob \| history
src/jalview/io/gff/SequenceOntologyLite.java		patch \| blob \| history
src/jalview/util/DBRefUtils.java		patch \| blob \| history
src/jalview/util/MappingUtils.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblCdnaTest.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblCdsTest.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblGeneTest.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblProteinTest.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java		patch \| blob \| history
test/jalview/util/DBRefUtilsTest.java		patch \| blob \| history
test/jalview/util/MappingUtilsTest.java		patch \| blob \| history
test/jalview/ws/SequenceFetcherTest.java		patch \| blob \| history
test/jalview/ws/seqfetcher/DbRefFetcherTest.java		patch \| blob \| history