JAL-1705 various refactoring towards Uniprot-to-Ensembl fetching
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 12 Feb 2016 16:55:19 +0000 (16:55 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 12 Feb 2016 16:55:19 +0000 (16:55 +0000)
15 files changed:
src/jalview/analysis/CrossRef.java
src/jalview/ext/ensembl/EnsemblCdna.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/ext/ensembl/EnsemblProtein.java
src/jalview/ext/ensembl/EnsemblRestClient.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
src/jalview/gui/SequenceFetcher.java
src/jalview/ws/SequenceFetcher.java
src/jalview/ws/seqfetcher/ASequenceFetcher.java
src/jalview/ws/seqfetcher/DbSourceProxyImpl.java
test/jalview/ext/ensembl/EnsemblGeneTest.java
test/jalview/ext/ensembl/EnsemblProteinTest.java
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
test/jalview/io/gff/GffTests.java

index 21fd08d..d45750e 100644 (file)
@@ -27,6 +27,7 @@ import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.util.DBRefUtils;
 import jalview.ws.SequenceFetcher;
@@ -45,6 +46,27 @@ import java.util.Vector;
  */
 public class CrossRef
 {
+  /*
+   * A sub-class that ignores Parent attribute when comparing sequence 
+   * features. This avoids 'duplicate' CDS features that only
+   * differ in their parent Transcript ids.
+   */
+  class MySequenceFeature extends SequenceFeature
+  {
+    private SequenceFeature feat;
+
+    MySequenceFeature(SequenceFeature sf)
+    {
+      this.feat = sf;
+    }
+
+    @Override
+    public boolean equals(Object o)
+    {
+      return feat.equals(o, true);
+    }
+  }
+
   /**
    * Select just the DNA or protein references for a protein or dna sequence
    * 
@@ -337,8 +359,11 @@ public class CrossRef
                               + seq.getName());
               e.printStackTrace();
             }
+
             if (retrieved != null)
             {
+              List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
+              CrossRef me = new CrossRef();
               for (int rs = 0; rs < retrieved.length; rs++)
               {
                 // TODO: examine each sequence for 'redundancy'
@@ -369,12 +394,41 @@ public class CrossRef
                                   && mappedrg.getSequenceAsString().equals(
                                           loc.getSequenceAsString()))
                           {
-                            System.err
-                                    .println("Mapping updated for retrieved crossreference");
+                            String msg = "Mapping updated from "
+                                    + ms.getName()
+                                    + " to retrieved crossreference "
+                                    + dss.getName();
+                            System.out.println(msg);
                             // method to update all refs of existing To on
                             // retrieved sequence with dss and merge any props
                             // on To onto dss.
                             map.setTo(dss);
+                            /*
+                             * copy sequence features as well, avoiding
+                             * duplication (e.g. from 2 transcripts)
+                             */
+                            SequenceFeature[] sfs = ms
+                                    .getSequenceFeatures();
+                            if (sfs != null)
+                            {
+                              for (SequenceFeature feat : sfs)
+                              {
+                                /* 
+                                 * we override the equality test here (but not
+                                 * elsewhere) to ignore Parent attribute
+                                 * TODO not quite working yet!
+                                 */
+                                if (!copiedFeatures
+                                        .contains(me.new MySequenceFeature(
+                                                feat)))
+                                {
+                                  dss.addSequenceFeature(feat);
+                                  copiedFeatures.add(feat);
+                                }
+                              }
+                            }
+                            cf.addMap(retrieved[rs].getDatasetSequence(),
+                                    dss, map.getMap());
                           }
                         } catch (Exception e)
                         {
index d4d1c08..f60125b 100644 (file)
@@ -11,7 +11,8 @@ import com.stevesoft.pat.Regex;
 public class EnsemblCdna extends EnsemblSeqProxy
 {
   // TODO modify to accept other species e.g. ENSMUSPnnn
-  private static final Regex ACCESSION_REGEX = new Regex("((ENST|ENSG|CCDS)[0-9.]{3,})");
+  private static final Regex ACCESSION_REGEX = new Regex(
+          "(ENST|ENSG|CCDS)[0-9.]{3,}$");
   
   /*
    * fetch exon features on genomic sequence (to identify the cdna regions)
index dc28796..73649b4 100644 (file)
@@ -8,6 +8,7 @@ import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
 import jalview.util.MapList;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -21,9 +22,11 @@ import com.stevesoft.pat.Regex;
  */
 public class EnsemblGene extends EnsemblSeqProxy
 {
+  private static final String GENE_PREFIX = "gene:";
+
   // TODO modify to accept other species e.g. ENSMUSGnnn
   private static final Regex ACCESSION_REGEX = new Regex(
-          "((ENSG)[0-9]{11})");
+          "(ENSG|ENST)[0-9]{11}$");
 
   private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
       EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
@@ -66,31 +69,97 @@ public class EnsemblGene extends EnsemblSeqProxy
   @Override
   public AlignmentI getSequenceRecords(String query) throws Exception
   {
-    // TODO ? if an ENST identifier is supplied, convert to ENSG?
+    List<String> transcriptsWanted = null;
+
+    if (isTranscriptIdentifier(query))
+    {
+      transcriptsWanted = Arrays.asList(query
+              .split(getAccessionSeparator()));
+      query = getGeneForTranscript(query);
+      if (query == null)
+      {
+        return null;
+      }
+    }
+
     AlignmentI al = super.getSequenceRecords(query);
     if (al.getHeight() > 0)
     {
-      getTranscripts(al, query);
+      getTranscripts(al, query, transcriptsWanted);
     }
 
     return al;
   }
 
   /**
+   * Gets the parent gene identifier for a given transcript identifier, by
+   * retrieving 'transcript' features overlapping the transcript, and finding
+   * the Parent property of the feature whose id is the given identifier.
+   * 
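+   * @param transcriptId one or more transcript ids; only the first is used
+   * @return the parent gene id, or null if not found or retrieval fails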
+   */
+  protected String getGeneForTranscript(String transcriptId)
+  {
+    String geneId = null;
+
+    /*
+     * reduce multiple transcripts (e.g. from Uniprot x-ref) to the first
+     * one only as representative (they should all have the same gene)
+     */
+    transcriptId = transcriptId.split(getAccessionSeparator())[0];
+
+    try
+    {
+      EnsemblFeatureType[] geneFeature = new EnsemblFeatureType[] { EnsemblFeatureType.transcript };
+      AlignmentI al = new EnsemblFeatures().getSequenceRecords(
+              transcriptId, geneFeature);
+      if (al != null && al.getHeight() > 0)
+      {
+        SequenceFeature[] sfs = al.getSequenceAt(0).getSequenceFeatures();
+        if (sfs != null)
+        {
+          for (SequenceFeature sf : sfs)
+          {
+            if (transcriptId.equals(getTranscriptId(sf)))
+            {
+              String parent = (String) sf.getValue(PARENT);
+              if (parent != null && parent.startsWith(GENE_PREFIX))
+              {
+                geneId = parent.substring(5);
+              }
+              break;
+            }
+          }
+        }
+      }
+      return geneId;
+    } catch (IOException e)
+    {
+      System.err.println("Error retrieving gene id for " + transcriptId
+              + ": " + e.getMessage());
+      return null;
+    }
+  }
+
+  /**
    * Constructs all transcripts for the gene, as identified by "transcript"
    * features whose Parent is the requested gene. The coding transcript
    * sequences (i.e. with introns omitted) are added to the alignment.
    * 
    * @param al
    * @param accId
+   * @param transcriptsWanted
+   *          optional list of transcript ids to filter by
    * @throws Exception
    */
-  protected void getTranscripts(AlignmentI al, String accId)
+  protected void getTranscripts(AlignmentI al, String accId,
+          List<String> transcriptsWanted)
           throws Exception
   {
     SequenceI gene = al.getSequenceAt(0);
     List<SequenceFeature> transcriptFeatures = getTranscriptFeatures(accId,
-            gene);
+            gene, transcriptsWanted);
 
     for (SequenceFeature transcriptFeature : transcriptFeatures)
     {
@@ -114,7 +183,7 @@ public class EnsemblGene extends EnsemblSeqProxy
   SequenceI makeTranscript(SequenceFeature transcriptFeature,
           AlignmentI al, SequenceI gene)
   {
-    String accId = (String) transcriptFeature.getValue("transcript_id");
+    String accId = getTranscriptId(transcriptFeature);
     if (accId == null)
     {
       return null;
@@ -189,19 +258,32 @@ public class EnsemblGene extends EnsemblSeqProxy
   }
 
   /**
+   * Returns the 'transcript_id' property of the sequence feature (or null)
+   * 
+   * @param feature
+   * @return
+   */
+  protected String getTranscriptId(SequenceFeature feature)
+  {
+    return (String) feature.getValue("transcript_id");
+  }
+
+  /**
    * Returns a list of the transcript features on the sequence whose Parent is
    * the gene for the accession id.
    * 
    * @param accId
    * @param geneSequence
+   * @param transcriptsWanted
+   *          optional list of ids to filter on
    * @return
    */
   protected List<SequenceFeature> getTranscriptFeatures(String accId,
-          SequenceI geneSequence)
+          SequenceI geneSequence, List<String> transcriptsWanted)
   {
     List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
 
-    String parentIdentifier = "gene:" + accId;
+    String parentIdentifier = GENE_PREFIX + accId;
     SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
 
     if (sfs != null)
@@ -210,6 +292,14 @@ public class EnsemblGene extends EnsemblSeqProxy
       {
         if (isTranscript(sf.getType()))
         {
+          if (transcriptsWanted != null)
+          {
+            String transcriptId = (String) sf.getValue("transcript_id");
+            if (!transcriptsWanted.contains(transcriptId))
+            {
+              // continue;
+            }
+          }
           String parent = (String) sf.getValue(PARENT);
           if (parentIdentifier.equals(parent))
           {
@@ -225,11 +315,11 @@ public class EnsemblGene extends EnsemblSeqProxy
   @Override
   public String getDescription()
   {
-    return "Fetches all transcripts and variant features for a gene";
+    return "Fetches all transcripts and variant features for a gene or transcript";
   }
 
   /**
-   * Default test query is a transcript
+   * Default test query is a gene id (can also enter a transcript id)
    */
   @Override
   public String getTestQuery()
@@ -251,7 +341,7 @@ public class EnsemblGene extends EnsemblSeqProxy
             SequenceOntologyI.GENE))
     {
       String id = (String) sf.getValue(ID);
-      if (("gene:" + accId).equals(id))
+      if ((GENE_PREFIX + accId).equals(id))
       {
         return true;
       }
@@ -278,7 +368,7 @@ public class EnsemblGene extends EnsemblSeqProxy
     if (isTranscript(sf.getType()))
     {
       String parent = (String) sf.getValue(PARENT);
-      if (!("gene:" + accessionId).equals(parent))
+      if (!(GENE_PREFIX + accessionId).equals(parent))
       {
         return false;
       }
index 8f23984..29c7eda 100644 (file)
@@ -12,7 +12,7 @@ public class EnsemblProtein extends EnsemblSeqProxy
 {
   // TODO modify to accept other species e.g. ENSMUSPnnn
   private static final Regex ACCESSION_REGEX = new Regex(
-          "((ENSP|CCDS)[0-9.]{3,})");
+          "(ENSP|CCDS)[0-9.]{3,}$");
 
   private static final List<String> CROSSREFS = Arrays.asList(new String[] {
       "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
@@ -87,4 +87,28 @@ public class EnsemblProtein extends EnsemblSeqProxy
     return ACCESSION_REGEX;
   }
 
+  /**
+   * Returns an accession id for a query, including conversion of ENST* to
+   * ENSP*. This supports querying for the protein sequence for a transcript
+   * (ENST identifier) and returning the ENSP identifier.
+   */
+  @Override
+  public String getAccessionIdFromQuery(String query)
+  {
+    String accId = super.getAccessionIdFromQuery(query);
+
+    /*
+     * ensure last character before (11) digits is P
+     * ENST00000288602 -> ENSP00000288602
+     * ENSMUST00000288602 -> ENSMUSP00000288602
+     */
+    if (accId != null && accId.length() >= 12)
+    {
+      char[] chars = accId.toCharArray();
+      chars[chars.length - 12] = 'P';
+      accId = new String(chars);
+    }
+    return accId;
+  }
+
 }
index dc4cc88..215eb7a 100644 (file)
@@ -140,7 +140,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
   protected BufferedReader getHttpResponse(URL url, List<String> ids)
           throws IOException
   {
-    long now = System.currentTimeMillis();
+    // long now = System.currentTimeMillis();
     HttpURLConnection connection = (HttpURLConnection) url.openConnection();
   
     /*
@@ -176,8 +176,8 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher
               "Response code was not 200. Detected response was "
                       + responseCode);
     }
-    System.out.println(getClass().getName() + " took "
-            + (System.currentTimeMillis() - now) + "ms to fetch");
+    // System.out.println(getClass().getName() + " took "
+    // + (System.currentTimeMillis() - now) + "ms to fetch");
   
     BufferedReader reader = null;
     reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
index e77051d..a2be17b 100644 (file)
@@ -30,6 +30,8 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map.Entry;
 
+import com.stevesoft.pat.Regex;
+
 /**
  * Base class for Ensembl sequence fetchers
  * 
@@ -37,6 +39,10 @@ import java.util.Map.Entry;
  */
 public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
+  // TODO modify to accept other species e.g. ENSMUSTnnn
+  private static final Regex TRANSCRIPT_REGEX = new Regex(
+          "(ENST)[0-9]{11}$");
+
   private static final List<String> CROSS_REFERENCES = Arrays
           .asList(new String[] { "CCDS" });
 
@@ -154,14 +160,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                 + ")";
         System.err.println(msg);
         break;
-        // if (alignment != null)
-        // {
-        // break; // return what we got
-        // }
-        // else
-        // {
-        // throw new JalviewException(msg, r);
-        // }
       }
     }
 
@@ -294,8 +292,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
-   * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
-   * sequence
+   * Get database xrefs from Ensembl, and attach them to the sequence
    * 
    * @param seq
    */
@@ -719,18 +716,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       /*
        * for sequence_variant, make an additional feature with consequence
        */
-      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-              SequenceOntologyI.SEQUENCE_VARIANT))
-      {
-        String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
-        if (consequence != null)
-        {
-          SequenceFeature sf2 = new SequenceFeature("consequence",
-                  consequence, copy.getBegin(), copy.getEnd(), 0f,
-                  null);
-          targetSequence.addSequenceFeature(sf2);
-        }
-      }
+      // if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+      // SequenceOntologyI.SEQUENCE_VARIANT))
+      // {
+      // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
+      // if (consequence != null)
+      // {
+      // SequenceFeature sf2 = new SequenceFeature("consequence",
+      // consequence, copy.getBegin(), copy.getEnd(), 0f,
+      // null);
+      // targetSequence.addSequenceFeature(sf2);
+      // }
+      // }
     }
   }
 
@@ -750,6 +747,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       return false;
     }
 
+    // long start = System.currentTimeMillis();
     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
     MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
             targetSequence.getStart());
@@ -758,7 +756,13 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       return false;
     }
 
-    return transferFeatures(sfs, targetSequence, mapping, accessionId);
+    boolean result = transferFeatures(sfs, targetSequence, mapping,
+            accessionId);
+    // System.out.println("transferFeatures (" + (sfs.length) + " --> "
+    // + targetSequence.getSequenceFeatures().length + ") to "
+    // + targetSequence.getName()
+    // + " took " + (System.currentTimeMillis() - start) + "ms");
+    return result;
   }
 
   /**
@@ -1125,4 +1129,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
             || SequenceOntologyFactory.getInstance().isA(featureType,
                     SequenceOntologyI.TRANSCRIPT);
   }
+
+  public static boolean isTranscriptIdentifier(String query)
+  {
+    return query == null ? false : TRANSCRIPT_REGEX.search(query);
+  }
 }
index 2e32bd2..67c5e63 100644 (file)
@@ -13,7 +13,8 @@ import com.stevesoft.pat.Regex;
 abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
 {
   // TODO modify to accept other species e.g. ENSMUSTnnn
-  private static final Regex ACCESSION_REGEX = new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
+  private static final Regex ACCESSION_REGEX = new Regex(
+          "(ENSP|ENST|ENSG|CCDS)[0-9.]{3,}$");
 
   /*
    * possible values for the 'feature' parameter of the /overlap REST service
index 2004761..742a109 100755 (executable)
@@ -649,7 +649,6 @@ public class SequenceFetcher extends JPanel implements Runnable
                   {
                     rfound = true;
                     rs[r] = null;
-                    continue;
                   }
                 }
                 if (!rfound)
index 909f515..16bbfe6 100644 (file)
  */
 package jalview.ws;
 
-import jalview.ext.ensembl.EnsemblCdna;
-import jalview.ext.ensembl.EnsemblCds;
 import jalview.ext.ensembl.EnsemblGene;
-import jalview.ext.ensembl.EnsemblGenome;
 import jalview.ext.ensembl.EnsemblProtein;
 import jalview.ws.dbsources.EmblCdsSource;
 import jalview.ws.dbsources.EmblSource;
@@ -63,10 +60,10 @@ public class SequenceFetcher extends ASequenceFetcher
   public SequenceFetcher(boolean addDas)
   {
     addDBRefSourceImpl(EnsemblProtein.class);
-    addDBRefSourceImpl(EnsemblCds.class);
-    addDBRefSourceImpl(EnsemblGenome.class);
+    // addDBRefSourceImpl(EnsemblCds.class);
+    // addDBRefSourceImpl(EnsemblGenome.class);
     addDBRefSourceImpl(EnsemblGene.class);
-    addDBRefSourceImpl(EnsemblCdna.class);
+    // addDBRefSourceImpl(EnsemblCdna.class);
     addDBRefSourceImpl(EmblSource.class);
     addDBRefSourceImpl(EmblCdsSource.class);
     addDBRefSourceImpl(Uniprot.class);
index f825608..2ed3263 100644 (file)
@@ -168,8 +168,7 @@ public class ASequenceFetcher
           {
             continue; // wrong sort of data
           }
-          boolean doMultiple = fetcher.getAccessionSeparator() != null;
-          // No separator - no Multiple Queries
+          boolean doMultiple = fetcher.getMaximumQueryCount() > 1;
           while (!queriesLeft.isEmpty())
           {
             StringBuffer qsb = new StringBuffer();
@@ -188,8 +187,7 @@ public class ASequenceFetcher
             try
             {
               // create a fetcher and go to it
-              seqset = fetcher.getSequenceRecords(qsb.toString()); // ,
-              // queriesFailed);
+              seqset = fetcher.getSequenceRecords(qsb.toString());
             } catch (Exception ex)
             {
               System.err.println("Failed to retrieve the following from "
@@ -256,7 +254,7 @@ public class ASequenceFetcher
         {
           System.out.println("# Adding " + queriesMade.size()
                   + " ids back to queries list for searching again (" + db
-                  + ".");
+                  + ")");
           queriesLeft.addAll(queriesMade);
         }
       }
index 0a4d9a8..84072c0 100644 (file)
@@ -24,8 +24,6 @@ import jalview.datamodel.AlignmentI;
 import jalview.io.FormatAdapter;
 import jalview.io.IdentifyFile;
 
-import com.stevesoft.pat.Regex;
-
 /**
  * common methods for implementations of the DbSourceProxy interface.
  * 
@@ -102,23 +100,20 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy
     return sequences;
   }
 
+  /**
+   * Returns the first accession id in the query (up to the first accession id
+   * separator), or the whole query if there is no separator or it is not found
+   */
   @Override
   public String getAccessionIdFromQuery(String query)
   {
-    Regex vgr = getAccessionValidator();
-    if (vgr == null)
+    String sep = getAccessionSeparator();
+    if (sep == null)
     {
       return query;
     }
-    vgr.search(query);
-    if (vgr.numSubs() > 0)
-    {
-      return (vgr.stringMatched(1));
-    }
-    else
-    {
-      return (vgr.stringMatched());
-    }
+    int sepPos = query.indexOf(sep);
+    return sepPos == -1 ? query : query.substring(0, sepPos);
   }
 
   /**
index 1f1a84e..a262c1e 100644 (file)
@@ -12,6 +12,7 @@ import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyLite;
 import jalview.util.MapList;
 
+import java.util.Arrays;
 import java.util.List;
 
 import org.testng.annotations.AfterClass;
@@ -129,33 +130,51 @@ public class EnsemblGeneTest
     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
             20500, 0f, null);
     sf1.setValue("Parent", "gene:" + geneId);
+    sf1.setValue("transcript_id", "transcript1");
     genomic.addSequenceFeature(sf1);
 
     // transcript sub-type feature
     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
             20500, 0f, null);
     sf2.setValue("Parent", "gene:" + geneId);
+    sf2.setValue("transcript_id", "transcript2");
     genomic.addSequenceFeature(sf2);
 
     // NMD_transcript_variant treated like transcript in Ensembl
     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
             20000, 20500, 0f, null);
     sf3.setValue("Parent", "gene:" + geneId);
+    sf3.setValue("transcript_id", "transcript3");
     genomic.addSequenceFeature(sf3);
 
     // transcript for a different gene - ignored
     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
             0f, null);
     sf4.setValue("Parent", "gene:XYZ");
+    sf4.setValue("transcript_id", "transcript4");
     genomic.addSequenceFeature(sf4);
 
     EnsemblGene testee = new EnsemblGene();
+
+    /*
+     * with no filter
+     */
     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
-            genomic);
+            genomic, null);
     assertEquals(3, features.size());
     assertSame(sf1, features.get(0));
     assertSame(sf2, features.get(1));
     assertSame(sf3, features.get(2));
+
+    /*
+     * with filter
+     */
+    List<String> ids = Arrays.asList(new String[] { "transcript2",
+        "transcript3" });
+    features = testee.getTranscriptFeatures(geneId, genomic, ids);
+    assertEquals(2, features.size());
+    assertSame(sf2, features.get(0));
+    assertSame(sf3, features.get(1));
   }
 
   /**
index bd0e7b3..c5db0a8 100644 (file)
@@ -1,5 +1,7 @@
 package jalview.ext.ensembl;
 
+import static org.testng.AssertJUnit.assertEquals;
+
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
@@ -16,4 +18,28 @@ public class EnsemblProteinTest
     Assert.assertFalse(esq.isValidReference("ENSG00000288602"));
   }
 
+  @Test(groups = "Functional")
+  public void testGetAccesionIdFromQuery() throws Exception
+  {
+    EnsemblSequenceFetcher esq = new EnsemblProtein();
+    assertEquals("ENSP00000288602",
+            esq.getAccessionIdFromQuery("ENSP00000288602"));
+    assertEquals("ENSMUSP00000288602",
+            esq.getAccessionIdFromQuery("ENSMUSP00000288602"));
+
+    // ENST converted to ENSP
+    assertEquals("ENSP00000288602",
+            esq.getAccessionIdFromQuery("ENST00000288602"));
+    assertEquals("ENSMUSP00000288602",
+            esq.getAccessionIdFromQuery("ENSMUST00000288602"));
+
+    // with valid separator:
+    assertEquals("ENSP00000288604",
+            esq.getAccessionIdFromQuery("ENSP00000288604 ENSP00000288602"));
+
+    // with wrong separator:
+    assertEquals("ENSP00000288604,ENSP00000288602",
+            esq.getAccessionIdFromQuery("ENSP00000288604,ENSP00000288602"));
+  }
+
 }
index a6694eb..73d2858 100644 (file)
@@ -1,6 +1,8 @@
 package jalview.ext.ensembl;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
 
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
@@ -341,7 +343,6 @@ public class EnsemblSeqProxyTest
     SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
     sf.setPhase("2"); // skip 2 bases to start of next codon
     ds.addSequenceFeature(sf);
-    ds.addSequenceFeature(sf);
     // CDS for dna 13-15
     sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
     ds.addSequenceFeature(sf);
@@ -359,4 +360,16 @@ public class EnsemblSeqProxyTest
     assertEquals(13, ranges.get(1)[0]);
     assertEquals(15, ranges.get(1)[1]);
   }
+
+  @Test(groups = "Functional")
+  public void testIsTranscriptIdentifier()
+  {
+    assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(null));
+    assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(""));
+    assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345"));
+    assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345"));
+    assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345"));
+    assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456"));
+    assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234"));
+  }
 }
\ No newline at end of file
index 77da8fa..2ee4eac 100644 (file)
@@ -69,8 +69,6 @@ public class GffTests
     mappedRegion = mapList[0].getMap().locateInFrom(15, 15);
     assertArrayEquals(new int[] { 12, 10 }, mappedRegion);
 
-    // so far so good; TODO: programmatically add mapped sequences
-    // and verify the mappings are 'realised'
     SequenceI dna1 = new Sequence("dna1", "AAACCCGGGTTTAAACCCGGGTTT");
     AlignmentI al = new Alignment(new SequenceI[] { dna1 });
     al.setDataset(null);