From: gmungoc Date: Fri, 20 Oct 2017 13:26:36 +0000 (+0100) Subject: Push 1793 latest to spike branch X-Git-Tag: Release_2_11_0~62^2~24 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=353109c11d706b29ae5bc9606f0e12223aa45a98;p=jalview.git Push 1793 latest to spike branch --- diff --git a/.classpath b/.classpath index c4a2832..441ba60 100644 --- a/.classpath +++ b/.classpath @@ -48,11 +48,9 @@ - - @@ -69,5 +67,7 @@ + + diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs index 8a5e7a7..5908bb2 100644 --- a/.settings/org.eclipse.jdt.core.prefs +++ b/.settings/org.eclipse.jdt.core.prefs @@ -1,15 +1,15 @@ eclipse.preferences.version=1 org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve -org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.compliance=1.8 org.eclipse.jdt.core.compiler.debug.lineNumber=generate org.eclipse.jdt.core.compiler.debug.localVariable=generate org.eclipse.jdt.core.compiler.debug.sourceFile=generate org.eclipse.jdt.core.compiler.problem.assertIdentifier=error org.eclipse.jdt.core.compiler.problem.enumIdentifier=error -org.eclipse.jdt.core.compiler.source=1.7 +org.eclipse.jdt.core.compiler.source=1.8 org.eclipse.jdt.core.formatter.align_type_members_on_columns=false org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=52 diff --git a/RELEASE b/RELEASE index e6b0cf8..cecefec 100644 --- a/RELEASE +++ b/RELEASE @@ -1,2 +1,2 @@ jalview.release=releases/Release_2_10_2b1_Branch -jalview.version=2.10.2b1 +jalview.version=2.10.2b2 diff --git a/build.xml b/build.xml index eb30ef0..f39fdf3 100755 --- a/build.xml +++ b/build.xml @@ 
-425,7 +425,7 @@ - + @@ -450,9 +450,9 @@ - + - j2se version="1.9+" + j2se version="1.7+" diff --git a/help/html/releases.html b/help/html/releases.html index 1b8ca87..8e55ca1 100755 --- a/help/html/releases.html +++ b/help/html/releases.html @@ -82,14 +82,34 @@ li:before { rendering of sequence features - +
  • Protein annotation panel too high in CDS/Protein view +
  • +
+ + + + +
+ 2.10.2b2
+ 2/10/2017
+
+ +
+ New features in Jalview Desktop +
    +
  • + Uniprot Sequence Fetcher now uses web API at uniprot.org +
  • +
  • HTTPS used for all connections to ebi.ac.uk
+
+
diff --git a/lib/htsjdk-2.12.0.jar b/lib/htsjdk-2.12.0.jar new file mode 100644 index 0000000..1df12b2 Binary files /dev/null and b/lib/htsjdk-2.12.0.jar differ diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index cd743d1..98b0fb3 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -89,7 +89,7 @@ public class Sequence extends ASequence implements SequenceI */ int index = -1; - private SequenceFeatures sequenceFeatureStore; + private SequenceFeaturesI sequenceFeatureStore; /* * A cursor holding the approximate current view position to the sequence, diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 03fc545..28be85f 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -536,8 +536,21 @@ public interface SequenceI extends ASequenceI */ public int replace(char c1, char c2); + /** + * Answers the GeneLociI, or null if not known + * + * @return + */ GeneLociI getGeneLoci(); + /** + * Sets the mapping to gene loci for the sequence + * + * @param speciesId + * @param assemblyId + * @param chromosomeId + * @param map + */ void setGeneLoci(String speciesId, String assemblyId, String chromosomeId, MapList map); } diff --git a/src/jalview/ext/ensembl/EnsemblData.java b/src/jalview/ext/ensembl/EnsemblData.java new file mode 100644 index 0000000..47fe0fc --- /dev/null +++ b/src/jalview/ext/ensembl/EnsemblData.java @@ -0,0 +1,91 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. 
+ * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.ext.ensembl; + +/** + * A data class to model the data and rest version of one Ensembl domain, + * currently for rest.ensembl.org and rest.ensemblgenomes.org + * + * @author gmcarstairs + */ +class EnsemblData +{ + /* + * The http domain this object is holding data values for + */ + String domain; + + /* + * The latest version Jalview has tested for, e.g. "4.5"; a minor version change should be + * ok, a major version change may break stuff + */ + String expectedRestVersion; + + /* + * Major / minor / point version e.g. "4.5.1" + * @see http://rest.ensembl.org/info/rest/?content-type=application/json + */ + String restVersion; + + /* + * data version + * @see http://rest.ensembl.org/info/data/?content-type=application/json + */ + String dataVersion; + + /* + * true when http://rest.ensembl.org/info/ping/?content-type=application/json + * returns response code 200 and not {"error":"Database is unavailable"} + */ + boolean restAvailable; + + /* + * absolute time when availability was last checked + */ + long lastAvailableCheckTime; + + /* + * absolute time when version numbers were last checked + */ + long lastVersionCheckTime; + + // flag set to true if REST major version is not the one expected + boolean restMajorVersionMismatch; + + /* + * absolute time to wait till if we overloaded the REST service + */ + long retryAfter; + + /** + * Constructor given expected REST version number e.g 4.5 or 3.4.3 + * + * @param restExpected + */ + EnsemblData(String theDomain, String restExpected) + { + domain = theDomain; + expectedRestVersion = 
restExpected; + lastAvailableCheckTime = -1; + lastVersionCheckTime = -1; + } + +} diff --git a/src/jalview/ext/ensembl/EnsemblInfo.java b/src/jalview/ext/ensembl/EnsemblInfo.java index 3108194..de55a53 100644 --- a/src/jalview/ext/ensembl/EnsemblInfo.java +++ b/src/jalview/ext/ensembl/EnsemblInfo.java @@ -1,91 +1,185 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. 
- */ package jalview.ext.ensembl; -/** - * A data class to model the data and rest version of one Ensembl domain, - * currently for rest.ensembl.org and rest.ensemblgenomes.org - * - * @author gmcarstairs - */ -class EnsemblInfo +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; + +import java.io.BufferedReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.json.simple.JSONArray; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class EnsemblInfo extends EnsemblRestClient { - /* - * The http domain this object is holding data values for - */ - String domain; /* - * The latest version Jalview has tested for, e.g. "4.5"; a minor version change should be - * ok, a major version change may break stuff + * cached results of REST /info/divisions service, currently + *
+   * { 
+   *  { "ENSEMBLFUNGI", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLBACTERIA", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLPROTISTS", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLMETAZOA", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLPLANTS",  "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBL", "http://rest.ensembl.org" }
+   *  }
+   * 
+ * The values for EnsemblGenomes are retrieved by a REST call, that for + * Ensembl is added programmatically for convenience of lookup */ - String expectedRestVersion; + private static Map divisions; - /* - * Major / minor / point version e.g. "4.5.1" - * @see http://rest.ensembl.org/info/rest/?content-type=application/json - */ - String restVersion; + @Override + public String getDbName() + { + return "ENSEMBL"; + } - /* - * data version - * @see http://rest.ensembl.org/info/data/?content-type=application/json - */ - String dataVersion; + @Override + public AlignmentI getSequenceRecords(String queries) throws Exception + { + return null; + } - /* - * true when http://rest.ensembl.org/info/ping/?content-type=application/json - * returns response code 200 and not {"error":"Database is unavailable"} - */ - boolean restAvailable; + @Override + protected URL getUrl(List ids) throws MalformedURLException + { + return null; + } - /* - * absolute time when availability was last checked + @Override + protected boolean useGetRequest() + { + return true; + } + + @Override + protected String getRequestMimeType(boolean multipleIds) + { + return "application/json"; + } + + @Override + protected String getResponseMimeType() + { + return "application/json"; + } + + /** + * Answers the domain (http://rest.ensembl.org or + * http://rest.ensemblgenomes.org) for the given division, or null if not + * recognised by Ensembl. + * + * @param division + * @return */ - long lastAvailableCheckTime; + public String getDomain(String division) + { + if (divisions == null) + { + fetchDivisions(); + } + return divisions.get(division.toUpperCase()); + } - /* - * absolute time when version numbers were last checked + /** + * On first request only, populate the lookup map by fetching the list of + * divisions known to EnsemblGenomes. 
*/ - long lastVersionCheckTime; + void fetchDivisions() + { + divisions = new HashMap<>(); - // flag set to true if REST major version is not the one expected - boolean restMajorVersionMismatch; + /* + * for convenience, pre-fill ensembl.org as the domain for "ENSEMBL" + */ + divisions.put(DBRefSource.ENSEMBL.toUpperCase(), ENSEMBL_REST); - /* - * absolute time to wait till if we overloaded the REST service + BufferedReader br = null; + try + { + URL url = getDivisionsUrl(ENSEMBL_GENOMES_REST); + if (url != null) + { + br = getHttpResponse(url, null); + } + parseResponse(br, ENSEMBL_GENOMES_REST); + } catch (IOException e) + { + // ignore + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } + } + } + + /** + * Parses the JSON response to /info/divisions, and add each to the lookup map + * + * @param br + * @param domain */ - long retryAfter; + void parseResponse(BufferedReader br, String domain) + { + JSONParser jp = new JSONParser(); + + try + { + JSONArray parsed = (JSONArray) jp.parse(br); + + Iterator rvals = parsed.iterator(); + while (rvals.hasNext()) + { + String division = rvals.next().toString(); + divisions.put(division.toUpperCase(), domain); + } + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + } /** - * Constructor given expected REST version number e.g 4.5 or 3.4.3 + * Constructs the URL for the EnsemblGenomes /info/divisions REST service + * @param domain TODO * - * @param restExpected + * @return + * @throws MalformedURLException */ - EnsemblInfo(String theDomain, String restExpected) + URL getDivisionsUrl(String domain) throws MalformedURLException { - domain = theDomain; - expectedRestVersion = restExpected; - lastAvailableCheckTime = -1; - lastVersionCheckTime = -1; + return new URL(domain + + "/info/divisions?content-type=application/json"); } + /** + * Returns the set of 'divisions' recognised by Ensembl or EnsemblGenomes + * + * @return + */ + 
public Set getDivisions() { + if (divisions == null) + { + fetchDivisions(); + } + + return divisions.keySet(); + } } diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index eb8f90e..0968663 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -42,6 +42,9 @@ import org.json.simple.parser.ParseException; */ public class EnsemblLookup extends EnsemblRestClient { + private static final String SPECIES = "species"; + + private static final String PARENT = "Parent"; /** * Default constructor (to use rest.ensembl.org) @@ -124,6 +127,28 @@ public class EnsemblLookup extends EnsemblRestClient */ public String getParent(String identifier) { + return getAttribute(identifier, PARENT); + } + + /** + * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the + * given identifier, or null if not found + * + * @param identifier + * @return + */ + public String getSpecies(String identifier) + { + return getAttribute(identifier, SPECIES); + } + + /** + * @param identifier + * @param attribute + * @return + */ + protected String getAttribute(String identifier, String attribute) + { List ids = Arrays.asList(new String[] { identifier }); BufferedReader br = null; @@ -134,7 +159,7 @@ public class EnsemblLookup extends EnsemblRestClient { br = getHttpResponse(url, ids); } - return (parseResponse(br)); + return (parseResponse(br, attribute)); } catch (IOException e) { // ignore @@ -155,22 +180,23 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Parses "Parent" from the JSON response and returns the value, or null if - * not found + * Parses the value of 'attribute' from the JSON response and returns the + * value, or null if not found * * @param br + * @param attribute * @return * @throws IOException */ - protected String parseResponse(BufferedReader br) throws IOException + protected String parseResponse(BufferedReader br, String attribute) throws 
IOException { String parent = null; JSONParser jp = new JSONParser(); try { JSONObject val = (JSONObject) jp.parse(br); - parent = val.get("Parent").toString(); - } catch (ParseException e) + parent = val.get(attribute).toString(); + } catch (ParseException | NullPointerException e) { // ignore } diff --git a/src/jalview/ext/ensembl/EnsemblMap.java b/src/jalview/ext/ensembl/EnsemblMap.java index 05cc897..56657e0 100644 --- a/src/jalview/ext/ensembl/EnsemblMap.java +++ b/src/jalview/ext/ensembl/EnsemblMap.java @@ -2,11 +2,15 @@ package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; +import jalview.datamodel.GeneLociI; +import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -17,6 +21,13 @@ import org.json.simple.parser.ParseException; public class EnsemblMap extends EnsemblRestClient { + private static final String MAPPED = "mapped"; + + private static final String MAPPINGS = "mappings"; + + private static final String CDS = "cds"; + + private static final String CDNA = "cdna"; /** * Default constructor (to use rest.ensembl.org) @@ -62,7 +73,7 @@ public class EnsemblMap extends EnsemblRestClient * @return * @throws MalformedURLException */ - protected URL getUrl(String species, String chromosome, String fromRef, + protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef, String toRef, int startPos, int endPos) throws MalformedURLException { @@ -77,13 +88,7 @@ public class EnsemblMap extends EnsemblRestClient "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json", getDomain(), species, fromRef, chromosome, start, end, strand, toRef); - try - { - return new URL(url); - } catch (MalformedURLException e) - { - return null; - } + return new URL(url); } @Override @@ -110,7 +115,19 @@ public 
class EnsemblMap extends EnsemblRestClient return null; // not used } - public int[] getMapping(String species, String chromosome, + /** + * Calls the REST /map service to get the chromosomal coordinates (start/end) + * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef' + * + * @param species + * @param chromosome + * @param fromRef + * @param toRef + * @param queryRange + * @return + * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map + */ + public int[] getAssemblyMapping(String species, String chromosome, String fromRef, String toRef, int[] queryRange) { URL url = null; @@ -118,21 +135,32 @@ public class EnsemblMap extends EnsemblRestClient try { - url = getUrl(species, chromosome, fromRef, toRef, queryRange[0], + url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0], queryRange[1]); - // System.out.println("Calling " + url); br = getHttpResponse(url, null); - return (parseResponse(br)); + return (parseAssemblyMappingResponse(br)); } catch (Throwable t) { System.out.println("Error calling " + url + ": " + t.getMessage()); return null; + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } } } /** - * Parses the JSON response from the /map REST service. The format is (with - * some fields omitted) + * Parses the JSON response from the /map/<species>/ REST service. The + * format is (with some fields omitted) * *
    *  {"mappings": 
@@ -145,7 +173,7 @@ public class EnsemblMap extends EnsemblRestClient
    * @param br
    * @return
    */
-  protected int[] parseResponse(BufferedReader br)
+  protected int[] parseAssemblyMappingResponse(BufferedReader br)
   {
     int[] result = null;
     JSONParser jp = new JSONParser();
@@ -153,14 +181,14 @@ public class EnsemblMap extends EnsemblRestClient
     try
     {
       JSONObject parsed = (JSONObject) jp.parse(br);
-      JSONArray mappings = (JSONArray) parsed.get("mappings");
+      JSONArray mappings = (JSONArray) parsed.get(MAPPINGS);
 
       Iterator rvals = mappings.iterator();
       while (rvals.hasNext())
       {
         // todo check for "mapped"
         JSONObject val = (JSONObject) rvals.next();
-        JSONObject mapped = (JSONObject) val.get("mapped");
+        JSONObject mapped = (JSONObject) val.get(MAPPED);
         int start = Integer.parseInt(mapped.get("start").toString());
         int end = Integer.parseInt(mapped.get("end").toString());
         String strand = mapped.get("strand").toString();
@@ -180,4 +208,215 @@ public class EnsemblMap extends EnsemblRestClient
     return result;
   }
 
+  /**
+   * Calls the REST /map/cds/id service, and returns a GeneLociI holding the
+   * returned chromosomal coordinates, or returns null if the call fails
+   * 
+   * @param division
+   *          e.g. Ensembl, EnsemblMetazoa
+   * @param accession
+   *          e.g. ENST00000592782, Y55B1AR.1.1
+   * @param start
+   * @param end
+   * @return
+   */
+  public GeneLociI getCdsMapping(String division, String accession,
+          int start, int end)
+  {
+    return getIdMapping(division, accession, start, end, CDS);
+  }
+
+  /**
+   * Calls the REST /map/cdna/id service, and returns a GeneLociI holding the
+   * returned chromosomal coordinates, or returns null if the call fails
+   * 
+   * @param division
+   *          e.g. Ensembl, EnsemblMetazoa
+   * @param accession
+   *          e.g. ENST00000592782, Y55B1AR.1.1
+   * @param start
+   * @param end
+   * @return
+   */
+  public GeneLociI getCdnaMapping(String division, String accession,
+          int start, int end)
+  {
+    return getIdMapping(division, accession, start, end, CDNA);
+  }
+
+  GeneLociI getIdMapping(String division, String accession, int start,
+          int end, String cdsOrCdna)
+  {
+    URL url = null;
+    BufferedReader br = null;
+
+    try
+    {
+      String domain = new EnsemblInfo().getDomain(division);
+      if (domain != null)
+      {
+        url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
+        br = getHttpResponse(url, null);
+        return (parseIdMappingResponse(br, accession, domain));
+      }
+      return null;
+    } catch (Throwable t)
+    {
+      System.out.println("Error calling " + url + ": " + t.getMessage());
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+  /**
+   * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
+   * REST call is to either ensembl or ensemblgenomes, as given by the domain
+   * (which the caller looks up from the division, e.g. EnsemblProtists).
+   * 
+   * @param domain
+   * @param accession
+   * @param start
+   * @param end
+   * @param cdsOrCdna
+   * @return
+   * @throws MalformedURLException
+   */
+  URL getIdMapUrl(String domain, String accession, int start, int end,
+          String cdsOrCdna) throws MalformedURLException
+  {
+    String url = String
+            .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
+                    domain, cdsOrCdna, accession, start, end);
+    return new URL(url);
+  }
+
+  /**
+   * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
+   * format is
+   * 
+   * 
+   * {"mappings":
+   *   [
+   *    {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
+   *     "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
+   *    {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
+   *     "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
+   *   ]
+   * }
+   * 
+ * + * @param br + * @param accession + * @param domain + * @return + */ + GeneLociI parseIdMappingResponse(BufferedReader br, String accession, + String domain) + { + JSONParser jp = new JSONParser(); + + try + { + JSONObject parsed = (JSONObject) jp.parse(br); + JSONArray mappings = (JSONArray) parsed.get(MAPPINGS); + + Iterator rvals = mappings.iterator(); + String assembly = null; + String chromosome = null; + int fromEnd = 0; + List regions = new ArrayList<>(); + + while (rvals.hasNext()) + { + JSONObject val = (JSONObject) rvals.next(); + JSONObject original = (JSONObject) val.get("original"); + fromEnd = Integer.parseInt(original.get("end").toString()); + + JSONObject mapped = (JSONObject) val.get(MAPPED); + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String ass = mapped.get("assembly_name").toString(); + if (assembly != null && !assembly.equals(ass)) + { + System.err + .println("EnsemblMap found multiple assemblies - can't resolve"); + return null; + } + assembly = ass; + String chr = mapped.get("seq_region_name").toString(); + if (chromosome != null && !chromosome.equals(chr)) + { + System.err + .println("EnsemblMap found multiple chromosomes - can't resolve"); + return null; + } + chromosome = chr; + String strand = mapped.get("strand").toString(); + if ("-1".equals(strand)) + { + regions.add(new int[] { end, start }); + } + else + { + regions.add(new int[] { start, end }); + } + } + + /* + * processed all mapped regions on chromosome, assemble the result, + * having first fetched the species id for the accession + */ + final String species = new EnsemblLookup(domain) + .getSpecies(accession); + final String as = assembly; + final String chr = chromosome; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + final MapList map = new MapList(fromRange, regions, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + return 
species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return as; + } + + @Override + public String getChromosomeId() + { + return chr; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + + return null; + } + } diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java index ad6c70c..7cabfbb 100644 --- a/src/jalview/ext/ensembl/EnsemblRestClient.java +++ b/src/jalview/ext/ensembl/EnsemblRestClient.java @@ -67,7 +67,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher private static final String REST_CHANGE_LOG = "https://github.com/Ensembl/ensembl-rest/wiki/Change-log"; - private static Map domainData; + private static Map domainData; // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats private static final String PING_URL = "http://rest.ensembl.org/info/ping.json"; @@ -87,10 +87,10 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher static { - domainData = new HashMap(); + domainData = new HashMap(); domainData.put(ENSEMBL_REST, - new EnsemblInfo(ENSEMBL_REST, LATEST_ENSEMBL_REST_VERSION)); - domainData.put(ENSEMBL_GENOMES_REST, new EnsemblInfo( + new EnsemblData(ENSEMBL_REST, LATEST_ENSEMBL_REST_VERSION)); + domainData.put(ENSEMBL_GENOMES_REST, new EnsemblData( ENSEMBL_GENOMES_REST, LATEST_ENSEMBLGENOMES_REST_VERSION)); } @@ -368,7 +368,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher // to test: // retryDelay = "5"; - EnsemblInfo info = domainData.get(getDomain()); + EnsemblData info = domainData.get(getDomain()); if (retryDelay != null) { System.err.println("Ensembl REST service rate limit exceeded, wait " @@ -403,7 +403,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher */ protected boolean isEnsemblAvailable() { - EnsemblInfo info = domainData.get(getDomain()); + EnsemblData info = 
domainData.get(getDomain()); long now = System.currentTimeMillis(); @@ -491,7 +491,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher */ private void checkEnsemblRestVersion() { - EnsemblInfo info = domainData.get(getDomain()); + EnsemblData info = domainData.get(getDomain()); JSONParser jp = new JSONParser(); URL url = null; diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 95cabcd..f6b8392 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4261,7 +4261,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, protected void showProductsFor(final SequenceI[] sel, final boolean _odna, final String source) { - new Thread(CrossRefAction.showProductsFor(sel, _odna, source, this)) + new Thread(CrossRefAction.getHandlerFor(sel, _odna, source, this)) .start(); } diff --git a/src/jalview/gui/CrossRefAction.java b/src/jalview/gui/CrossRefAction.java index 2d1dfd4..21a0a84 100644 --- a/src/jalview/gui/CrossRefAction.java +++ b/src/jalview/gui/CrossRefAction.java @@ -27,17 +27,25 @@ import jalview.api.FeatureSettingsModelI; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; +import jalview.datamodel.GeneLociI; import jalview.datamodel.SequenceI; +import jalview.ext.ensembl.EnsemblInfo; +import jalview.ext.ensembl.EnsemblMap; import jalview.io.gff.SequenceOntologyI; import jalview.structure.StructureSelectionManager; +import jalview.util.DBRefUtils; +import jalview.util.MapList; +import jalview.util.MappingUtils; import jalview.util.MessageManager; import jalview.ws.SequenceFetcher; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; - -import javax.swing.JOptionPane; +import java.util.Map; +import java.util.Set; /** * Factory constructor and runnable for discovering and displaying @@ -52,13 +60,13 @@ public class 
CrossRefAction implements Runnable private SequenceI[] sel; - private boolean _odna; + private final boolean _odna; private String source; - List xrefViews = new ArrayList(); + List xrefViews = new ArrayList<>(); - public List getXrefViews() + List getXrefViews() { return xrefViews; } @@ -90,6 +98,13 @@ public class CrossRefAction implements Runnable { return; } + + /* + * try to look up chromosomal coordinates for nucleotide + * sequences (if not already retrieved) + */ + findGeneLoci(xrefs.getSequences()); + /* * get display scheme (if any) to apply to features */ @@ -113,75 +128,14 @@ public class CrossRefAction implements Runnable if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) { - boolean copyAlignmentIsAligned = false; - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, - xrefsAlignment.getSequencesArray()); - if (copyAlignment.getHeight() == 0) - { - JvOptionPane.showMessageDialog(alignFrame, - MessageManager.getString("label.cant_map_cds"), - MessageManager.getString("label.operation_failed"), - JvOptionPane.OK_OPTION); - System.err.println("Failed to make CDS alignment"); - } - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } - } - else + copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna, + xrefs, xrefsAlignment); + if (copyAlignment == null) { - copyAlignment = AlignmentUtils.makeCopyAlignment(sel, - xrefs.getSequencesArray(), dataset); - } - copyAlignment - .setGapCharacter(alignFrame.viewport.getGapCharacter()); - - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - - /* - * register any new mappings for sequence mouseover etc - * (will not duplicate any previously 
registered mappings) - */ - ssm.registerMappings(dataset.getCodonFrames()); - - if (copyAlignment.getHeight() <= 0) - { - System.err.println( - "No Sequences generated for xRef type " + source); - return; - } - /* - * align protein to dna - */ - if (dna && copyAlignmentIsAligned) - { - xrefsAlignment.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! - */ - // TODO: generalise for other sources of locus/transcript/cds data - if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(xrefsAlignment); - } + return; // failed } } + /* * build AlignFrame(s) according to available alignment data */ @@ -207,6 +161,7 @@ public class CrossRefAction implements Runnable xrefViews.add(newFrame.alignPanel); return; // via finally clause } + AlignFrame copyThis = new AlignFrame(copyAlignment, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); copyThis.setTitle(alignFrame.getTitle()); @@ -263,6 +218,260 @@ public class CrossRefAction implements Runnable } /** + * Tries to add chromosomal coordinates to any nucleotide sequence which does + * not already have them. Coordinates are retrieved from Ensembl given an + * Ensembl identifier, either on the sequence itself or on a peptide sequence + * it has a reference to. + * + *
+   * Example (human):
+   * - fetch EMBLCDS cross-references for Uniprot entry P30419
+   * - the EMBL sequences do not have xrefs to Ensembl
+   * - the Uniprot entry has xrefs to 
+   *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
+   * - either of the transcript ids can be used to retrieve gene loci e.g.
+   *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
+   * Example (non-vertebrate):
+   * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
+   * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
+   * - can retrieve gene loci with
+   *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
+   * 
+ * + * @param sequences + */ + public static void findGeneLoci(List sequences) + { + Map retrievedLoci = new HashMap<>(); + for (SequenceI seq : sequences) + { + findGeneLoci(seq, retrievedLoci); + } + } + + /** + * Tries to find chromosomal coordinates for the sequence, by searching its + * direct and indirect cross-references for Ensembl. If the loci have already + * been retrieved, just reads them out of the map of retrievedLoci; this is + * the case of an alternative transcript for the same protein. Otherwise calls + * a REST service to retrieve the loci, and if successful, adds them to the + * sequence and to the retrievedLoci. + * + * @param seq + * @param retrievedLoci + */ + static void findGeneLoci(SequenceI seq, + Map retrievedLoci) + { + /* + * don't replace any existing chromosomal coordinates + */ + if (seq == null || seq.isProtein() || seq.getGeneLoci() != null + || seq.getDBRefs() == null) + { + return; + } + + Set ensemblDivisions = new EnsemblInfo().getDivisions(); + + /* + * first look for direct dbrefs from sequence to Ensembl + */ + String[] divisionsArray = ensemblDivisions + .toArray(new String[ensemblDivisions.size()]); + DBRefEntry[] seqRefs = seq.getDBRefs(); + DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs, + divisionsArray); + if (directEnsemblRefs != null) + { + for (DBRefEntry ensemblRef : directEnsemblRefs) + { + if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) + { + return; + } + } + } + + /* + * else look for indirect dbrefs from sequence to Ensembl + */ + for (DBRefEntry dbref : seq.getDBRefs()) + { + if (dbref.getMap() != null && dbref.getMap().getTo() != null) + { + DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs(); + DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs, + divisionsArray); + if (indirectEnsemblRefs != null) + { + for (DBRefEntry ensemblRef : indirectEnsemblRefs) + { + if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) + { + return; + } + } + } + } + } + } + + /** + * Retrieves 
chromosomal coordinates for the Ensembl (or EnsemblGenomes) + * identifier in dbref. If successful, and the sequence length matches gene + * loci length, then add it to the sequence, and to the retrievedLoci map. + * Answers true if successful, else false. + * + * @param seq + * @param dbref + * @param retrievedLoci + * @return + */ + static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref, + Map retrievedLoci) + { + String accession = dbref.getAccessionId(); + String division = dbref.getSource(); + + /* + * hack: ignore cross-references to Ensembl protein ids + * (can't fetch chromosomal mapping for these) + * todo: is there an equivalent in EnsemblGenomes? + */ + if (accession.startsWith("ENSP")) + { + return false; + } + EnsemblMap mapper = new EnsemblMap(); + + /* + * try CDS mapping first + */ + GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1, + seq.getLength()); + if (geneLoci != null) + { + MapList map = geneLoci.getMap(); + int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); + if (mappedFromLength == seq.getLength()) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), geneLoci.getMap()); + retrievedLoci.put(dbref, geneLoci); + return true; + } + } + + /* + * else try CDNA mapping + */ + geneLoci = mapper.getCdnaMapping(division, accession, 1, + seq.getLength()); + if (geneLoci != null) + { + MapList map = geneLoci.getMap(); + int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); + if (mappedFromLength == seq.getLength()) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), geneLoci.getMap()); + retrievedLoci.put(dbref, geneLoci); + return true; + } + } + + return false; + } + + /** + * @param alignment + * @param dataset + * @param dna + * @param xrefs + * @param xrefsAlignment + * @return + */ + protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment, + AlignmentI dataset, boolean dna, 
AlignmentI xrefs, + AlignmentI xrefsAlignment) + { + AlignmentI copyAlignment; + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, + xrefsAlignment.getSequencesArray()); + if (copyAlignment.getHeight() == 0) + { + JvOptionPane.showMessageDialog(alignFrame, + MessageManager.getString("label.cant_map_cds"), + MessageManager.getString("label.operation_failed"), + JvOptionPane.OK_OPTION); + System.err.println("Failed to make CDS alignment"); + return null; + } + + /* + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl + */ + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) + { + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; + } + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment(sel, + xrefs.getSequencesArray(), dataset); + } + copyAlignment + .setGapCharacter(alignFrame.viewport.getGapCharacter()); + + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println( + "No Sequences generated for xRef type " + source); + return null; + } + + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { + /* + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! 
+ */ + // TODO: generalise for other sources of locus/transcript/cds data + if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) + { + copyAlignment.alignAs(xrefsAlignment); + } + } + + return copyAlignment; + } + + /** * Makes an alignment containing the given sequences, and adds them to the * given dataset, which is also set as the dataset for the new alignment * @@ -291,20 +500,28 @@ public class CrossRefAction implements Runnable return al; } - public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel, - boolean _odna, String source) + /** + * Constructor + * + * @param af + * @param seqs + * @param fromDna + * @param dbSource + */ + CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna, + String dbSource) { - this.alignFrame = alignFrame; - this.sel = sel; - this._odna = _odna; - this.source = source; + this.alignFrame = af; + this.sel = seqs; + this._odna = fromDna; + this.source = dbSource; } - public static CrossRefAction showProductsFor(final SequenceI[] sel, - final boolean _odna, final String source, + public static CrossRefAction getHandlerFor(final SequenceI[] sel, + final boolean fromDna, final String source, final AlignFrame alignFrame) { - return new CrossRefAction(alignFrame, sel, _odna, source); + return new CrossRefAction(alignFrame, sel, fromDna, source); } } diff --git a/src/jalview/io/SequenceAnnotationReport.java b/src/jalview/io/SequenceAnnotationReport.java index f1ebcac..13f41d4 100644 --- a/src/jalview/io/SequenceAnnotationReport.java +++ b/src/jalview/io/SequenceAnnotationReport.java @@ -58,7 +58,7 @@ public class SequenceAnnotationReport /* * Comparator to order DBRefEntry by Source + accession id (case-insensitive), - * with 'Primary' sources placed before others + * with 'Primary' sources placed before others, and 'chromosome' first of all */ private static Comparator comparator = new Comparator() { @@ -66,6 +66,14 @@ public class SequenceAnnotationReport @Override public int compare(DBRefEntry ref1, DBRefEntry ref2) 
{ + if (ref1.isChromosome()) + { + return -1; + } + if (ref2.isChromosome()) + { + return 1; + } String s1 = ref1.getSource(); String s2 = ref2.getSource(); boolean s1Primary = isPrimarySource(s1); diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index c1c84fb..e381b26 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -287,7 +287,7 @@ public class VCFLoader /** * Transfers VCF features to sequences to which this sequence has a mapping. - * If the mapping is 1:3, computes peptide variants from nucleotide variants. + * If the mapping is 3:1, computes peptide variants from nucleotide variants. * * @param seq */ @@ -432,27 +432,21 @@ public class VCFLoader */ VariantContext variant = variants.next(); - /* - * we can only process SNP variants (which can be reported - * as part of a MIXED variant record - */ - if (!variant.isSNP() && !variant.isMixed()) - { - // continue; - } - int start = variant.getStart() - offset; int end = variant.getEnd() - offset; /* * convert chromosomal location to sequence coordinates + * - may be reverse strand (convert to forward for sequence feature) * - null if a partially overlapping feature */ int[] seqLocation = mapping.locateInFrom(start, end); if (seqLocation != null) { - count += addAlleleFeatures(seq, variant, seqLocation[0], - seqLocation[1], forwardStrand); + int featureStart = Math.min(seqLocation[0], seqLocation[1]); + int featureEnd = Math.max(seqLocation[0], seqLocation[1]); + count += addAlleleFeatures(seq, variant, featureStart, featureEnd, + forwardStrand); } } @@ -564,13 +558,6 @@ public class VCFLoader String reference = variant.getReference().getBaseString(); Allele alt = variant.getAlternateAllele(altAlleleIndex); String allele = alt.getBaseString(); - if (allele.length() != 1) - { - /* - * not a SNP variant - */ - // return 0; - } /* * build the ref,alt allele description e.g. 
"G,A", using the base @@ -868,7 +855,8 @@ public class VCFLoader * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37 */ EnsemblMap mapper = new EnsemblMap(); - int[] mapping = mapper.getMapping(species, chromosome, fromRef, toRef, + int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef, + toRef, queryRange); if (mapping == null) diff --git a/src/jalview/renderer/seqfeatures/FeatureRenderer.java b/src/jalview/renderer/seqfeatures/FeatureRenderer.java index e81e519..f16522f 100644 --- a/src/jalview/renderer/seqfeatures/FeatureRenderer.java +++ b/src/jalview/renderer/seqfeatures/FeatureRenderer.java @@ -401,27 +401,6 @@ public class FeatureRenderer extends FeatureRendererModel } /** -<<<<<<< HEAD -======= - * Answers true if the feature belongs to a feature group which is not - * currently displayed, else false - * - * @param sequenceFeature - * @return - */ - @Override - protected boolean featureGroupNotShown( - final SequenceFeature sequenceFeature) - { - return featureGroups != null && sequenceFeature.featureGroup != null - && sequenceFeature.featureGroup.length() != 0 - && featureGroups.containsKey(sequenceFeature.featureGroup) - && !featureGroups.get(sequenceFeature.featureGroup) - .booleanValue(); - } - - /** ->>>>>>> refs/heads/develop * Called when alignment in associated view has new/modified features to * discover and display. * diff --git a/test/jalview/io/CrossRef2xmlTests.java b/test/jalview/io/CrossRef2xmlTests.java index 0715857..b3db4de 100644 --- a/test/jalview/io/CrossRef2xmlTests.java +++ b/test/jalview/io/CrossRef2xmlTests.java @@ -39,6 +39,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; + +import junit.extensions.PA; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -90,9 +93,9 @@ public class CrossRef2xmlTests extends Jalview2xmlBase // . 
codonframes // // - HashMap dbtoviewBit = new HashMap<>(); + Map dbtoviewBit = new HashMap<>(); List keyseq = new ArrayList<>(); - HashMap savedProjects = new HashMap<>(); + Map savedProjects = new HashMap<>(); for (String[] did : new String[][] { { "UNIPROT", "P00338" } }) { @@ -186,15 +189,16 @@ public class CrossRef2xmlTests extends Jalview2xmlBase if (pass2 == 0) { // retrieve and show cross-refs in this thread - cra = new CrossRefAction(af, seqs, dna, db); + cra = CrossRefAction.getHandlerFor(seqs, dna, db, af); cra.run(); - if (cra.getXrefViews().size() == 0) + cra_views = (List) PA.getValue(cra, + "xrefViews"); + if (cra_views.size() == 0) { failedXrefMenuItems.add("No crossrefs retrieved for " + first + " -> " + db); continue; } - cra_views = cra.getXrefViews(); assertNucleotide(cra_views.get(0), "Nucleotide panel included proteins for " + first + " -> " + db); @@ -286,16 +290,18 @@ public class CrossRef2xmlTests extends Jalview2xmlBase if (pass3 == 0) { - SequenceI[] xrseqs = avp.getAlignment() .getSequencesArray(); AlignFrame nextaf = Desktop.getAlignFrameFor(avp .getAlignViewport()); - cra = new CrossRefAction(nextaf, xrseqs, avp - .getAlignViewport().isNucleotide(), xrefdb); + cra = CrossRefAction.getHandlerFor(xrseqs, avp + .getAlignViewport().isNucleotide(), xrefdb, + nextaf); cra.run(); - if (cra.getXrefViews().size() == 0) + cra_views2 = (List) PA.getValue( + cra, "xrefViews"); + if (cra_views2.size() == 0) { failedXrefMenuItems .add("No crossrefs retrieved for '" @@ -303,7 +309,6 @@ public class CrossRef2xmlTests extends Jalview2xmlBase + " via '" + nextaf.getTitle() + "'"); continue; } - cra_views2 = cra.getXrefViews(); assertNucleotide(cra_views2.get(0), "Nucleotide panel included proteins for '" + nextxref + "' to " + xrefdb @@ -541,8 +546,8 @@ public class CrossRef2xmlTests extends Jalview2xmlBase * viewpanel needs to be called with a distinct xrefpath to ensure * each one's strings are compared) */ - private void stringify(HashMap dbtoviewBit, 
- HashMap savedProjects, String xrefpath, + private void stringify(Map dbtoviewBit, + Map savedProjects, String xrefpath, AlignmentViewPanel avp) { if (savedProjects != null) diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java index b01266e..4a254d2 100644 --- a/test/jalview/io/vcf/VCFLoaderTest.java +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -1,7 +1,7 @@ package jalview.io.vcf; import static org.testng.Assert.assertEquals; -import static org.testng.Assert.fail; +import static org.testng.Assert.assertTrue; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -9,6 +9,7 @@ import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; import jalview.gui.AlignFrame; import jalview.io.DataSourceType; import jalview.io.FileLoader; @@ -25,30 +26,43 @@ import org.testng.annotations.Test; public class VCFLoaderTest { - // columns 9717- of gene P30419 from Ensembl (modified) - private static final String FASTA = - // forward strand 'gene' - ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n" + private static final float DELTA = 0.00001f; + + // columns 9717- of gene P30419 from Ensembl (much modified) + private static final String FASTA = "" + + + /* + * forward strand 'gene' and 'transcript' with two exons + */ + ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n" + "CAAGCTGGCGGACGAGAGTGTGACA\n" - // and a 'made up' mini-transcript with two exons + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n" - + - // 'reverse strand' gene (reverse complement) - ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n" + + /* + * reverse strand gene and transcript (reverse complement alleles!) 
+ */ + + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n" + "TGTCACACTCTCGTCCGCCAGCTTG\n" - // and its 'transcript' - + ">transcript2/1-18\n" - + "-GTCACACTCT----CGCCAGCT--\n"; + + ">transcript2/1-18\n" + "-GTCACACTCT----CGCCAGCT--\n" + + /* + * 'gene' on chromosome 5 with two transcripts + */ + + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n" + + "CAAGCTGGCGGACGAGAGTGTGACA\n" + + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n" + + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n"; private static final String[] VCF = { "##fileformat=VCFv4.2", "##INFO=", "##reference=GRCh38", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", - // SNP A/T in position 2 of gene sequence (precedes transcript) - "17\t45051611\t.\tA\tT\t1666.64\tRF\tAC=15;AF=5.08130e-03", + // A/T,C variants in position 2 of gene sequence (precedes transcript) + // should create 2 variant features with respective scores + "17\t45051611\t.\tA\tT,C\t1666.64\tRF\tAC=15;AF=5.0e-03,4.0e-03", // SNP G/C in position 4 of gene sequence, position 2 of transcript - // this is a mixed variant, the insertion G/GA is not transferred - "17\t45051613\t.\tG\tGA,C\t1666.64\tRF\tAC=15;AF=3.08130e-03" }; + // insertion G/GA is transferred to nucleotide but not to peptide + "17\t45051613\t.\tG\tGA,C\t1666.64\tRF\tAC=15;AF=3.0e-03,2.0e-03" }; @Test(groups = "Functional") public void testDoLoad() throws IOException @@ -62,42 +76,69 @@ public class VCFLoaderTest /* * verify variant feature(s) added to gene + * NB alleles at a locus may not be processed, and features added, + * in the order in which they appear in the VCF record as method + * VariantContext.getAlternateAlleles() does not guarantee order + * - order of assertions here matches what we find (is not important) */ List geneFeatures = al.getSequenceAt(0) .getSequenceFeatures(); - assertEquals(geneFeatures.size(), 2); + SequenceFeatures.sortFeatures(geneFeatures, true); + assertEquals(geneFeatures.size(), 4); SequenceFeature sf = geneFeatures.get(0); 
assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 2); assertEquals(sf.getEnd(), 2); + assertEquals(sf.getScore(), 4.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C"); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + sf = geneFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); + assertEquals(sf.getScore(), 5.0e-03, DELTA); assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T"); - sf = geneFeatures.get(1); + sf = geneFeatures.get(2); assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 4); assertEquals(sf.getEnd(), 4); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals(sf.getScore(), 2.0e-03, DELTA); assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C"); + sf = geneFeatures.get(3); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA"); + /* * verify variant feature(s) added to transcript */ List transcriptFeatures = al.getSequenceAt(1) .getSequenceFeatures(); - assertEquals(transcriptFeatures.size(), 1); + assertEquals(transcriptFeatures.size(), 2); sf = transcriptFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 2); assertEquals(sf.getEnd(), 2); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals(sf.getScore(), 2.0e-03, DELTA); assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C"); + sf = transcriptFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + 
assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA"); /* - * verify variant feature(s) computed and added to protein + * verify SNP variant feature(s) computed and added to protein * first codon AGC varies to ACC giving S/T */ DBRefEntry[] dbRefs = al.getSequenceAt(1).getDBRefs(); @@ -147,7 +188,7 @@ public class VCFLoaderTest * from Ensembl and transcripts computed) */ AlignmentI alignment = af.getViewport().getAlignment(); - SequenceI gene1 = alignment.getSequenceAt(0); + SequenceI gene1 = alignment.findName("gene1"); int[] to = new int[] { 45051610, 45051634 }; int[] from = new int[] { gene1.getStart(), gene1.getEnd() }; gene1.setGeneLoci("human", "GRCh38", "17", new MapList(from, to, 1, 1)); @@ -158,7 +199,7 @@ public class VCFLoaderTest * which is chromosome 45051612-45051619,45051624-45051633 */ to = new int[] { 45051612, 45051619, 45051624, 45051633 }; - SequenceI transcript1 = alignment.getSequenceAt(1); + SequenceI transcript1 = alignment.findName("transcript1"); from = new int[] { transcript1.getStart(), transcript1.getEnd() }; transcript1.setGeneLoci("human", "GRCh38", "17", new MapList(from, to, 1, 1)); @@ -166,7 +207,7 @@ public class VCFLoaderTest /* * map gene2 to chromosome reverse strand */ - SequenceI gene2 = alignment.getSequenceAt(2); + SequenceI gene2 = alignment.findName("gene2"); to = new int[] { 45051634, 45051610 }; from = new int[] { gene2.getStart(), gene2.getEnd() }; gene2.setGeneLoci("human", "GRCh38", "17", new MapList(from, to, 1, 1)); @@ -177,7 +218,7 @@ public class VCFLoaderTest * which is chromosome 45051633-45051624,45051619-45051612 */ to = new int[] { 45051633, 45051624, 45051619, 45051612 }; - SequenceI transcript2 = alignment.getSequenceAt(3); + SequenceI transcript2 = alignment.findName("transcript2"); from = new int[] { transcript2.getStart(), 
transcript2.getEnd() }; transcript2.setGeneLoci("human", "GRCh38", "17", new MapList(from, to, 1, 1)); @@ -201,6 +242,42 @@ public class VCFLoaderTest product = new DBRefEntry("", "", "ENSP002", map); transcript2.addDBRef(product); + /* + * map gene3 to chromosome + */ + SequenceI gene3 = alignment.findName("gene3"); + to = new int[] { 45051610, 45051634 }; + from = new int[] { gene3.getStart(), gene3.getEnd() }; + gene3.setGeneLoci("human", "GRCh38", "5", new MapList(from, to, 1, 1)); + + /* + * map 'transcript3' to chromosome + */ + SequenceI transcript3 = alignment.findName("transcript3"); + to = new int[] { 45051612, 45051619, 45051624, 45051633 }; + from = new int[] { transcript3.getStart(), transcript3.getEnd() }; + transcript3.setGeneLoci("human", "GRCh38", "5", new MapList(from, to, + 1, 1)); + + /* + * map 'transcript4' to chromosome + */ + SequenceI transcript4 = alignment.findName("transcript4"); + to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634, + 45051634 }; + from = new int[] { transcript4.getStart(), transcript4.getEnd() }; + transcript4.setGeneLoci("human", "GRCh38", "5", new MapList(from, to, + 1, 1)); + + /* + * add a protein product as a DBRef on transcript3 + */ + SequenceI peptide3 = new Sequence("ENSP003", "SWRECD"); + mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1); + map = new Mapping(peptide3, mapList); + product = new DBRefEntry("", "", "ENSP003", map); + transcript3.addDBRef(product); + return alignment; } @@ -225,44 +302,69 @@ public class VCFLoaderTest /* * verify variant feature(s) added to gene2 * gene/1-25 maps to chromosome 45051634- reverse strand - * variants A/T at 45051611 and G/C at 45051613 map to - * T/A and C/G at gene positions 24 and 22 respectively + * variants A/T, A/C at 45051611 and G/GA,G/C at 45051613 map to + * T/A, T/G and C/TC,C/G at gene positions 24 and 22 respectively */ List geneFeatures = al.getSequenceAt(2) .getSequenceFeatures(); - assertEquals(geneFeatures.size(), 2); 
+ SequenceFeatures.sortFeatures(geneFeatures, true); + assertEquals(geneFeatures.size(), 4); SequenceFeature sf = geneFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 22); assertEquals(sf.getEnd(), 22); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals(sf.getScore(), 2.0e-03, DELTA); assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); sf = geneFeatures.get(1); assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 22); + assertEquals(sf.getEnd(), 22); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES)); + + sf = geneFeatures.get(2); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 24); + assertEquals(sf.getEnd(), 24); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 4.0e-03, DELTA); + assertEquals("T,G", sf.getValue(Gff3Helper.ALLELES)); + + sf = geneFeatures.get(3); + assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 24); assertEquals(sf.getEnd(), 24); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); + assertEquals(sf.getScore(), 5.0e-03, DELTA); assertEquals("T,A", sf.getValue(Gff3Helper.ALLELES)); /* * verify variant feature(s) added to transcript2 - * variant C/G at position 22 of gene overlaps and maps to - * position 17 of transcript + * variants G/GA,G/C at position 22 of gene overlap and map to + * C/TC,C/G at position 17 of transcript */ List transcriptFeatures = al.getSequenceAt(3) .getSequenceFeatures(); - assertEquals(transcriptFeatures.size(), 1); + assertEquals(transcriptFeatures.size(), 2); sf = transcriptFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 17); assertEquals(sf.getEnd(), 17); 
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals(sf.getScore(), 2.0e-03, DELTA); assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); + sf = transcriptFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES)); + /* * verify variant feature(s) computed and added to protein * last codon GCT varies to GGT giving A/G in the last peptide position @@ -287,18 +389,6 @@ public class VCFLoaderTest } /** - * Tests that where variant records have more than one SNP allele, a variant - * feature is created for each, and the corresponding data values set on it - * - * @throws IOException - */ - @Test(groups = "Functional") - public void testDoLoad_multipleAlleles() throws IOException - { - fail("todo"); - } - - /** * Tests that if VEP consequence (CSQ) data is present in the VCF data, then * it is added to the variant feature, but restricted where possible to the * consequences for a specific transcript @@ -308,6 +398,174 @@ public class VCFLoaderTest @Test(groups = "Functional") public void testDoLoad_vepCsq() throws IOException { - fail("todo"); + AlignmentI al = buildAlignment(); + + VCFLoader loader = new VCFLoader(al); + + /* + * VCF data file with variants at gene3 positions + * 1 C/A + * 5 C/T + * 9 CGT/C (deletion) + * 13 C/G, C/T + * 17 A/AC (insertion), A/G + */ + loader.doLoad("test/jalview/io/vcf/testVcf.dat", null); + + /* + * verify variant feature(s) added to gene3 + */ + List geneFeatures = al.findName("gene3") + .getSequenceFeatures(); + SequenceFeatures.sortFeatures(geneFeatures, true); + assertEquals(geneFeatures.size(), 7); + SequenceFeature sf = geneFeatures.get(0); + assertEquals(sf.getBegin(), 1); + assertEquals(sf.getEnd(), 1); + 
assertEquals(sf.getScore(), 0.1f, DELTA); + assertEquals(sf.getValue("alleles"), "C,A"); + // gene features include Consequence for all transcripts + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(1); + assertEquals(sf.getBegin(), 5); + assertEquals(sf.getEnd(), 5); + assertEquals(sf.getScore(), 0.2f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(2); + assertEquals(sf.getBegin(), 9); + assertEquals(sf.getEnd(), 11); // deletion over 3 positions + assertEquals(sf.getScore(), 0.3f, DELTA); + assertEquals(sf.getValue("alleles"), "CGG,C"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(3); + assertEquals(sf.getBegin(), 13); + assertEquals(sf.getEnd(), 13); + assertEquals(sf.getScore(), 0.5f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(4); + assertEquals(sf.getBegin(), 13); + assertEquals(sf.getEnd(), 13); + assertEquals(sf.getScore(), 0.4f, DELTA); + assertEquals(sf.getValue("alleles"), "C,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(5); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); + assertEquals(sf.getScore(), 0.7f, DELTA); + assertEquals(sf.getValue("alleles"), "A,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(6); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); // insertion + assertEquals(sf.getScore(), 0.6f, DELTA); + assertEquals(sf.getValue("alleles"), "A,AC"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + /* + * verify variant feature(s) added to transcript3 + * at columns 5 (1), 17 (2), positions 3, 11 + * note the deletion at columns 9-11 is not transferred since col 11 + * has no mapping to 
transcript 3 + */ + List transcriptFeatures = al.findName("transcript3") + .getSequenceFeatures(); + SequenceFeatures.sortFeatures(transcriptFeatures, true); + assertEquals(transcriptFeatures.size(), 3); + sf = transcriptFeatures.get(0); + assertEquals(sf.getBegin(), 3); + assertEquals(sf.getEnd(), 3); + assertEquals(sf.getScore(), 0.2f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + // transcript features only have Consequence for that transcripts + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript3")); + + sf = transcriptFeatures.get(1); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.7f, DELTA); + assertEquals(sf.getValue("alleles"), "A,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript3")); + + sf = transcriptFeatures.get(2); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.6f, DELTA); + assertEquals(sf.getValue("alleles"), "A,AC"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript3")); + + /* + * verify variants computed on protein product for transcript3 + * peptide is SWRECD + * codon variants are AGC/AGT position 1 which is synonymous + * and GAG/GGG which is E/G in position 4 + * the insertion variant is not transferred to the peptide + */ + DBRefEntry[] dbRefs = al.findName("transcript3").getDBRefs(); + SequenceI peptide = null; + for (DBRefEntry dbref : dbRefs) + { + if (dbref.getMap().getMap().getFromRatio() == 3) + { + peptide = dbref.getMap().getTo(); + } + } + List proteinFeatures = peptide.getSequenceFeatures(); + assertEquals(proteinFeatures.size(), 1); + sf = proteinFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + 
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getDescription(), "p.Glu4Gly"); + + /* + * verify variant feature(s) added to transcript4 + * at columns 13 (2) and 17 (2), positions 7 and 11 + */ + transcriptFeatures = al.findName("transcript4").getSequenceFeatures(); + SequenceFeatures.sortFeatures(transcriptFeatures, true); + assertEquals(transcriptFeatures.size(), 4); + sf = transcriptFeatures.get(0); + assertEquals(sf.getBegin(), 7); + assertEquals(sf.getEnd(), 7); + assertEquals(sf.getScore(), 0.5f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + + sf = transcriptFeatures.get(1); + assertEquals(sf.getBegin(), 7); + assertEquals(sf.getEnd(), 7); + assertEquals(sf.getScore(), 0.4f, DELTA); + assertEquals(sf.getValue("alleles"), "C,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + + sf = transcriptFeatures.get(2); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.7f, DELTA); + assertEquals(sf.getValue("alleles"), "A,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + + sf = transcriptFeatures.get(3); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.6f, DELTA); + assertEquals(sf.getValue("alleles"), "A,AC"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); } } diff --git a/test/jalview/io/vcf/testVcf.dat b/test/jalview/io/vcf/testVcf.dat new file mode 100644 index 0000000..e9e6c22 --- /dev/null +++ b/test/jalview/io/vcf/testVcf.dat @@ -0,0 +1,13 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 
+##reference=GRCh38 +#CHROM POS ID REF ALT QUAL FILTER INFO +5 45051610 . C A 81.96 RF;AC0 AC=1;AF=0.1;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=A|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,A|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051618 . CGG C 41.94 AC0 AC=1;AF=0.3;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=C|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,C|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,CSQ=CGT|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,CGT|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051622 . C G,T 224.23 RF;AC0 AC=1,2;AF=0.4,0.5;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051626 . 
A AC,G 433.35 RF;AC0 AC=3,4;AF=0.6,0.7;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,AC|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,AC|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index f3395ca..d2db258 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -931,5 +931,13 @@ public class MapListTest toRanges = compound.getToRanges(); assertEquals(toRanges.size(), 1); assertArrayEquals(new int[] { 47, 50, 71, 126 }, toRanges.get(0)); + + /* + * method returns null if not all regions are mapped through + */ + ml1 = new MapList(new int[] { 1, 50 }, new int[] { 101, 150 }, 1, 1); + ml2 = new MapList(new int[] { 131, 180 }, new int[] { 201, 250 }, 1, 3); + compound = ml1.traverse(ml2); + assertNull(compound); } }