label.view_full_application = View in Full Application
label.load_associated_tree = Load Associated Tree...
label.load_features_annotations = Load Features/Annotations...
+label.load_vcf = Load SNP variants from plain text or indexed VCF data
+label.load_vcf_file = Load VCF File
label.export_features = Export Features...
label.export_annotations = Export Annotations...
label.to_upper_case = To Upper Case
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
+import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
{
return variant == null ? null : variant.getFeatureGroup();
}
+
+ /**
+ * toString for aid in the debugger only
+ */
+ @Override
+ public String toString()
+ {
+ return base + ":" + (variant == null ? "" : variant.getDescription());
+ }
}
/**
{
if (var.variant != null)
{
- String alleles = (String) var.variant.getValue("alleles");
+ String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
if (alleles != null)
{
for (String base : alleles.split(","))
{
if (var.variant != null)
{
- String alleles = (String) var.variant.getValue("alleles");
+ String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
if (alleles != null)
{
for (String base : alleles.split(","))
{
if (var.variant != null)
{
- String alleles = (String) var.variant.getValue("alleles");
+ String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
if (alleles != null)
{
for (String base : alleles.split(","))
// not handling multi-locus variant features
continue;
}
+
+ /*
+ * extract dna variants to a string array
+ */
+ String alls = (String) sf.getValue(Gff3Helper.ALLELES);
+ if (alls == null)
+ {
+ continue; // non-SNP VCF variant perhaps - can't process this
+ }
+ String[] alleles = alls.toUpperCase().split(",");
+ int i = 0;
+ for (String allele : alleles)
+ {
+ alleles[i++] = allele.trim(); // lose any space characters "A, G"
+ }
+
int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
if (mapsTo == null)
{
}
/*
- * extract dna variants to a string array
- */
- String alls = (String) sf.getValue("alleles");
- if (alls == null)
- {
- continue;
- }
- String[] alleles = alls.toUpperCase().split(",");
- int i = 0;
- for (String allele : alleles)
- {
- alleles[i++] = allele.trim(); // lose any space characters "A, G"
- }
-
- /*
* get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
*/
int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
--- /dev/null
+package jalview.datamodel;
+
+import jalview.util.MapList;
+
+/**
+ * A data bean to model one or more contiguous regions on one chromosome
+ */
+public class GeneLoci
+{
+ /*
+ * an identifier for the species
+ */
+ public final String species;
+
+ /*
+ * an identifier for a genome assembly, e.g. GRCh38
+ */
+ public final String assembly;
+
+ /*
+ * a chromosome identifier, e.g. "5" or "X"
+ */
+ public final String chromosome;
+
+ /*
+ * mapping from sequence positions to chromosome locations;
+ * any regions with start > end are on the reverse strand
+ */
+ public final MapList mapping;
+
+ /**
+ * Constructor
+ *
+ * @param taxon
+ * @param ref
+ * @param chrId
+ * @param map
+ */
+ public GeneLoci(String taxon, String ref, String chrId, MapList map)
+ {
+ species = taxon;
+ assembly = ref;
+ chromosome = chrId;
+ mapping = map;
+ }
+}
*/
private int changeCount;
+ private GeneLoci geneLoci;
+
/**
* Creates a new Sequence object.
*
}
/**
- * DOCUMENT ME!
+ * Sets the sequence description, and also parses out any special formats of
+ * interest
*
* @param desc
- * DOCUMENT ME!
*/
@Override
public void setDescription(String desc)
{
this.description = desc;
+ parseDescription();
}
/**
- * DOCUMENT ME!
+ * Parses and saves fields of an Ensembl-style description e.g.
+ * chromosome:GRCh38:17:45051610:45109016:1
+ */
+ protected void parseDescription()
+ {
+ if (description == null)
+ {
+ return;
+ }
+ String[] tokens = description.split(":");
+ if (tokens.length == 6 && "chromosome".equals(tokens[0])) {
+ String ref = tokens[1];
+ String chrom = tokens[2];
+ try {
+ int chStart = Integer.parseInt(tokens[3]);
+ int chEnd = Integer.parseInt(tokens[4]);
+ boolean forwardStrand = "1".equals(tokens[5]);
+ String species = ""; // dunno yet!
+ int[] from = new int[] { start, end };
+ int[] to = new int[] { forwardStrand ? chStart : chEnd,
+ forwardStrand ? chEnd : chStart };
+ MapList map = new MapList(from, to, 1, 1);
+ GeneLoci gl = new GeneLoci(species, ref, chrom, map);
+ setGeneLoci(gl);
+ } catch (NumberFormatException e)
+ {
+ System.err.println("Bad integers in description " + description);
+ }
+ }
+ }
+
+ public void setGeneLoci(GeneLoci gl)
+ {
+ geneLoci = gl;
+ }
+
+ /**
+ * Returns the gene loci mapping for the sequence (may be null)
*
- * @return DOCUMENT ME!
+ * @return
+ */
+ public GeneLoci getGeneLoci()
+ {
+ return geneLoci;
+ }
+
+ /**
+ * Answers the description
+ *
+ * @return
*/
@Override
public String getDescription()
import jalview.api.FeatureColourI;
import jalview.api.FeatureSettingsModelI;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.GeneLoci;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(),
transcript.getDatasetSequence(), mapping, parentId);
+ mapTranscriptToChromosome(transcript, gene, mapping);
+
/*
* fetch and save cross-references
*/
}
/**
+ * If the gene has a mapping to chromosome coordinates, derive the transcript
+ * chromosome regions and save on the transcript sequence
+ *
+ * @param transcript
+ * @param gene
+ * @param mapping
+ * the mapping from gene to transcript positions
+ */
+ protected void mapTranscriptToChromosome(Sequence transcript,
+ SequenceI gene, MapList mapping)
+ {
+ GeneLoci loci = ((Sequence) gene).getGeneLoci();
+ if (loci == null)
+ {
+ return;
+ }
+
+ /*
+ * patch to ensure gene to chromosome mapping is complete
+ * (in case created before gene length was known)
+ */
+ MapList geneMapping = loci.mapping;
+ if (geneMapping.getFromRanges().get(0)[1] == 0)
+ {
+ geneMapping.getFromRanges().get(0)[0] = gene.getStart();
+ geneMapping.getFromRanges().get(0)[1] = gene.getEnd();
+ }
+
+ List<int[]> exons = mapping.getFromRanges();
+ List<int[]> transcriptLoci = new ArrayList<>();
+
+ for (int[] exon : exons) {
+ transcriptLoci.add(geneMapping.locateInTo(exon[0], exon[1]));
+ }
+
+ List<int[]> transcriptRange = Arrays.asList(new int[] {
+ transcript.getStart(), transcript.getEnd() });
+ MapList mapList = new MapList(transcriptRange, transcriptLoci, 1, 1);
+ GeneLoci gl = new GeneLoci(loci.species, loci.assembly,
+ loci.chromosome, mapList);
+
+ transcript.setGeneLoci(gl);
+ }
+
+ /**
* Returns the 'transcript_id' property of the sequence feature (or null)
*
* @param feature
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblMap extends EnsemblRestClient
+{
+
+ /**
+ * Default constructor (to use rest.ensembl.org)
+ */
+ public EnsemblMap()
+ {
+ super();
+ }
+
+ /**
+ * Constructor given the target domain to fetch data from
+ *
+ * @param
+ */
+ public EnsemblMap(String domain)
+ {
+ super(domain);
+ }
+
+ @Override
+ public String getDbName()
+ {
+ return DBRefSource.ENSEMBL;
+ }
+
+ @Override
+ public AlignmentI getSequenceRecords(String queries) throws Exception
+ {
+ return null; // not used
+ }
+
+ /**
+ * Constructs a URL of the format <code>
+ * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
+ * </code>
+ *
+ * @param species
+ * @param chromosome
+ * @param fromRef
+ * @param toRef
+ * @param startPos
+ * @param endPos
+ * @return
+ * @throws MalformedURLException
+ */
+ protected URL getUrl(String species, String chromosome, String fromRef,
+ String toRef, int startPos, int endPos)
+ throws MalformedURLException
+ {
+ /*
+ * start-end might be reverse strand - present forwards to the service
+ */
+ boolean forward = startPos <= endPos;
+ int start = forward ? startPos : endPos;
+ int end = forward ? endPos : startPos;
+ String strand = forward ? "1" : "-1";
+ String url = String.format(
+ "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
+ getDomain(), species, fromRef, chromosome, start, end, strand,
+ toRef);
+ try
+ {
+ return new URL(url);
+ } catch (MalformedURLException e)
+ {
+ return null;
+ }
+ }
+
+ @Override
+ protected boolean useGetRequest()
+ {
+ return true;
+ }
+
+ @Override
+ protected String getRequestMimeType(boolean multipleIds)
+ {
+ return "application/json";
+ }
+
+ @Override
+ protected String getResponseMimeType()
+ {
+ return "application/json";
+ }
+
+ @Override
+ protected URL getUrl(List<String> ids) throws MalformedURLException
+ {
+ return null; // not used
+ }
+
+ public int[] getMapping(String species, String chromosome,
+ String fromRef, String toRef, int[] queryRange)
+ {
+ URL url = null;
+ BufferedReader br = null;
+
+ try
+ {
+ url = getUrl(species, chromosome, fromRef, toRef, queryRange[0],
+ queryRange[1]);
+ // System.out.println("Calling " + url);
+ br = getHttpResponse(url, null);
+ return (parseResponse(br));
+ } catch (Throwable t)
+ {
+ System.out.println("Error calling " + url + ": " + t.getMessage());
+ return null;
+ }
+ }
+
+ /**
+ * Parses the JSON response from the /map REST service. The format is (with
+ * some fields omitted)
+ *
+ * <pre>
+ * {"mappings":
+ * [{
+ * "original": {"end":45109016,"start":45051610},
+ * "mapped" : {"end":43186384,"start":43128978}
+ * }] }
+ * </pre>
+ *
+ * @param br
+ * @return
+ */
+ protected int[] parseResponse(BufferedReader br)
+ {
+ int[] result = null;
+ JSONParser jp = new JSONParser();
+
+ try
+ {
+ JSONObject parsed = (JSONObject) jp.parse(br);
+ JSONArray mappings = (JSONArray) parsed.get("mappings");
+
+ Iterator rvals = mappings.iterator();
+ while (rvals.hasNext())
+ {
+ // todo check for "mapped"
+ JSONObject val = (JSONObject) rvals.next();
+ JSONObject mapped = (JSONObject) val.get("mapped");
+ int start = Integer.parseInt(mapped.get("start").toString());
+ int end = Integer.parseInt(mapped.get("end").toString());
+ String strand = mapped.get("strand").toString();
+ if ("1".equals(strand))
+ {
+ result = new int[] { start, end };
+ }
+ else
+ {
+ result = new int[] { end, start };
+ }
+ }
+ } catch (IOException | ParseException | NumberFormatException e)
+ {
+ // ignore
+ }
+ return result;
+ }
+
+}
import jalview.exceptions.JalviewException;
import jalview.io.FastaFile;
import jalview.io.FileParse;
+import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.Comparison;
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
- private static final String ALLELES = "alleles";
-
protected static final String PARENT = "Parent";
protected static final String ID = "ID";
*/
static void reverseComplementAlleles(SequenceFeature sf)
{
- final String alleles = (String) sf.getValue(ALLELES);
+ final String alleles = (String) sf.getValue(Gff3Helper.ALLELES);
if (alleles == null)
{
return;
reverseComplementAllele(complement, allele);
}
String comp = complement.toString();
- sf.setValue(ALLELES, comp);
+ sf.setValue(Gff3Helper.ALLELES, comp);
sf.setDescription(comp);
/*
String atts = sf.getAttributes();
if (atts != null)
{
- atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp);
+ atts = atts.replace(Gff3Helper.ALLELES + "=" + alleles,
+ Gff3Helper.ALLELES + "=" + comp);
sf.setAttributes(atts);
}
}
--- /dev/null
+package jalview.ext.htsjdk;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * A thin wrapper for htsjdk classes to read either plain, or compressed, or
+ * compressed and indexed VCF files
+ */
+public class VCFReader implements Closeable, Iterable<VariantContext>
+{
+ private static final String GZ = "gz";
+
+ private static final String TBI_EXTENSION = ".tbi";
+
+ private boolean indexed;
+
+ private VCFFileReader reader;
+
+ /**
+ * Constructor given a raw or compressed VCF file or a (tabix) index file
+ * <p>
+ * For now, file type is inferred from its suffix: .gz or .bgz for compressed
+ * data, .tbi for an index file, anything else is assumed to be plain text
+ * VCF.
+ *
+ * @param f
+ * @throws IOException
+ */
+ public VCFReader(String filePath) throws IOException
+ {
+ if (filePath.endsWith(GZ))
+ {
+ if (new File(filePath + TBI_EXTENSION).exists())
+ {
+ indexed = true;
+ }
+ }
+ else if (filePath.endsWith(TBI_EXTENSION))
+ {
+ indexed = true;
+ filePath = filePath.substring(0, filePath.length() - 4);
+ }
+
+ reader = new VCFFileReader(new File(filePath), indexed);
+ }
+
+ @Override
+ public void close() throws IOException
+ {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ }
+
+ /**
+ * Returns an iterator over VCF variants in the file. The client should call
+ * close() on the iterator when finished with it.
+ */
+ @Override
+ public CloseableIterator<VariantContext> iterator()
+ {
+ return reader == null ? null : reader.iterator();
+ }
+
+ /**
+ * Queries for records overlapping the region specified. Note that this method
+ * is performant if the VCF file is indexed, and may be very slow if it is
+ * not.
+ * <p>
+ * Client code should call close() on the iterator when finished with it.
+ *
+ * @param chrom
+ * the chromosome to query
+ * @param start
+ * query interval start
+ * @param end
+ * query interval end
+ * @return
+ */
+ public CloseableIterator<VariantContext> query(final String chrom,
+ final int start, final int end)
+ {
+ if (reader == null) {
+ return null;
+ }
+ if (indexed)
+ {
+ return reader.query(chrom, start, end);
+ }
+ else
+ {
+ return queryUnindexed(chrom, start, end);
+ }
+ }
+
+ /**
+ * Returns an iterator over variant records read from a flat file which
+ * overlap the specified chromosomal positions. Call close() on the iterator
+ * when finished with it!
+ *
+ * @param chrom
+ * @param start
+ * @param end
+ * @return
+ */
+ protected CloseableIterator<VariantContext> queryUnindexed(
+ final String chrom, final int start, final int end)
+ {
+ final CloseableIterator<VariantContext> it = reader.iterator();
+
+ return new CloseableIterator<VariantContext>()
+ {
+ boolean atEnd = false;
+
+ // prime look-ahead buffer with next matching record
+ private VariantContext next = findNext();
+
+ private VariantContext findNext()
+ {
+ if (atEnd)
+ {
+ return null;
+ }
+ VariantContext variant = null;
+ while (it.hasNext())
+ {
+ variant = it.next();
+ int vstart = variant.getStart();
+
+ if (vstart > end)
+ {
+ atEnd = true;
+ close();
+ return null;
+ }
+
+ int vend = variant.getEnd();
+ // todo what is the undeprecated way to get
+ // the chromosome for the variant?
+ if (chrom.equals(variant.getChr()) && (vstart <= end)
+ && (vend >= start))
+ {
+ return variant;
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public boolean hasNext()
+ {
+ boolean hasNext = !atEnd && (next != null);
+ if (!hasNext)
+ {
+ close();
+ }
+ return hasNext;
+ }
+
+ @Override
+ public VariantContext next()
+ {
+ /*
+ * return the next match, and then re-prime
+ * it with the following one (if any)
+ */
+ VariantContext temp = next;
+ next = findNext();
+ return temp;
+ }
+
+ @Override
+ public void remove()
+ {
+ // not implemented
+ }
+
+ @Override
+ public void close()
+ {
+ it.close();
+ }
+ };
+ }
+
+ /**
+ * Returns an object that models the VCF file headers
+ *
+ * @return
+ */
+ public VCFHeader getFileHeader()
+ {
+ return reader == null ? null : reader.getFileHeader();
+ }
+
+ /**
+ * Answers true if we are processing a tab-indexed VCF file, false if it is a
+ * plain text (uncompressed) file.
+ *
+ * @return
+ */
+ public boolean isIndex()
+ {
+ return indexed;
+ }
+}
import jalview.io.NewickFile;
import jalview.io.ScoreMatrixFile;
import jalview.io.TCoffeeScoreFile;
+import jalview.io.vcf.VCFLoader;
import jalview.jbgui.GAlignFrame;
import jalview.schemes.ColourSchemeI;
import jalview.schemes.ColourSchemes;
AlignmentI al = getViewport().getAlignment();
boolean nucleotide = al.isNucleotide();
+ loadVcf.setVisible(nucleotide);
showTranslation.setVisible(nucleotide);
showReverse.setVisible(nucleotide);
showReverseComplement.setVisible(nucleotide);
new CalculationChooser(AlignFrame.this);
}
}
+
+ @Override
+ protected void loadVcf_actionPerformed()
+ {
+ JalviewFileChooser chooser = new JalviewFileChooser(
+ Cache.getProperty("LAST_DIRECTORY"));
+ chooser.setFileView(new JalviewFileView());
+ chooser.setDialogTitle(MessageManager.getString("label.load_vcf_file"));
+ chooser.setToolTipText(MessageManager.getString("label.load_vcf_file"));
+
+ int value = chooser.showOpenDialog(null);
+
+ if (value == JalviewFileChooser.APPROVE_OPTION)
+ {
+ String choice = chooser.getSelectedFile().getPath();
+ Cache.setProperty("LAST_DIRECTORY", choice);
+ new VCFLoader(viewport.getAlignment()).loadVCF(choice, this);
+ }
+
+ }
}
class PrintThread extends Thread
*/
public class Gff3Helper extends GffHelperBase
{
+ public static final String ALLELES = "alleles";
+
protected static final String TARGET = "Target";
protected static final String ID = "ID";
/*
* Ensembl returns dna variants as 'alleles'
*/
- desc = StringUtils.listToDelimitedString(attributes.get("alleles"),
+ desc = StringUtils.listToDelimitedString(attributes.get(ALLELES),
",");
}
--- /dev/null
+package jalview.io.vcf;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+
+import jalview.analysis.AlignmentUtils;
+import jalview.api.AlignViewControllerGuiI;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLoci;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblMap;
+import jalview.ext.htsjdk.VCFReader;
+import jalview.io.gff.Gff3Helper;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * A class to read VCF data (using the htsjdk) and add variants as sequence
+ * features on dna and any related protein product sequences
+ *
+ * @author gmcarstairs
+ */
+public class VCFLoader
+{
+ private static final String EXCL = "!";
+
+ /*
+ * the alignment we are associated VCF data with
+ */
+ private AlignmentI al;
+
+ /*
+ * mappings between VCF and sequence reference assembly regions, as
+ * key = "species!chromosome!fromAssembly!toAssembly
+ * value = Map{fromRange, toRange}
+ */
+ private Map<String, Map<int[], int[]>> assemblyMappings;
+
+ /**
+ * Constructor given an alignment context
+ *
+ * @param alignment
+ */
+ public VCFLoader(AlignmentI alignment)
+ {
+ al = alignment;
+
+ // map of species!chromosome!fromAssembly!toAssembly to {fromRange, toRange}
+ assemblyMappings = new HashMap<String, Map<int[], int[]>>();
+ }
+
+ /**
+ * Loads VCF on to an alignment - provided it can be related to one or more
+ * sequence's chromosomal coordinates.
+ * <p>
+ * This method is not thread safe - concurrent threads should use separate
+ * instances of this class.
+ *
+ * @param filePath
+ * @param status
+ */
+ public void loadVCF(String filePath, AlignViewControllerGuiI status)
+ {
+ VCFReader reader = null;
+
+ try
+ {
+ // long start = System.currentTimeMillis();
+ reader = new VCFReader(filePath);
+
+ VCFHeader header = reader.getFileHeader();
+ VCFHeaderLine ref = header
+ .getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
+ // check if reference is wrt assembly19 (GRCh37)
+ // todo may need to allow user to specify reference assembly?
+ boolean isRefGrch37 = (ref != null && ref.getValue().contains(
+ "assembly19"));
+
+ int varCount = 0;
+ int seqCount = 0;
+
+ /*
+ * query for VCF overlapping each sequence in turn
+ */
+ for (SequenceI seq : al.getSequences())
+ {
+ int added = loadVCF(seq, reader, isRefGrch37);
+ if (added > 0)
+ {
+ seqCount++;
+ varCount += added;
+ computePeptideVariants(seq);
+ }
+ }
+ // long elapsed = System.currentTimeMillis() - start;
+ String msg = String.format("Added %d VCF variants to %d sequence(s)",
+ varCount, seqCount);
+ if (status != null)
+ {
+ status.setStatus(msg);
+ }
+ } catch (Throwable e)
+ {
+ System.err.println("Error processing VCF: " + e.getMessage());
+ e.printStackTrace();
+ } finally
+ {
+ if (reader != null)
+ {
+ try
+ {
+ reader.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ }
+ }
+ }
+
+ /**
+ * (Re-)computes peptide variants from dna variants, for any protein sequence
+ * to which the dna sequence has a mapping. Note that although duplicate
+ * features may get computed, they will not be added, since duplicate sequence
+ * features are ignored in Sequence.addSequenceFeature.
+ *
+ * @param dnaSeq
+ */
+ protected void computePeptideVariants(SequenceI dnaSeq)
+ {
+ DBRefEntry[] dbrefs = dnaSeq.getDBRefs();
+ if (dbrefs == null)
+ {
+ return;
+ }
+ for (DBRefEntry dbref : dbrefs)
+ {
+ Mapping mapping = dbref.getMap();
+ if (mapping == null || mapping.getTo() == null
+ || mapping.getMap().getFromRatio() != 3)
+ {
+ continue;
+ }
+ AlignmentUtils.computeProteinFeatures(dnaSeq, mapping.getTo(),
+ mapping.getMap());
+ }
+ }
+
+ /**
+ * Tries to add overlapping variants read from a VCF file to the given
+ * sequence, and returns the number of overlapping variants found. Note that
+ * this requires the sequence to hold information as to its chromosomal
+ * positions and reference, in order to be able to map the VCF variants to the
+ * sequence.
+ *
+ * @param seq
+ * @param reader
+ * @param isVcfRefGrch37
+ * @return
+ */
+ protected int loadVCF(SequenceI seq, VCFReader reader,
+ boolean isVcfRefGrch37)
+ {
+ int count = 0;
+ GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
+ if (seqCoords == null)
+ {
+ return 0;
+ }
+
+ List<int[]> seqChromosomalContigs = seqCoords.mapping.getToRanges();
+ for (int[] range : seqChromosomalContigs)
+ {
+ count += addVcfVariants(seq, reader, range, isVcfRefGrch37);
+ }
+
+ return count;
+ }
+
+ /**
+ * Queries the VCF reader for any variants that overlap the given chromosome
+ * region of the sequence, and adds as variant features. Returns the number of
+ * overlapping variants found.
+ *
+ * @param seq
+ * @param reader
+ * @param range
+ * start-end range of a sequence region in its chromosomal
+ * coordinates
+ * @param isVcfRefGrch37
+ * true if the VCF is with reference to GRCh37
+ * @return
+ */
+ protected int addVcfVariants(SequenceI seq, VCFReader reader,
+ int[] range, boolean isVcfRefGrch37)
+ {
+ GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
+
+ String chromosome = seqCoords.chromosome;
+ String seqRef = seqCoords.assembly;
+ String species = seqCoords.species;
+
+ // TODO handle species properly
+ if ("".equals(species))
+ {
+ species = "human";
+ }
+
+ /*
+ * map chromosomal coordinates from GRCh38 (sequence) to
+ * GRCh37 (VCF) if necessary
+ */
+ // TODO generalise for other assemblies and species
+ int offset = 0;
+ String fromRef = "GRCh38";
+ if (fromRef.equalsIgnoreCase(seqRef) && isVcfRefGrch37)
+ {
+ String toRef = "GRCh37";
+ int[] newRange = mapReferenceRange(range, chromosome, species,
+ fromRef, toRef);
+ if (newRange == null)
+ {
+ System.err.println(String.format(
+ "Failed to map %s:%s:%s:%d:%d to %s", species, chromosome,
+ fromRef, range[0], range[1], toRef));
+ return 0;
+ }
+ offset = newRange[0] - range[0];
+ range = newRange;
+ }
+
+ /*
+ * query the VCF for overlaps
+ * (convert a reverse strand range to forwards)
+ */
+ int count = 0;
+ MapList mapping = seqCoords.mapping;
+
+ int fromLocus = Math.min(range[0], range[1]);
+ int toLocus = Math.max(range[0], range[1]);
+ CloseableIterator<VariantContext> variants = reader.query(chromosome,
+ fromLocus, toLocus);
+ while (variants.hasNext())
+ {
+ /*
+ * get variant location in sequence chromosomal coordinates
+ */
+ VariantContext variant = variants.next();
+
+ /*
+ * we can only process SNP variants (which can be reported
+ * as part of a MIXED variant record
+ */
+ if (!variant.isSNP() && !variant.isMixed())
+ {
+ continue;
+ }
+
+ count++;
+ int start = variant.getStart() - offset;
+ int end = variant.getEnd() - offset;
+
+ /*
+ * convert chromosomal location to sequence coordinates
+ * - null if a partially overlapping feature
+ */
+ int[] seqLocation = mapping.locateInFrom(start, end);
+ if (seqLocation != null)
+ {
+ addVariantFeatures(seq, variant, seqLocation[0], seqLocation[1]);
+ }
+ }
+
+ variants.close();
+
+ return count;
+ }
+
+ /**
+ * Inspects the VCF variant record, and adds variant features to the sequence.
+ * Only SNP variants are added, not INDELs.
+ *
+ * @param seq
+ * @param variant
+ * @param featureStart
+ * @param featureEnd
+ */
+ protected void addVariantFeatures(SequenceI seq, VariantContext variant,
+ int featureStart, int featureEnd)
+ {
+ String reference = variant.getReference().getBaseString();
+ if (reference.length() != 1)
+ {
+ /*
+ * sorry, we don't handle INDEL variants
+ */
+ return;
+ }
+
+ /*
+ * for now we extract allele frequency as feature score; note
+ * this attribute is String for a simple SNP, but List<String> if
+ * multiple alleles at the locus; we extract for the simple case only,
+ * since not sure how to match allele order with AF values
+ */
+ Object af = variant.getAttribute("AF");
+ float score = 0f;
+ if (af instanceof String)
+ {
+ try
+ {
+ score = Float.parseFloat((String) af);
+ } catch (NumberFormatException e)
+ {
+ // leave as 0
+ }
+ }
+
+ StringBuilder sb = new StringBuilder();
+ sb.append(reference);
+
+ /*
+ * inspect alleles and record SNP variants (as the variant
+ * record could be MIXED and include INDEL and SNP alleles)
+ */
+ int alleleCount = 0;
+
+ /*
+ * inspect alleles; warning: getAlleles gives no guarantee
+ * as to the order in which they are returned
+ */
+ for (Allele allele : variant.getAlleles())
+ {
+ if (!allele.isReference())
+ {
+ String alleleBase = allele.getBaseString();
+ if (alleleBase.length() == 1)
+ {
+ sb.append(",").append(alleleBase);
+ alleleCount++;
+ }
+ }
+ }
+ String alleles = sb.toString(); // e.g. G,A,C
+
+ String type = SequenceOntologyI.SEQUENCE_VARIANT;
+
+ SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
+ featureEnd, score, "VCF");
+
+ sf.setValue(Gff3Helper.ALLELES, alleles);
+
+ Map<String, Object> atts = variant.getAttributes();
+ for (Entry<String, Object> att : atts.entrySet())
+ {
+ sf.setValue(att.getKey(), att.getValue());
+ }
+ seq.addSequenceFeature(sf);
+ }
+
+ /**
+ * Determines the location of the query range (chromosome positions) in a
+ * different reference assembly.
+ * <p>
+ * If the range is just a subregion of one for which we already have a mapping
+ * (for example, an exon sub-region of a gene), then the mapping is just
+ * computed arithmetically.
+ * <p>
+ * Otherwise, calls the Ensembl REST service that maps from one assembly
+ * reference's coordinates to another's
+ *
+ * @param queryRange
+ * start-end chromosomal range in 'fromRef' coordinates
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * assembly reference for the query coordinates
+ * @param toRef
+ * assembly reference we wish to translate to
+ * @return the start-end range in 'toRef' coordinates
+ */
+ protected int[] mapReferenceRange(int[] queryRange, String chromosome,
+ String species, String fromRef, String toRef)
+ {
+ /*
+ * first try shorcut of computing the mapping as a subregion of one
+ * we already have (e.g. for an exon, if we have the gene mapping)
+ */
+ int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome,
+ species, fromRef, toRef);
+ if (mappedRange != null)
+ {
+ return mappedRange;
+ }
+
+ /*
+ * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37
+ */
+ EnsemblMap mapper = new EnsemblMap();
+ int[] mapping = mapper.getMapping(species, chromosome, fromRef, toRef,
+ queryRange);
+
+ if (mapping == null)
+ {
+ // mapping service failure
+ return null;
+ }
+
+ /*
+ * save mapping for possible future re-use
+ */
+ String key = makeRangesKey(chromosome, species, fromRef, toRef);
+ if (!assemblyMappings.containsKey(key))
+ {
+ assemblyMappings.put(key, new HashMap<int[], int[]>());
+ }
+
+ assemblyMappings.get(key).put(queryRange, mapping);
+
+ return mapping;
+ }
+
+ /**
+ * If we already have a 1:1 contiguous mapping which subsumes the given query
+ * range, this method just calculates and returns the subset of that mapping,
+ * else it returns null. In practical terms, if a gene has a contiguous
+ * mapping between (for example) GRCh37 and GRCh38, then we assume that its
+ * subsidiary exons occupy unchanged relative positions, and just compute
+ * these as offsets, rather than do another lookup of the mapping.
+ * <p>
+ * If in future these assumptions prove invalid (e.g. for bacterial dna?!),
+ * simply remove this method or let it always return null.
+ * <p>
+ * Warning: many rapid calls to the /map service map result in a 429 overload
+ * error response
+ *
+ * @param queryRange
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * @param toRef
+ * @return
+ */
+ protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome,
+ String species, String fromRef, String toRef)
+ {
+ String key = makeRangesKey(chromosome, species, fromRef, toRef);
+ if (assemblyMappings.containsKey(key))
+ {
+ Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
+ for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet())
+ {
+ int[] fromRange = mappedRange.getKey();
+ int[] toRange = mappedRange.getValue();
+ if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0])
+ {
+ /*
+ * mapping is 1:1 in length, so we trust it to have no discontinuities
+ */
+ if (MappingUtils.rangeContains(fromRange, queryRange))
+ {
+ /*
+ * fromRange subsumes our query range
+ */
+ int offset = queryRange[0] - fromRange[0];
+ int mappedRangeFrom = toRange[0] + offset;
+ int mappedRangeTo = mappedRangeFrom + (queryRange[1] - queryRange[0]);
+ return new int[] { mappedRangeFrom, mappedRangeTo };
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Formats a ranges map lookup key
+ *
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * @param toRef
+ * @return
+ */
+ protected static String makeRangesKey(String chromosome, String species,
+ String fromRef, String toRef)
+ {
+ return species + EXCL + chromosome + EXCL + fromRef + EXCL
+ + toRef;
+ }
+}
protected JMenuItem runGroovy = new JMenuItem();
+ protected JMenuItem loadVcf;
+
protected JCheckBoxMenuItem autoCalculate = new JCheckBoxMenuItem();
protected JCheckBoxMenuItem sortByTree = new JCheckBoxMenuItem();
associatedData_actionPerformed(e);
}
});
+ loadVcf = new JMenuItem(MessageManager.getString("label.load_vcf_file"));
+ loadVcf.setToolTipText(MessageManager.getString("label.load_vcf"));
+ loadVcf.addActionListener(new ActionListener()
+ {
+ @Override
+ public void actionPerformed(ActionEvent e)
+ {
+ loadVcf_actionPerformed();
+ }
+ });
autoCalculate.setText(
MessageManager.getString("label.autocalculate_consensus"));
autoCalculate.setState(
fileMenu.add(exportAnnotations);
fileMenu.add(loadTreeMenuItem);
fileMenu.add(associatedData);
+ fileMenu.add(loadVcf);
fileMenu.addSeparator();
fileMenu.add(closeMenuItem);
// selectMenu.add(listenToViewSelections);
}
+ protected void loadVcf_actionPerformed()
+ {
+ }
+
/**
* Constructs the entries on the Colour menu (but does not add them to the
* menu).
}
return copy;
}
+
+ /**
+ * Answers true if range's start-end positions include those of queryRange,
+ * where either range might be in reverse direction, else false
+ *
+ * @param range
+ * a start-end range
+ * @param queryRange
+ * a candidate subrange of range (start2-end2)
+ * @return
+ */
+ public static boolean rangeContains(int[] range, int[] queryRange)
+ {
+ if (range == null || queryRange == null || range.length != 2
+ || queryRange.length != 2)
+ {
+ /*
+ * invalid arguments
+ */
+ return false;
+ }
+
+ int min = Math.min(range[0], range[1]);
+ int max = Math.max(range[0], range[1]);
+
+ return (min <= queryRange[0] && max >= queryRange[0]
+ && min <= queryRange[1] && max >= queryRange[1]);
+ }
}
--- /dev/null
+package jalview.ext.htsjdk;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class VCFReaderTest
+{
+ private static final String[] VCF = new String[] {
+ "##fileformat=VCFv4.2",
+ "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
+ "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G
+ "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA
+ "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A
+
+ // gnomAD exome variant dataset
+ private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+ // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+ /**
+ * A test to exercise some basic functionality of the htsjdk VCF reader,
+ * reading from a non-index VCF file
+ *
+ * @throws IOException
+ */
+ @Test(groups = "Functional")
+ public void testReadVcf_plain() throws IOException
+ {
+ File f = writeVcfFile();
+ VCFReader reader = new VCFReader(f.getAbsolutePath());
+ CloseableIterator<VariantContext> variants = reader.iterator();
+
+ /*
+ * SNP C/G variant
+ */
+ VariantContext vc = variants.next();
+ assertTrue(vc.isSNP());
+ Allele ref = vc.getReference();
+ assertEquals(ref.getBaseString(), "C");
+ List<Allele> alleles = vc.getAlleles();
+ assertEquals(alleles.size(), 2);
+ assertTrue(alleles.get(0).isReference());
+ assertEquals(alleles.get(0).getBaseString(), "C");
+ assertFalse(alleles.get(1).isReference());
+ assertEquals(alleles.get(1).getBaseString(), "G");
+
+ /*
+ * Insertion G -> GA
+ */
+ vc = variants.next();
+ assertFalse(vc.isSNP());
+ assertTrue(vc.isSimpleInsertion());
+ ref = vc.getReference();
+ assertEquals(ref.getBaseString(), "G");
+ alleles = vc.getAlleles();
+ assertEquals(alleles.size(), 2);
+ assertTrue(alleles.get(0).isReference());
+ assertEquals(alleles.get(0).getBaseString(), "G");
+ assertFalse(alleles.get(1).isReference());
+ assertEquals(alleles.get(1).getBaseString(), "GA");
+
+ /*
+ * Deletion ACG -> A
+ */
+ vc = variants.next();
+ assertFalse(vc.isSNP());
+ assertTrue(vc.isSimpleDeletion());
+ ref = vc.getReference();
+ assertEquals(ref.getBaseString(), "ACG");
+ alleles = vc.getAlleles();
+ assertEquals(alleles.size(), 2);
+ assertTrue(alleles.get(0).isReference());
+ assertEquals(alleles.get(0).getBaseString(), "ACG");
+ assertFalse(alleles.get(1).isReference());
+ assertEquals(alleles.get(1).getBaseString(), "A");
+
+ assertFalse(variants.hasNext());
+
+ variants.close();
+ reader.close();
+ }
+
+ /**
+ * Creates a temporary file to be read by the htsjdk VCF reader
+ *
+ * @return
+ * @throws IOException
+ */
+ protected File writeVcfFile() throws IOException
+ {
+ File f = File.createTempFile("Test", "vcf");
+ f.deleteOnExit();
+ PrintWriter pw = new PrintWriter(f);
+ for (String vcfLine : VCF) {
+ pw.println(vcfLine);
+ }
+ pw.close();
+ return f;
+ }
+
+ /**
+ * A 'test' that demonstrates querying an indexed VCF file for features in a
+ * specified interval
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testQuery_indexed() throws IOException
+ {
+ /*
+ * if not specified, assumes index file is filename.tbi
+ */
+ VCFReader reader = new VCFReader(VCF_PATH);
+
+ /*
+ * gene NMT1 (human) is on chromosome 17
+ * GCHR38 (Ensembl): 45051610-45109016
+ * GCHR37 (gnoMAD): 43128978-43186384
+ * CDS begins at offset 9720, first CDS variant at offset 9724
+ */
+ CloseableIterator<VariantContext> features = reader.query("17",
+ 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
+
+ assertEquals(printNext(features), 43138702);
+ assertEquals(printNext(features), 43138704);
+ assertEquals(printNext(features), 43138707);
+ assertEquals(printNext(features), 43138708);
+ assertEquals(printNext(features), 43138710);
+ assertEquals(printNext(features), 43138711);
+ assertFalse(features.hasNext());
+
+ features.close();
+ reader.close();
+ }
+
+ /**
+ * Prints the toString value of the next variant, and returns its start
+ * location
+ *
+ * @param features
+ * @return
+ */
+ protected int printNext(CloseableIterator<VariantContext> features)
+ {
+ VariantContext next = features.next();
+ System.out.println(next.toString());
+ return next.getStart();
+ }
+
+ // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+ /**
+ * Test the query method that wraps a non-indexed VCF file
+ *
+ * @throws IOException
+ */
+ @Test(groups = "Functional")
+ public void testQuery_plain() throws IOException
+ {
+ File f = writeVcfFile();
+ VCFReader reader = new VCFReader(f.getAbsolutePath());
+
+ /*
+ * query for overlap of 5-8 - should find variant at 7
+ */
+ CloseableIterator<VariantContext> variants = reader.query("20", 5, 8);
+
+ /*
+ * INDEL G/GA variant
+ */
+ VariantContext vc = variants.next();
+ assertTrue(vc.isIndel());
+ assertEquals(vc.getStart(), 7);
+ assertEquals(vc.getEnd(), 7);
+ Allele ref = vc.getReference();
+ assertEquals(ref.getBaseString(), "G");
+ List<Allele> alleles = vc.getAlleles();
+ assertEquals(alleles.size(), 2);
+ assertTrue(alleles.get(0).isReference());
+ assertEquals(alleles.get(0).getBaseString(), "G");
+ assertFalse(alleles.get(1).isReference());
+ assertEquals(alleles.get(1).getBaseString(), "GA");
+
+ assertFalse(variants.hasNext());
+
+ variants.close();
+ reader.close();
+ }
+}
--- /dev/null
+package jalview.io.vcf;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLoci;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.io.DataSourceType;
+import jalview.io.FileLoader;
+import jalview.io.gff.Gff3Helper;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.MapList;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class VCFLoaderTest
+{
+ // columns 9717- of gene P30419 from Ensembl (modified)
+ private static final String FASTA = ">ENSG00000136448/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
+ + "CAAGCTGGCGGACGAGAGTGTGACA\n"
+ // and a 'made up' mini-transcript with two exons
+ + ">ENST00000592782/1-18\n--AGCTGGCG----AGAGTGTGAC-\n";
+
+ private static final String[] VCF = { "##fileformat=VCFv4.2",
+ "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
+ "##reference=GRCh38",
+ "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
+ // SNP A/T in position 2 of gene sequence (precedes transcript)
+ "17\t45051611\t.\tA\tT\t1666.64\tRF\tAC=15;AF=5.08130e-03",
+ // SNP G/C in position 4 of gene sequence, position 2 of transcript
+ // this is a mixed variant, the insertion G/GA is not transferred
+ "17\t45051613\t.\tG\tGA,C\t1666.64\tRF\tAC=15;AF=3.08130e-03" };
+
+ @Test(groups = "Functional")
+ public void testLoadVCF() throws IOException
+ {
+ AlignmentI al = buildAlignment();
+ VCFLoader loader = new VCFLoader(al);
+
+ File f = makeVcf();
+
+ loader.loadVCF(f.getPath(), null);
+
+ /*
+ * verify variant feature(s) added to gene
+ */
+ List<SequenceFeature> geneFeatures = al.getSequenceAt(0).findFeatures(
+ 2, 2);
+ assertEquals(geneFeatures.size(), 1);
+ SequenceFeature sf = geneFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 5.08130e-03, 0.000001f);
+ assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES));
+
+ /*
+ * verify variant feature(s) added to transcript
+ */
+ List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
+ .findFeatures(4, 4);
+ assertEquals(transcriptFeatures.size(), 1);
+ sf = transcriptFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
+ assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+
+ /*
+ * verify variant feature(s) computed and added to protein
+ * first codon AGC varies to ACC giving S/T
+ */
+ SequenceI peptide = al.getSequenceAt(1)
+ .getDBRefs()[0].getMap().getTo();
+ List<SequenceFeature> proteinFeatures = peptide.findFeatures(1, 6);
+ assertEquals(proteinFeatures.size(), 1);
+ sf = proteinFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 2);
+ assertEquals(sf.getEnd(), 2);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getDescription(), "p.Ser1Thr");
+ }
+
+ private File makeVcf() throws IOException
+ {
+ File f = File.createTempFile("Test", ".vcf");
+ f.deleteOnExit();
+ PrintWriter pw = new PrintWriter(f);
+ for (String vcfLine : VCF)
+ {
+ pw.println(vcfLine);
+ }
+ pw.close();
+ return f;
+ }
+
+ /**
+ * Make a simple alignment with one 'gene' and one 'transcript'
+ *
+ * @return
+ */
+ private AlignmentI buildAlignment()
+ {
+ AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
+ DataSourceType.PASTE);
+
+ /*
+ * map gene sequence to chromosome (normally done when the sequence is fetched
+ * from Ensembl and transcripts computed)
+ */
+ AlignmentI alignment = af.getViewport().getAlignment();
+ int[][] to = new int[][] { new int[] { 45051610, 45051634 } };
+ List<int[]> toRanges = Arrays.asList(to);
+ SequenceI gene = alignment.getSequenceAt(0);
+ List<int[]> fromRanges = Arrays.asList(new int[][] { new int[] {
+ gene.getStart(), gene.getEnd() } });
+ ((Sequence) gene).setGeneLoci(new GeneLoci("human", "GRCh38", "17",
+ new MapList(fromRanges, toRanges, 1, 1)));
+
+ /*
+ * map 'transcript' to chromosome via 'gene'
+ * transcript/1-18 is gene/3-10,15-24
+ * which is chromosome 45051612-45051619,45051624-45051633
+ */
+ to = new int[][] { new int[] { 45051612, 45051619 },
+ new int[] { 45051624, 45051633 } };
+ toRanges = Arrays.asList(to);
+ SequenceI transcript = alignment.getSequenceAt(1);
+ fromRanges = Arrays.asList(new int[][] { new int[] {
+ transcript.getStart(), transcript.getEnd() } });
+ ((Sequence) transcript).setGeneLoci(new GeneLoci("human", "GRCh38",
+ "17", new MapList(fromRanges, toRanges, 1, 1)));
+
+ /*
+ * add a protein product as a DBRef on the transcript
+ */
+ SequenceI peptide = new Sequence("ENSP001", "SWRECD");
+ MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
+ 3, 1);
+ Mapping map = new Mapping(peptide, mapList);
+ DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
+ transcript.addDBRef(product);
+
+ return alignment;
+ }
+
+ @Test(groups = "Functional")
+ public void testLoadVCF_reverseStrand() throws IOException
+ {
+ // TODO a test with reverse strand mapping of
+ // gene and transcript to chromosome
+ }
+}
assertEquals("[12, 11, 8, 4]", Arrays.toString(ranges));
}
+ @Test(groups = { "Functional" })
+ public void testRangeContains()
+ {
+ /*
+ * both forward ranges
+ */
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 1, 10 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 2, 10 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 1, 9 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 4, 5 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 0, 9 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ -10, -9 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 1, 11 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 11, 12 }));
+
+ /*
+ * forward range, reverse query
+ */
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 10, 1 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 9, 1 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 10, 2 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 5, 5 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 11, 1 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] {
+ 10, 0 }));
+
+ /*
+ * reverse range, forward query
+ */
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 1, 10 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 1, 9 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 2, 10 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 6, 6 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 6, 11 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 11, 20 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ -3, -2 }));
+
+ /*
+ * both reverse
+ */
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 10, 1 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 9, 1 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 10, 2 }));
+ assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 3, 3 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 11, 1 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 10, 0 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ 12, 11 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] {
+ -5, -8 }));
+
+ /*
+ * bad arguments
+ */
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10, 12 },
+ new int[] {
+ 1, 10 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 },
+ new int[] { 1 }));
+ assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, null));
+ assertFalse(MappingUtils.rangeContains(null, new int[] { 1, 10 }));
+ }
+
}