<classpathentry kind="lib" path="lib/VARNAv3-93.jar"/>
<classpathentry kind="lib" path="lib/jfreesvg-2.1.jar"/>
<classpathentry kind="lib" path="lib/quaqua-filechooser-only-8.0.jar"/>
- <classpathentry kind="lib" path="lib/htsjdk-1.133.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/plugin"/>
<classpathentry kind="lib" path="lib/xml-apis.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/Plugin.jar"/>
<classpathentry kind="lib" path="lib/jersey-client-1.19.jar"/>
<classpathentry kind="lib" path="lib/jersey-core-1.19.jar"/>
<classpathentry kind="lib" path="lib/biojava-core-4.1.0.jar"/>
<classpathentry kind="lib" path="lib/biojava-ontology-4.1.0.jar"/>
<classpathentry kind="lib" path="lib/groovy-all-2.4.6-indy.jar"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
+ <classpathentry kind="lib" path="lib/htsjdk-2.12.0.jar"/>
<classpathentry kind="output" path="classes"/>
</classpath>
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
-org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
-org.eclipse.jdt.core.compiler.source=1.7
+org.eclipse.jdt.core.compiler.source=1.8
org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=52
label.view_full_application = View in Full Application
label.load_associated_tree = Load Associated Tree...
label.load_features_annotations = Load Features/Annotations...
+label.load_vcf = Load SNP variants from plain text or indexed VCF data
+label.load_vcf_file = Load VCF File
+label.searching_vcf = Loading VCF variants...
+label.added_vcf = Added {0} VCF variants to {1} sequence(s)
label.export_features = Export Features...
label.export_annotations = Export Annotations...
label.to_upper_case = To Upper Case
label.overview = Overview
label.reset_to_defaults = Reset to defaults
label.oview_calc = Recalculating overview...
+label.feature_details = Feature details
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.IncompleteCodonException;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
+import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
{
return variant == null ? null : variant.getFeatureGroup();
}
+
+    /**
+     * Answers a string of the form "base:description", where description is
+     * that of the variant feature, or empty if there is no variant. Intended
+     * only as an aid when debugging.
+     */
+    @Override
+    public String toString()
+    {
+      String variantText = "";
+      if (variant != null)
+      {
+        variantText = variant.getDescription();
+      }
+      return base + ":" + variantText;
+    }
}
/**
* Answers true if the mappings include one between the given (dataset)
* sequences.
*/
- public static boolean mappingExists(List<AlignedCodonFrame> mappings,
+ protected static boolean mappingExists(List<AlignedCodonFrame> mappings,
SequenceI aaSeq, SequenceI cdnaSeq)
{
if (mappings != null)
productSeqs = new HashSet<SequenceI>();
for (SequenceI seq : products)
{
- productSeqs.add(seq.getDatasetSequence() == null ? seq
- : seq.getDatasetSequence());
+ productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence());
}
}
/*
* add a mapping from CDS to the (unchanged) mapped to range
*/
- List<int[]> cdsRange = Collections
- .singletonList(new int[]
- { 1, cdsSeq.getLength() });
+ List<int[]> cdsRange = Collections.singletonList(new int[] { 1,
+ cdsSeq.getLength() });
MapList cdsToProteinMap = new MapList(cdsRange,
mapList.getToRanges(), mapList.getFromRatio(),
mapList.getToRatio());
* add another mapping from original 'from' range to CDS
*/
AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();
- MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
+ final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
cdsRange, 1, 1);
dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
dnaToCdsMap);
}
/*
+ * transfer dna chromosomal loci (if known) to the CDS
+ * sequence (via the mapping)
+ */
+ final MapList cdsToDnaMap = dnaToCdsMap.getInverse();
+ transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);
+
+ /*
* add DBRef with mapping from protein to CDS
* (this enables Get Cross-References from protein alignment)
* This is tricky because we can't have two DBRefs with the
for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
{
- // creates a complementary cross-reference to the source sequence's
- // primary reference.
-
- DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(),
- primRef.getSource() + ":" + primRef.getVersion(),
- primRef.getAccessionId());
- cdsCrossRef
- .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap)));
+ /*
+ * create a cross-reference from CDS to the source sequence's
+ * primary reference and vice versa
+ */
+ String source = primRef.getSource();
+ String version = primRef.getVersion();
+ DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"
+ + version, primRef.getAccessionId());
+ cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
cdsSeqDss.addDBRef(cdsCrossRef);
+ dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
+ .getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
+
// problem here is that the cross-reference is synthesized -
// cdsSeq.getName() may be like 'CDS|dnaaccession' or
// 'CDS|emblcdsacc'
// assuming cds version same as dna ?!?
- DBRefEntry proteinToCdsRef = new DBRefEntry(primRef.getSource(),
- primRef.getVersion(), cdsSeq.getName());
+ DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,
+ cdsSeq.getName());
//
- proteinToCdsRef.setMap(
- new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));
+ proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
+ .getInverse()));
proteinProduct.addDBRef(proteinToCdsRef);
}
}
}
- AlignmentI cds = new Alignment(
- cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));
+ AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
+ .size()]));
cds.setDataset(dataset);
return cds;
}
/**
+   * Tries to copy gene loci (species, assembly, chromosome and the mapping to
+   * chromosome positions) from fromSeq to targetSeq. The mapping saved on the
+   * target is the result of MapList.traverse applied to targetToFrom and the
+   * source's loci mapping. Nothing is changed if the target already has gene
+   * loci, if the source has none, or if traverse answers null.
+   *
+   * @param fromSeq
+   *          the sequence which may hold gene loci
+   * @param targetToFrom
+   *          mapping from targetSeq to fromSeq
+   * @param targetSeq
+   *          the sequence which may receive gene loci
+   */
+  protected static void transferGeneLoci(SequenceI fromSeq,
+          MapList targetToFrom, SequenceI targetSeq)
+  {
+    // loci already held by the target are never overridden
+    if (targetSeq.getGeneLoci() == null)
+    {
+      GeneLociI sourceLoci = fromSeq.getGeneLoci();
+      if (sourceLoci != null)
+      {
+        MapList targetToChromosome = targetToFrom.traverse(sourceLoci
+                .getMap());
+        if (targetToChromosome != null)
+        {
+          targetSeq.setGeneLoci(sourceLoci.getSpeciesId(),
+                  sourceLoci.getAssemblyId(),
+                  sourceLoci.getChromosomeId(), targetToChromosome);
+        }
+      }
+    }
+  }
+
+ /**
* A helper method that finds a CDS sequence in the alignment dataset that is
* mapped to the given protein sequence, and either is, or has a mapping from,
* the given dna sequence.
}
/**
- * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
+ * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
* the given mapping.
*
* @param cdsSeq
* @param contig
+ * @param proteinProduct
* @param mapping
- * @return list of DBRefEntrys added.
+ * @return list of DBRefEntrys added
*/
- public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+ protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
SequenceI contig, SequenceI proteinProduct, Mapping mapping)
{
-
- // gather direct refs from contig congrent with mapping
+ // gather direct refs from contig congruent with mapping
List<DBRefEntry> direct = new ArrayList<DBRefEntry>();
HashSet<String> directSources = new HashSet<String>();
if (contig.getDBRefs() != null)
* subtypes in the Sequence Ontology)
* @param omitting
*/
- public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+ protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
MapList mapping, String select, String... omitting)
{
SequenceI copyTo = toSeq;
* @param dnaSeq
* @return
*/
- public static List<int[]> findCdsPositions(SequenceI dnaSeq)
+ protected static List<int[]> findCdsPositions(SequenceI dnaSeq)
{
List<int[]> result = new ArrayList<int[]>();
{
if (var.variant != null)
{
- String alleles = (String) var.variant.getValue("alleles");
+ String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
if (alleles != null)
{
for (String base : alleles.split(","))
{
if (var.variant != null)
{
- String alleles = (String) var.variant.getValue("alleles");
+ String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
if (alleles != null)
{
for (String base : alleles.split(","))
{
if (var.variant != null)
{
- String alleles = (String) var.variant.getValue("alleles");
+ String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
if (alleles != null)
{
for (String base : alleles.split(","))
/**
* Builds a map whose key is position in the protein sequence, and value is a
- * list of the base and all variants for each corresponding codon position
+ * list of the base and all variants for each corresponding codon position.
+ * <p>
+ * This depends on dna variants being held as a comma-separated list as
+ * property "alleles" on variant features.
*
* @param dnaSeq
* @param dnaToProtein
// not handling multi-locus variant features
continue;
}
+
+ /*
+ * ignore variant if not a SNP
+ */
+ String alls = (String) sf.getValue(Gff3Helper.ALLELES);
+ if (alls == null)
+ {
+ continue; // non-SNP VCF variant perhaps - can't process this
+ }
+
+ String[] alleles = alls.toUpperCase().split(",");
+ boolean isSnp = true;
+ for (String allele : alleles)
+ {
+ if (allele.trim().length() > 1)
+ {
+ isSnp = false;
+ }
+ }
+ if (!isSnp)
+ {
+ continue;
+ }
+
int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
if (mapsTo == null)
{
}
/*
- * extract dna variants to a string array
- */
- String alls = (String) sf.getValue("alleles");
- if (alls == null)
- {
- continue;
- }
- String[] alleles = alls.toUpperCase().split(",");
- int i = 0;
- for (String allele : alleles)
- {
- alleles[i++] = allele.trim(); // lose any space characters "A, G"
- }
-
- /*
* get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
*/
int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
}
/**
+ * Answers the reverse complement of the input string
+ *
+ * @see #getComplement(char)
+ * @param s
+ * @return
+ */
+ public static String reverseComplement(String s)
+ {
+ StringBuilder sb = new StringBuilder(s.length());
+ for (int i = s.length() - 1; i >= 0; i--)
+ {
+ sb.append(Dna.getComplement(s.charAt(i)));
+ }
+ return sb.toString();
+ }
+
+ /**
* Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes
* are treated as on http://reverse-complement.com/. Anything else is left
* unchanged.
public class DBRefEntry implements DBRefEntryI
{
- String source = "", version = "", accessionId = "";
+ /*
+ * the mapping to chromosome (genome) is held as an instance with
+ * source = speciesId
+ * version = assemblyId
+ * accessionId = "chromosome:" + chromosomeId
+ * map = mapping from sequence to reference assembly
+ */
+ public static final String CHROMOSOME = "chromosome";
+
+ String source = "";
+
+ String version = "";
+
+ String accessionId = "";
/**
* maps from associated sequence to the database sequence's coordinate system
}
return true;
}
+
+  /**
+   * Answers true if this entry holds a mapping to a chromosome, which is
+   * recognised by an accessionId of the form "chromosome:id"
+   *
+   * @return true if accessionId is not null and starts with "chromosome:"
+   */
+  public boolean isChromosome()
+  {
+    if (accessionId == null)
+    {
+      return false;
+    }
+    String chromosomePrefix = CHROMOSOME + ":";
+    return accessionId.startsWith(chromosomePrefix);
+  }
}
--- /dev/null
+package jalview.datamodel;
+
+import jalview.util.MapList;
+
+/**
+ * An interface to model one or more contiguous regions on one chromosome,
+ * identified by species, reference assembly and chromosome, together with a
+ * mapping from sequence positions to chromosome positions
+ */
+public interface GeneLociI
+{
+ /**
+ * Answers the species identifier
+ *
+ * @return the species identifier (NOTE: callers may have supplied an empty
+ * string where the species was not known)
+ */
+ String getSpeciesId();
+
+ /**
+ * Answers the reference assembly identifier
+ *
+ * @return the reference assembly identifier, for example "GRCh38"
+ */
+ String getAssemblyId();
+
+ /**
+ * Answers the chromosome identifier e.g. "2", "Y", "II"
+ *
+ * @return the chromosome identifier, without any "chromosome:" prefix
+ */
+ String getChromosomeId();
+
+ /**
+ * Answers the mapping from sequence to chromosome loci. For a reverse strand
+ * mapping, the chromosomal ranges will have start > end.
+ *
+ * @return the mapping from sequence positions to chromosome positions
+ */
+ MapList getMap();
+}
}
/**
- * DOCUMENT ME!
+ * Sets the sequence description, and also parses out any special formats of
+ * interest
*
* @param desc
- * DOCUMENT ME!
*/
@Override
public void setDescription(String desc)
this.description = desc;
}
+ @Override
+ public void setGeneLoci(String speciesId, String assemblyId,
+ String chromosomeId, MapList map)
+ {
+ addDBRef(new DBRefEntry(speciesId, assemblyId, DBRefEntry.CHROMOSOME
+ + ":" + chromosomeId, new Mapping(map)));
+ }
+
/**
- * DOCUMENT ME!
+ * Returns the gene loci mapping for the sequence (may be null)
*
- * @return DOCUMENT ME!
+ * @return
+ */
+  @Override
+  public GeneLociI getGeneLoci()
+  {
+    /*
+     * the first 'chromosome' dbref found (if any) supplies the gene loci;
+     * its values are read from the dbref only when asked for
+     */
+    DBRefEntry[] dbrefs = getDBRefs();
+    if (dbrefs == null)
+    {
+      return null;
+    }
+    for (int i = 0; i < dbrefs.length; i++)
+    {
+      final DBRefEntry candidate = dbrefs[i];
+      if (!candidate.isChromosome())
+      {
+        continue;
+      }
+      return new GeneLociI()
+      {
+        @Override
+        public String getSpeciesId()
+        {
+          return candidate.getSource();
+        }
+
+        @Override
+        public String getAssemblyId()
+        {
+          return candidate.getVersion();
+        }
+
+        @Override
+        public String getChromosomeId()
+        {
+          // drop the leading "chromosome:" to leave the chromosome id
+          String accession = candidate.getAccessionId();
+          int prefixLength = DBRefEntry.CHROMOSOME.length() + 1;
+          return accession.substring(prefixLength);
+        }
+
+        @Override
+        public MapList getMap()
+        {
+          return candidate.getMap().getMap();
+        }
+      };
+    }
+    return null;
+  }
+
+ /**
+ * Answers the description
+ *
+ * @return
*/
@Override
public String getDescription()
package jalview.datamodel;
import jalview.datamodel.features.FeatureLocationI;
+import jalview.util.StringUtils;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.TreeMap;
import java.util.Vector;
/**
- * DOCUMENT ME!
- *
- * @author $author$
- * @version $Revision$
+ * A class that models a single contiguous feature on a sequence. If flag
+ * 'contactFeature' is true, the start and end positions are interpreted instead
+ * as two contact points.
*/
public class SequenceFeature implements FeatureLocationI
{
// private key for ENA location designed not to conflict with real GFF data
private static final String LOCATION = "!Location";
+ private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td></tr>";
+
+ /*
+ * map of otherDetails special keys, and their value fields' delimiter
+ */
+ private static final Map<String, String> INFO_KEYS = new HashMap<>();
+
+ static
+ {
+ INFO_KEYS.put("CSQ", ",");
+ // todo capture second level metadata (CSQ FORMAT)
+ // and delimiter "|" so as to report in a table within a table?
+ }
+
/*
* ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
* name1=value1;name2=value2,value3;...etc
{
return begin == 0 && end == 0;
}
+
+ /**
+ * Answers an html-formatted report of feature details
+ *
+ * @return
+ */
+ public String getDetailsReport()
+ {
+ StringBuilder sb = new StringBuilder(128);
+ sb.append("<br>");
+ sb.append("<table>");
+ sb.append(String.format(ROW_DATA, "Type", type));
+ sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
+ : begin + (isContactFeature() ? ":" : "-") + end));
+ String desc = StringUtils.stripHtmlTags(description);
+ sb.append(String.format(ROW_DATA, "Description", desc));
+ if (!Float.isNaN(score) && score != 0f)
+ {
+ sb.append(String.format(ROW_DATA, "Score", score));
+ }
+ if (featureGroup != null)
+ {
+ sb.append(String.format(ROW_DATA, "Group", featureGroup));
+ }
+
+ if (otherDetails != null)
+ {
+ TreeMap<String, Object> ordered = new TreeMap<>(
+ String.CASE_INSENSITIVE_ORDER);
+ ordered.putAll(otherDetails);
+
+ for (Entry<String, Object> entry : ordered.entrySet())
+ {
+ String key = entry.getKey();
+ if (ATTRIBUTES.equals(key))
+ {
+ continue; // to avoid double reporting
+ }
+ if (INFO_KEYS.containsKey(key))
+ {
+ /*
+ * split selected INFO data by delimiter over multiple lines
+ */
+ String delimiter = INFO_KEYS.get(key);
+ String[] values = entry.getValue().toString().split(delimiter);
+ for (String value : values)
+ {
+ sb.append("<tr><td>").append(key).append("</td><td>")
+ .append(value)
+ .append("</td></tr>");
+ }
+ }
+ else
+ { // tried <td title="key"> but it failed to provide a tooltip :-(
+ sb.append("<tr><td>").append(key).append("</td><td>");
+ sb.append(entry.getValue().toString()).append("</td></tr>");
+ }
+ }
+ }
+ sb.append("</table>");
+
+ String text = sb.toString();
+ return text;
+ }
}
package jalview.datamodel;
import jalview.datamodel.features.SequenceFeaturesI;
+import jalview.util.MapList;
import java.util.BitSet;
import java.util.List;
* @param c2
*/
public int replace(char c1, char c2);
+
+ /**
+ * Answers the GeneLociI, or null if not known
+ *
+ * @return
+ */
+ GeneLociI getGeneLoci();
+
+ /**
+ * Sets the mapping to gene loci for the sequence
+ *
+ * @param speciesId
+ * @param assemblyId
+ * @param chromosomeId
+ * @param map
+ */
+ void setGeneLoci(String speciesId, String assemblyId,
+ String chromosomeId, MapList map);
}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.ext.ensembl;
+
+/**
+ * A simple holder for what is known about one Ensembl REST domain (currently
+ * rest.ensembl.org or rest.ensemblgenomes.org): its data and REST versions,
+ * its availability, and the times at which these were last checked
+ *
+ * @author gmcarstairs
+ */
+class EnsemblData
+{
+  /*
+   * the http domain whose values are held by this object
+   */
+  String domain;
+
+  /*
+   * the most recent REST version Jalview has been tested with, e.g. "4.5";
+   * a change of minor version should be ok, a change of major version may
+   * break stuff
+   */
+  String expectedRestVersion;
+
+  /*
+   * REST version as major / minor / point e.g. "4.5.1"
+   * @see http://rest.ensembl.org/info/rest/?content-type=application/json
+   */
+  String restVersion;
+
+  /*
+   * version of the data
+   * @see http://rest.ensembl.org/info/data/?content-type=application/json
+   */
+  String dataVersion;
+
+  /*
+   * true when http://rest.ensembl.org/info/ping/?content-type=application/json
+   * gives response code 200 and not {"error":"Database is unavailable"}
+   */
+  boolean restAvailable;
+
+  /*
+   * absolute time of the last availability check
+   * (set to -1 on construction)
+   */
+  long lastAvailableCheckTime;
+
+  /*
+   * absolute time of the last check of version numbers
+   * (set to -1 on construction)
+   */
+  long lastVersionCheckTime;
+
+  /*
+   * set to true if the REST major version is not the expected one
+   */
+  boolean restMajorVersionMismatch;
+
+  /*
+   * absolute time to wait until, if we overloaded the REST service
+   */
+  long retryAfter;
+
+  /**
+   * Constructor given the domain, and the expected REST version number e.g 4.5
+   * or 3.4.3. Both 'last checked' times are initialised to -1.
+   *
+   * @param theDomain
+   *          the http domain this object holds values for
+   * @param restExpected
+   *          the REST version expected for the domain
+   */
+  EnsemblData(String theDomain, String restExpected)
+  {
+    this.domain = theDomain;
+    this.expectedRestVersion = restExpected;
+    this.lastVersionCheckTime = -1;
+    this.lastAvailableCheckTime = -1;
+  }
+
+}
import jalview.api.FeatureColourI;
import jalview.api.FeatureSettingsModelI;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
{
continue;
}
+
if (geneAlignment.getHeight() == 1)
{
+ findGeneLoci(geneAlignment.getSequenceAt(0), geneId);
getTranscripts(geneAlignment, geneId);
}
if (al == null)
}
/**
+   * Asks an EnsemblLookup client (for this client's domain) for the gene loci
+   * of the given gene identifier and, if any are returned, saves them on the
+   * sequence. Otherwise falls back on trying to parse the sequence description,
+   * in case it is in Ensembl-gene format e.g.
+   * chromosome:GRCh38:17:45051610:45109016:1.
+   *
+   * @param seq
+   *          the sequence to save gene loci on
+   * @param geneId
+   *          the gene identifier to look up
+   */
+  void findGeneLoci(SequenceI seq, String geneId)
+  {
+    EnsemblLookup lookup = new EnsemblLookup(getDomain());
+    GeneLociI loci = lookup.getGeneLoci(geneId);
+    if (loci == null)
+    {
+      // nothing from the lookup - try the description instead
+      parseChromosomeLocations(seq);
+      return;
+    }
+    seq.setGeneLoci(loci.getSpeciesId(), loci.getAssemblyId(),
+            loci.getChromosomeId(), loci.getMap());
+  }
+
+  /**
+   * Parses the sequence description and, if it is an Ensembl-style location
+   * e.g. chromosome:GRCh38:17:45051610:45109016:1 (six colon-separated fields
+   * of which the first starts with "chromosome"), saves the assembly,
+   * chromosome and a mapping from the sequence's start-end to the chromosome
+   * range as gene loci on the sequence. The chromosome range is held as
+   * start-end if the last field is "1", otherwise as end-start. The species is
+   * not known here and is saved as an empty string.
+   *
+   * @param seq
+   *          the sequence whose description is to be parsed
+   * @return true if gene loci were saved on the sequence, else false
+   */
+  boolean parseChromosomeLocations(SequenceI seq)
+  {
+    String description = seq.getDescription();
+    if (description == null)
+    {
+      return false;
+    }
+
+    String[] fields = description.split(":");
+    boolean isLocation = fields.length == 6
+            && fields[0].startsWith(DBRefEntry.CHROMOSOME);
+    if (!isLocation)
+    {
+      return false;
+    }
+
+    String assembly = fields[1];
+    String chromosome = fields[2];
+    try
+    {
+      int chStart = Integer.parseInt(fields[3]);
+      int chEnd = Integer.parseInt(fields[4]);
+      boolean forwardStrand = "1".equals(fields[5]);
+
+      int[] seqRange = { seq.getStart(), seq.getEnd() };
+      int[] chromosomeRange;
+      if (forwardStrand)
+      {
+        chromosomeRange = new int[] { chStart, chEnd };
+      }
+      else
+      {
+        // reverse strand is held with start > end
+        chromosomeRange = new int[] { chEnd, chStart };
+      }
+      MapList map = new MapList(seqRange, chromosomeRange, 1, 1);
+      seq.setGeneLoci("", assembly, chromosome, map);
+      return true;
+    } catch (NumberFormatException e)
+    {
+      System.err.println("Bad integers in description " + description);
+      return false;
+    }
+  }
+
+ /**
* Converts a query, which may contain one or more gene, transcript, or
* external (to Ensembl) identifiers, into a non-redundant list of gene
* identifiers.
cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(),
transcript.getDatasetSequence(), mapping, parentId);
+ mapTranscriptToChromosome(transcript, gene, mapping);
+
/*
* fetch and save cross-references
*/
}
/**
+   * If the gene has a mapping to chromosome coordinates, derive the transcript
+   * chromosome regions and save on the transcript sequence. Each 'from' range
+   * of the gene-to-transcript mapping (an exon) is located in the chromosome
+   * coordinates of the gene's loci mapping; the located regions are then mapped
+   * from the transcript's start-end range.
+   * <p>
+   * Nothing is saved if the gene has no loci (or no loci mapping), or if any
+   * exon cannot be located on the chromosome, since a partial list of regions
+   * would no longer correspond to the whole transcript range.
+   *
+   * @param transcript
+   *          the sequence to save gene loci on
+   * @param gene
+   *          the sequence which may hold gene loci
+   * @param mapping
+   *          the mapping from gene to transcript positions
+   */
+  protected void mapTranscriptToChromosome(SequenceI transcript,
+          SequenceI gene, MapList mapping)
+  {
+    GeneLociI loci = gene.getGeneLoci();
+    if (loci == null)
+    {
+      return;
+    }
+
+    MapList geneMapping = loci.getMap();
+    if (geneMapping == null)
+    {
+      return;
+    }
+
+    List<int[]> exons = mapping.getFromRanges();
+    List<int[]> transcriptLoci = new ArrayList<>(exons.size());
+
+    for (int[] exon : exons)
+    {
+      int[] exonLoci = geneMapping.locateInTo(exon[0], exon[1]);
+      if (exonLoci == null)
+      {
+        // locateInTo answers null if the range is not mapped; a null
+        // entry must not be passed on as a chromosome range
+        return;
+      }
+      transcriptLoci.add(exonLoci);
+    }
+
+    List<int[]> transcriptRange = Arrays.asList(new int[] {
+        transcript.getStart(), transcript.getEnd() });
+    MapList mapList = new MapList(transcriptRange, transcriptLoci, 1, 1);
+
+    transcript.setGeneLoci(loci.getSpeciesId(), loci.getAssemblyId(),
+            loci.getChromosomeId(), mapList);
+  }
+
+ /**
* Returns the 'transcript_id' property of the sequence feature (or null)
*
* @param feature
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
package jalview.ext.ensembl;
-/**
- * A data class to model the data and rest version of one Ensembl domain,
- * currently for rest.ensembl.org and rest.ensemblgenomes.org
- *
- * @author gmcarstairs
- */
-class EnsemblInfo
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.json.simple.JSONArray;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblInfo extends EnsemblRestClient
{
- /*
- * The http domain this object is holding data values for
- */
- String domain;
/*
- * The latest version Jalview has tested for, e.g. "4.5"; a minor version change should be
- * ok, a major version change may break stuff
+ * cached results of REST /info/divisions service, currently
+ * <pre>
+ * {
+ * { "ENSEMBLFUNGI", "http://rest.ensemblgenomes.org"},
+ * "ENSEMBLBACTERIA", "http://rest.ensemblgenomes.org"},
+ * "ENSEMBLPROTISTS", "http://rest.ensemblgenomes.org"},
+ * "ENSEMBLMETAZOA", "http://rest.ensemblgenomes.org"},
+ * "ENSEMBLPLANTS", "http://rest.ensemblgenomes.org"},
+ * "ENSEMBL", "http://rest.ensembl.org" }
+ * }
+ * </pre>
+ * The values for EnsemblGenomes are retrieved by a REST call, that for
+ * Ensembl is added programmatically for convenience of lookup
*/
- String expectedRestVersion;
+ private static Map<String, String> divisions;
- /*
- * Major / minor / point version e.g. "4.5.1"
- * @see http://rest.ensembl.org/info/rest/?content-type=application/json
- */
- String restVersion;
+ @Override
+ public String getDbName()
+ {
+ return "ENSEMBL"; // the same fixed name is answered for every instance
+ }
- /*
- * data version
- * @see http://rest.ensembl.org/info/data/?content-type=application/json
- */
- String dataVersion;
+ @Override
+ public AlignmentI getSequenceRecords(String queries) throws Exception
+ {
+ return null; // always null: no sequence records are fetched by this class
+ }
- /*
- * true when http://rest.ensembl.org/info/ping/?content-type=application/json
- * returns response code 200 and not {"error":"Database is unavailable"}
+ @Override
+ protected URL getUrl(List<String> ids) throws MalformedURLException
+ {
+ return null; // always null: no URL is built from identifiers here (see getDivisionsUrl)
+ }
+
+ @Override
+ protected boolean useGetRequest()
+ {
+ return true; // always true (presumably selects HTTP GET in the superclass - confirm there)
+ }
+
+ @Override
+ protected String getRequestMimeType(boolean multipleIds)
+ {
+ return "application/json"; // the same whatever the value of multipleIds
+ }
+
+ @Override
+ protected String getResponseMimeType()
+ {
+ return "application/json"; // responses are requested as JSON
+ }
+
+ /**
+ * Answers the domain (http://rest.ensembl.org or
+ * http://rest.ensemblgenomes.org) for the given division, or null if not
+ * recognised by Ensembl.
+ *
+ * @param division
+ * @return
*/
- boolean restAvailable;
+  public String getDomain(String division)
+  {
+    /*
+     * Answers the domain held in the lookup map for the (upper-cased)
+     * division name, or null if there is none. A null division is not
+     * recognised, so also answers null (rather than throwing
+     * NullPointerException). The lookup map is populated on first use.
+     */
+    if (division == null)
+    {
+      return null;
+    }
+    if (divisions == null)
+    {
+      fetchDivisions();
+    }
+    return divisions.get(division.toUpperCase());
+  }
- /*
- * absolute time when availability was last checked
+ /**
+ * On first request only, populate the lookup map by fetching the list of
+ * divisions known to EnsemblGenomes.
*/
- long lastAvailableCheckTime;
+  void fetchDivisions()
+  {
+    divisions = new HashMap<>();
- /*
- * absolute time when version numbers were last checked
+    /*
+     * for convenience, pre-fill ensembl.org as the domain for "ENSEMBL"
+     */
+    divisions.put(DBRefSource.ENSEMBL.toUpperCase(), ENSEMBL_REST);
+
+    /*
+     * then add the divisions reported by EnsemblGenomes; this is best
+     * effort only - on any failure the map is left with what it has
+     */
+    try
+    {
+      URL url = getDivisionsUrl(ENSEMBL_GENOMES_REST);
+      if (url == null)
+      {
+        return;
+      }
+      // try-with-resources closes the reader (if not null) on all paths
+      try (BufferedReader br = getHttpResponse(url, null))
+      {
+        if (br != null)
+        {
+          // only parse if a response was obtained
+          parseResponse(br, ENSEMBL_GENOMES_REST);
+        }
+      }
+    } catch (IOException e)
+    {
+      // ignore - the lookup map keeps only the entries added so far
+    }
+  }
+
+ /**
+ * Parses the JSON response to /info/divisions, and adds each division to the lookup map
+ *
+ * @param br
+ * @param domain
*/
- long lastVersionCheckTime;
+  void parseResponse(BufferedReader br, String domain)
+  {
+    /*
+     * Expects the response to be a JSON array of division names; each name
+     * (upper-cased) is added to the lookup map with the given domain as its
+     * value. A null reader, a response that is not a JSON array, and null
+     * array entries are all skipped without error.
+     */
+    if (br == null)
+    {
+      return;
+    }
+    JSONParser jp = new JSONParser();
+
+    try
+    {
+      Object parsed = jp.parse(br);
- // flag set to true if REST major version is not the one expected
- boolean restMajorVersionMismatch;
+      if (!(parsed instanceof JSONArray))
+      {
+        // not a list of names (perhaps a JSON error object) - nothing to add
+        return;
+      }
+      Iterator<?> rvals = ((JSONArray) parsed).iterator();
+      while (rvals.hasNext())
+      {
+        Object division = rvals.next();
+        if (division != null)
+        {
+          divisions.put(division.toString().toUpperCase(), domain);
+        }
+      }
+    } catch (IOException | ParseException | NumberFormatException e)
+    {
+      // ignore - any entries added before the failure are kept
+    }
+  }
/**
- * Constructor given expected REST version number e.g 4.5 or 3.4.3
+ * Constructs the URL for the EnsemblGenomes /info/divisions REST service
+ * @param domain the REST service domain to query, e.g. http://rest.ensemblgenomes.org
*
- * @param restExpected
+ * @return
+ * @throws MalformedURLException
*/
- EnsemblInfo(String theDomain, String restExpected)
+ URL getDivisionsUrl(String domain) throws MalformedURLException
{
- domain = theDomain;
- expectedRestVersion = restExpected;
- lastAvailableCheckTime = -1;
- lastVersionCheckTime = -1;
+ return new URL(domain
+ + "/info/divisions?content-type=application/json"); // service path appended to the domain, asking for JSON
}
+ /**
+ * Returns the set of 'divisions' recognised by Ensembl or EnsemblGenomes
+ *
+ * @return
+ */
+ public Set<String> getDivisions() {
+ if (divisions == null)
+ {
+ fetchDivisions();
+ }
+
+ return divisions.keySet();
+ }
}
*/
package jalview.ext.ensembl;
+import jalview.bin.Cache;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.GeneLociI;
+import jalview.util.MapList;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
+import java.util.function.Function;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
- * A client for the Ensembl lookup REST endpoint; used to find the Parent gene
- * identifier given a transcript identifier.
+ * A client for the Ensembl lookup REST endpoint
*
* @author gmcarstairs
- *
*/
public class EnsemblLookup extends EnsemblRestClient
{
+ private static final String SPECIES = "species";
- private static final String OBJECT_TYPE_TRANSLATION = "Translation";
private static final String PARENT = "Parent";
+
+ private static final String OBJECT_TYPE_TRANSLATION = "Translation";
private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript";
private static final String ID = "id";
private static final String OBJECT_TYPE_GENE = "Gene";
}
/**
- * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
- * given identifier, or null if not found
+ * Calls the Ensembl lookup REST endpoint and returns
+ * <ul>
+ * <li>the 'id' for the identifier if its type is "Gene"</li>
+ * <li>the 'Parent' if its type is 'Transcript'</li>
+ * </ul>
+ * If the type is 'Translation', does a recursive call to this method, passing
+ * in the 'Parent' (transcript id).
*
* @param identifier
* @return
*/
public String getGeneId(String identifier)
{
+ return (String) getResult(identifier, br -> parseGeneId(br));
+ }
+
+ /**
+ * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the
+ * given identifier, or null if not found
+ *
+ * @param identifier
+ * @return
+ */
+ public String getSpecies(String identifier)
+ {
+ return (String) getResult(identifier, br -> getAttribute(br, SPECIES));
+ }
+
+ /**
+ * Calls the /lookup/id rest service and delegates parsing of the JSON
+ * response to the supplied parser
+ *
+ * @param identifier
+ * @param parser
+ * @return
+ */
+ protected Object getResult(String identifier,
+ Function<BufferedReader, Object> parser)
+ {
List<String> ids = Arrays.asList(new String[] { identifier });
BufferedReader br = null;
{
br = getHttpResponse(url, ids);
}
- return br == null ? null : parseResponse(br);
+ return br == null ? null : parser.apply(br);
} catch (IOException e)
{
// ignore
}
/**
+   * Answers the value of 'attribute' from the JSON response, or null if not
+   * found. Null is also returned if there is no response to read, if it cannot
+   * be read or parsed, or if it is not a JSON object.
+   *
+   * @param br
+   *          reader for the JSON response (may be null)
+   * @param attribute
+   *          name of the top-level attribute to look up
+   * @return the attribute's value as a string, or null
+   */
+  protected String getAttribute(BufferedReader br, String attribute)
+  {
+    if (br == null)
+    {
+      return null;
+    }
+    try
+    {
+      Object parsed = new JSONParser().parse(br);
+      if (parsed instanceof JSONObject)
+      {
+        Object value = ((JSONObject) parsed).get(attribute);
+        return value == null ? null : value.toString();
+      }
+    } catch (ParseException | IOException e)
+    {
+      // unreadable or malformed response is treated as 'not found'
+    }
+    return null;
+  }
+
+
+ /**
* Parses the JSON response and returns the gene identifier, or null if not
* found. If the returned object_type is Gene, returns the id, if Transcript
* returns the Parent. If it is Translation (peptide identifier), then the
*
* @param br
* @return
- * @throws IOException
*/
- protected String parseResponse(BufferedReader br) throws IOException
+ protected String parseGeneId(BufferedReader br)
{
String geneId = null;
JSONParser jp = new JSONParser();
+ " looping on Parent!");
}
}
- } catch (ParseException e)
+ } catch (ParseException | IOException e)
{
// ignore
}
return geneId;
}
+ /**
+ * Calls the /lookup/id rest service for the given id, and if successful,
+ * parses and returns the gene's chromosomal coordinates
+ *
+ * @param geneId
+ * @return
+ */
+ public GeneLociI getGeneLoci(String geneId)
+ {
+ return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br));
+ }
+
+ /**
+ * Parses the /lookup/id response for species, assembly_name,
+ * seq_region_name, start, end and returns an object that wraps them, or null
+ * if unsuccessful
+ *
+ * @param br
+ * @return
+ */
+ GeneLociI parseGeneLoci(BufferedReader br)
+ {
+ JSONParser jp = new JSONParser();
+ try
+ {
+ JSONObject val = (JSONObject) jp.parse(br);
+ final String species = val.get("species").toString();
+ final String assembly = val.get("assembly_name").toString();
+ final String chromosome = val.get("seq_region_name").toString();
+ String strand = val.get("strand").toString();
+ int start = Integer.parseInt(val.get("start").toString());
+ int end = Integer.parseInt(val.get("end").toString());
+ int fromEnd = end - start + 1;
+ boolean reverseStrand = "-1".equals(strand);
+ int toStart = reverseStrand ? end : start;
+ int toEnd = reverseStrand ? start : end;
+ List<int[]> fromRange = Collections.singletonList(new int[] { 1,
+ fromEnd });
+ List<int[]> toRange = Collections.singletonList(new int[] { toStart,
+ toEnd });
+ final MapList map = new MapList(fromRange, toRange, 1, 1);
+ return new GeneLociI()
+ {
+
+ @Override
+ public String getSpeciesId()
+ {
+ return species == null ? "" : species;
+ }
+
+ @Override
+ public String getAssemblyId()
+ {
+ return assembly;
+ }
+
+ @Override
+ public String getChromosomeId()
+ {
+ return chromosome;
+ }
+
+ @Override
+ public MapList getMap()
+ {
+ return map;
+ }
+ };
+ } catch (ParseException | NullPointerException | IOException
+ | NumberFormatException | ClassCastException e)
+ {
+ Cache.log.error("Error looking up gene loci: " + e.getMessage());
+ }
+ return null;
+ }
+
}
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.GeneLociI;
+import jalview.util.MapList;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblMap extends EnsemblRestClient
+{
+ private static final String MAPPED = "mapped";
+
+ private static final String MAPPINGS = "mappings";
+
+ private static final String CDS = "cds";
+
+ private static final String CDNA = "cdna";
+
+ /**
+ * Default constructor (to use rest.ensembl.org)
+ */
+ public EnsemblMap()
+ {
+ super();
+ }
+
+ /**
+ * Constructor given the target domain to fetch data from
+ *
+ * @param domain
+ *          the REST service domain, passed on to the superclass constructor
+ */
+ public EnsemblMap(String domain)
+ {
+ super(domain);
+ }
+
+ @Override
+ public String getDbName()
+ {
+ return DBRefSource.ENSEMBL;
+ }
+
+ @Override
+ public AlignmentI getSequenceRecords(String queries) throws Exception
+ {
+ return null; // not used
+ }
+
+  /**
+   * Constructs a URL of the format <code>
+   * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
+   * </code>
+   * <p>
+   * Positions are formatted using the root locale, so that the URL always
+   * holds ASCII digits whatever the default locale of the JVM.
+   *
+   * @param species
+   * @param chromosome
+   * @param fromRef
+   *          the assembly that the positions refer to
+   * @param toRef
+   *          the assembly to map the positions to
+   * @param startPos
+   * @param endPos
+   *          may be less than startPos, in which case the range is sent as
+   *          (endPos..startPos) with strand -1
+   * @return
+   * @throws MalformedURLException
+   */
+  protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef,
+          String toRef, int startPos, int endPos)
+          throws MalformedURLException
+  {
+    /*
+     * start-end might be reverse strand - present forwards to the service
+     */
+    boolean forward = startPos <= endPos;
+    int start = forward ? startPos : endPos;
+    int end = forward ? endPos : startPos;
+    String strand = forward ? "1" : "-1";
+
+    /*
+     * Locale.ROOT: %d is otherwise subject to digit localisation
+     */
+    String url = String.format(java.util.Locale.ROOT,
+            "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
+            getDomain(), species, fromRef, chromosome, start, end, strand,
+            toRef);
+    return new URL(url);
+  }
+
+ @Override
+ protected boolean useGetRequest()
+ {
+ return true;
+ }
+
+ @Override
+ protected String getRequestMimeType(boolean multipleIds)
+ {
+ return "application/json";
+ }
+
+ @Override
+ protected String getResponseMimeType()
+ {
+ return "application/json";
+ }
+
+ @Override
+ protected URL getUrl(List<String> ids) throws MalformedURLException
+ {
+ return null; // not used
+ }
+
+  /**
+   * Calls the REST /map service to get the chromosomal coordinates (start/end)
+   * in 'toRef' that correspond to the (start/end) queryRange in 'fromRef'.
+   * Any failure (invalid query range, no response, error reading or parsing
+   * the response) is reported on standard output, and null is returned.
+   *
+   * @param species
+   * @param chromosome
+   * @param fromRef
+   *          the assembly that queryRange refers to
+   * @param toRef
+   *          the assembly to map to
+   * @param queryRange
+   *          [start, end] positions in fromRef
+   * @return [start, end] in toRef, or null if no mapping was obtained
+   * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
+   */
+  public int[] getAssemblyMapping(String species, String chromosome,
+          String fromRef, String toRef, int[] queryRange)
+  {
+    if (queryRange == null || queryRange.length < 2)
+    {
+      System.out.println(
+              "Error: assembly mapping needs a [start, end] query range");
+      return null;
+    }
+
+    URL url = null;
+    BufferedReader br = null;
+
+    try
+    {
+      url = getAssemblyMapUrl(species, chromosome, fromRef, toRef,
+              queryRange[0], queryRange[1]);
+      br = getHttpResponse(url, null);
+      if (br == null)
+      {
+        // nothing to parse
+        System.out.println("Error calling " + url + ": no response");
+        return null;
+      }
+      return parseAssemblyMappingResponse(br);
+    } catch (Exception e)
+    {
+      // best effort: report and give up, but let Errors propagate
+      System.out.println("Error calling " + url + ": " + e);
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+  /**
+   * Parses the JSON response from the /map/<species>/ REST service. The
+   * format is (with some fields omitted)
+   *
+   * <pre>
+   *  {"mappings": 
+   *    [{
+   *       "original": {"end":45109016,"start":45051610},
+   *       "mapped"  : {"end":43186384,"start":43128978} 
+   *  }] }
+   * </pre>
+   *
+   * If there is more than one entry in "mappings", the result is taken from
+   * the last one. The "mapped" strand determines the order of the result:
+   * [start, end] for strand "1", otherwise [end, start].
+   *
+   * @param br
+   *          reader for the JSON response (may be null)
+   * @return the mapped range, or null if the response is absent, unparseable,
+   *         or lacks the expected "mappings" / "mapped" content
+   */
+  protected int[] parseAssemblyMappingResponse(BufferedReader br)
+  {
+    if (br == null)
+    {
+      return null;
+    }
+
+    int[] result = null;
+
+    try
+    {
+      Object parsed = new JSONParser().parse(br);
+      Object mappings = parsed instanceof JSONObject
+              ? ((JSONObject) parsed).get(MAPPINGS)
+              : null;
+      if (!(mappings instanceof JSONArray))
+      {
+        // e.g. an error response with no "mappings" entry
+        return null;
+      }
+
+      for (Object mapping : (JSONArray) mappings)
+      {
+        Object mapped = mapping instanceof JSONObject
+                ? ((JSONObject) mapping).get(MAPPED)
+                : null;
+        if (!(mapped instanceof JSONObject))
+        {
+          return null;
+        }
+        JSONObject region = (JSONObject) mapped;
+        Object startValue = region.get("start");
+        Object endValue = region.get("end");
+        Object strandValue = region.get("strand");
+        if (startValue == null || endValue == null || strandValue == null)
+        {
+          return null;
+        }
+
+        int start = Integer.parseInt(startValue.toString());
+        int end = Integer.parseInt(endValue.toString());
+        if ("1".equals(strandValue.toString()))
+        {
+          result = new int[] { start, end };
+        }
+        else
+        {
+          result = new int[] { end, start };
+        }
+      }
+    } catch (IOException | ParseException | NumberFormatException e)
+    {
+      // ignore
+    }
+    return result;
+  }
+
+ /**
+ * Calls the REST /map/cds/id service, and returns a GeneLociI holding the
+ * returned chromosomal coordinates, or returns null if the call fails.
+ * Delegates to getIdMapping.
+ *
+ * @param division
+ * e.g. Ensembl, EnsemblMetazoa
+ * @param accession
+ * e.g. ENST00000592782, Y55B1AR.1.1
+ * @param start
+ * @param end
+ * @return
+ */
+ public GeneLociI getCdsMapping(String division, String accession,
+ int start, int end)
+ {
+ return getIdMapping(division, accession, start, end, CDS);
+ }
+
+ /**
+ * Calls the REST /map/cdna/id service, and returns a GeneLociI holding the
+ * returned chromosomal coordinates, or returns null if the call fails.
+ * Delegates to getIdMapping.
+ *
+ * @param division
+ * e.g. Ensembl, EnsemblMetazoa
+ * @param accession
+ * e.g. ENST00000592782, Y55B1AR.1.1
+ * @param start
+ * @param end
+ * @return
+ */
+ public GeneLociI getCdnaMapping(String division, String accession,
+ int start, int end)
+ {
+ return getIdMapping(division, accession, start, end, CDNA);
+ }
+
+  /**
+   * Calls the REST /map/cds/id or /map/cdna/id service for the accession, on
+   * the domain that serves the given division, and parses the response. Any
+   * failure is reported on standard output, and null is returned.
+   *
+   * @param division
+   *          e.g. Ensembl, EnsemblMetazoa
+   * @param accession
+   * @param start
+   * @param end
+   * @param cdsOrCdna
+   *          the path element selecting the service ("cds" or "cdna")
+   * @return the parsed loci, or null if the division has no known domain, there
+   *         is no response, or the response cannot be parsed
+   */
+  GeneLociI getIdMapping(String division, String accession, int start,
+          int end, String cdsOrCdna)
+  {
+    URL url = null;
+    BufferedReader br = null;
+
+    try
+    {
+      String domain = new EnsemblInfo().getDomain(division);
+      if (domain == null)
+      {
+        return null;
+      }
+      url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
+      br = getHttpResponse(url, null);
+      if (br == null)
+      {
+        // nothing to parse
+        System.out.println("Error calling " + url + ": no response");
+        return null;
+      }
+      return parseIdMappingResponse(br, accession, domain);
+    } catch (Exception e)
+    {
+      // best effort: report and give up, but let Errors propagate
+      System.out.println("Error calling " + url + ": " + e);
+      return null;
+    } finally
+    {
+      if (br != null)
+      {
+        try
+        {
+          br.close();
+        } catch (IOException e)
+        {
+          // ignore
+        }
+      }
+    }
+  }
+
+  /**
+   * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
+   * REST call is to either ensembl or ensemblgenomes, as determined from the
+   * division, e.g. Ensembl or EnsemblProtists.
+   * <p>
+   * Positions are formatted using the root locale, so that the URL always
+   * holds ASCII digits whatever the default locale of the JVM.
+   *
+   * @param domain
+   * @param accession
+   * @param start
+   * @param end
+   * @param cdsOrCdna
+   *          the path element selecting the service ("cds" or "cdna")
+   * @return
+   * @throws MalformedURLException
+   */
+  URL getIdMapUrl(String domain, String accession, int start, int end,
+          String cdsOrCdna) throws MalformedURLException
+  {
+    /*
+     * Locale.ROOT: %d is otherwise subject to digit localisation
+     */
+    String url = String.format(java.util.Locale.ROOT,
+            "%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
+            domain, cdsOrCdna, accession, start, end);
+    return new URL(url);
+  }
+
+  /**
+   * Parses the JSON response from the /map/cds/ or /map/cdna REST service
+   * (requested with include_original_region=1). Each entry of "mappings" is
+   * read as a pair of regions, of which these fields are used:
+   *
+   * <pre>
+   * {"mappings":
+   *   [
+   *    {"original": {"end":198, ...},
+   *     "mapped": {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1",
+   *                "strand":-1,"start":2501114, ...}},
+   *    {"original": {"end":300, ...},
+   *     "mapped": {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1",
+   *                "strand":-1,"start":2500714, ...}}
+   *   ]
+   * }
+   * </pre>
+   *
+   * The result maps positions 1 to the highest "original" end on to the list
+   * of "mapped" regions (each held as [end, start] if on strand -1). The
+   * species is fetched with a separate lookup call for the accession.
+   *
+   * @param br
+   *          reader for the JSON response (may be null)
+   * @param accession
+   * @param domain
+   * @return the loci, or null if the response is absent, unparseable, has no
+   *         mappings, lacks an expected field, or spans more than one assembly
+   *         or chromosome
+   */
+  GeneLociI parseIdMappingResponse(BufferedReader br, String accession,
+          String domain)
+  {
+    if (br == null)
+    {
+      return null;
+    }
+
+    try
+    {
+      Object parsed = new JSONParser().parse(br);
+      Object mappings = parsed instanceof JSONObject
+              ? ((JSONObject) parsed).get(MAPPINGS)
+              : null;
+      if (!(mappings instanceof JSONArray))
+      {
+        // e.g. an error response with no "mappings" entry
+        return null;
+      }
+
+      String assembly = null;
+      String chromosome = null;
+      int fromEnd = 0;
+      List<int[]> regions = new ArrayList<>();
+
+      for (Object mapping : (JSONArray) mappings)
+      {
+        if (!(mapping instanceof JSONObject))
+        {
+          return null;
+        }
+        Object original = ((JSONObject) mapping).get("original");
+        Object mapped = ((JSONObject) mapping).get(MAPPED);
+        if (!(original instanceof JSONObject)
+                || !(mapped instanceof JSONObject))
+        {
+          return null;
+        }
+        JSONObject from = (JSONObject) original;
+        JSONObject to = (JSONObject) mapped;
+
+        Object fromEndValue = from.get("end");
+        Object startValue = to.get("start");
+        Object endValue = to.get("end");
+        Object assemblyValue = to.get("assembly_name");
+        Object chromosomeValue = to.get("seq_region_name");
+        Object strandValue = to.get("strand");
+        if (fromEndValue == null || startValue == null || endValue == null
+                || assemblyValue == null || chromosomeValue == null
+                || strandValue == null)
+        {
+          return null;
+        }
+
+        /*
+         * the 'from' range has to span all of the original regions
+         */
+        fromEnd = Math.max(fromEnd,
+                Integer.parseInt(fromEndValue.toString()));
+
+        int start = Integer.parseInt(startValue.toString());
+        int end = Integer.parseInt(endValue.toString());
+        String ass = assemblyValue.toString();
+        if (assembly != null && !assembly.equals(ass))
+        {
+          System.err
+                  .println("EnsemblMap found multiple assemblies - can't resolve");
+          return null;
+        }
+        assembly = ass;
+        String chr = chromosomeValue.toString();
+        if (chromosome != null && !chromosome.equals(chr))
+        {
+          System.err
+                  .println("EnsemblMap found multiple chromosomes - can't resolve");
+          return null;
+        }
+        chromosome = chr;
+        if ("-1".equals(strandValue.toString()))
+        {
+          regions.add(new int[] { end, start });
+        }
+        else
+        {
+          regions.add(new int[] { start, end });
+        }
+      }
+
+      if (regions.isEmpty())
+      {
+        // nothing was mapped, so there are no loci to report
+        return null;
+      }
+
+      /*
+       * processed all mapped regions on chromosome, assemble the result,
+       * having first fetched the species id for the accession
+       */
+      final String species = new EnsemblLookup(domain)
+              .getSpecies(accession);
+      final String as = assembly;
+      final String chr = chromosome;
+      List<int[]> fromRange = Collections.singletonList(new int[] { 1,
+          fromEnd });
+      final MapList map = new MapList(fromRange, regions, 1, 1);
+      return new GeneLociI()
+      {
+
+        @Override
+        public String getSpeciesId()
+        {
+          return species == null ? "" : species;
+        }
+
+        @Override
+        public String getAssemblyId()
+        {
+          return as;
+        }
+
+        @Override
+        public String getChromosomeId()
+        {
+          return chr;
+        }
+
+        @Override
+        public MapList getMap()
+        {
+          return map;
+        }
+      };
+    } catch (IOException | ParseException | NumberFormatException e)
+    {
+      // ignore
+    }
+
+    return null;
+  }
+
+}
private static final String REST_CHANGE_LOG = "https://github.com/Ensembl/ensembl-rest/wiki/Change-log";
- private static Map<String, EnsemblInfo> domainData;
+ private static Map<String, EnsemblData> domainData;
// @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
{
domainData = new HashMap<>();
domainData.put(ENSEMBL_REST,
- new EnsemblInfo(ENSEMBL_REST, LATEST_ENSEMBL_REST_VERSION));
- domainData.put(ENSEMBL_GENOMES_REST, new EnsemblInfo(
+ new EnsemblData(ENSEMBL_REST, LATEST_ENSEMBL_REST_VERSION));
+ domainData.put(ENSEMBL_GENOMES_REST, new EnsemblData(
ENSEMBL_GENOMES_REST, LATEST_ENSEMBLGENOMES_REST_VERSION));
}
*/
protected boolean isEnsemblAvailable()
{
- EnsemblInfo info = domainData.get(getDomain());
+ EnsemblData info = domainData.get(getDomain());
long now = System.currentTimeMillis();
*/
private void checkEnsemblRestVersion()
{
- EnsemblInfo info = domainData.get(getDomain());
+ EnsemblData info = domainData.get(getDomain());
JSONParser jp = new JSONParser();
URL url = null;
import jalview.exceptions.JalviewException;
import jalview.io.FastaFile;
import jalview.io.FileParse;
+import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.Comparison;
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
- private static final String ALLELES = "alleles";
-
protected static final String PARENT = "Parent";
protected static final String ID = "ID";
*/
static void reverseComplementAlleles(SequenceFeature sf)
{
- final String alleles = (String) sf.getValue(ALLELES);
+ final String alleles = (String) sf.getValue(Gff3Helper.ALLELES);
if (alleles == null)
{
return;
reverseComplementAllele(complement, allele);
}
String comp = complement.toString();
- sf.setValue(ALLELES, comp);
+ sf.setValue(Gff3Helper.ALLELES, comp);
sf.setDescription(comp);
/*
String atts = sf.getAttributes();
if (atts != null)
{
- atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp);
+ atts = atts.replace(Gff3Helper.ALLELES + "=" + alleles,
+ Gff3Helper.ALLELES + "=" + comp);
sf.setAttributes(atts);
}
}
if (br != null)
{
String geneId = parseSymbolResponse(br);
- System.out.println(url + " returned " + geneId);
if (geneId != null && !result.contains(geneId))
{
result.add(geneId);
--- /dev/null
+package jalview.ext.htsjdk;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * A thin wrapper for htsjdk classes to read either plain, or compressed, or
+ * compressed and indexed VCF files
+ */
+public class VCFReader implements Closeable, Iterable<VariantContext>
+{
+ private static final String GZ = "gz";
+
+ private static final String TBI_EXTENSION = ".tbi";
+
+ private boolean indexed;
+
+ private VCFFileReader reader;
+
+  /**
+   * Constructor given a raw or compressed VCF file or a (tabix) index file
+   * <p>
+   * For now, file type is inferred from its suffix: .gz or .bgz for compressed
+   * data, .tbi for an index file, anything else is assumed to be plain text
+   * VCF. A compressed file is treated as indexed only if a file with the same
+   * path plus ".tbi" exists. Given an index file, the data file is taken to be
+   * the same path with the ".tbi" suffix removed.
+   *
+   * @param filePath
+   *          path of the VCF data file, or of its tabix index file
+   * @throws IOException
+   *           NOTE(review): declared by this constructor; confirm which call,
+   *           if any, can raise it
+   */
+  public VCFReader(String filePath) throws IOException
+  {
+    String dataFilePath = filePath;
+    if (filePath.endsWith(GZ))
+    {
+      indexed = new File(filePath + TBI_EXTENSION).exists();
+    }
+    else if (filePath.endsWith(TBI_EXTENSION))
+    {
+      indexed = true;
+      // strip the index suffix to get the path of the data file
+      dataFilePath = filePath.substring(0,
+              filePath.length() - TBI_EXTENSION.length());
+    }
+
+    reader = new VCFFileReader(new File(dataFilePath), indexed);
+  }
+
+ @Override
+ public void close() throws IOException
+ {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ }
+
+ /**
+ * Returns an iterator over VCF variants in the file. The client should call
+ * close() on the iterator when finished with it.
+ */
+ @Override
+ public CloseableIterator<VariantContext> iterator()
+ {
+ return reader == null ? null : reader.iterator();
+ }
+
+ /**
+ * Queries for records overlapping the region specified. Note that this method
+ * is performant if the VCF file is indexed, and may be very slow if it is
+ * not.
+ * <p>
+ * Client code should call close() on the iterator when finished with it.
+ *
+ * @param chrom
+ * the chromosome to query
+ * @param start
+ * query interval start
+ * @param end
+ * query interval end
+ * @return
+ */
+ public CloseableIterator<VariantContext> query(final String chrom,
+ final int start, final int end)
+ {
+ if (reader == null) {
+ return null;
+ }
+ if (indexed)
+ {
+ return reader.query(chrom, start, end);
+ }
+ else
+ {
+ return queryUnindexed(chrom, start, end);
+ }
+ }
+
+  /**
+   * Returns an iterator over variant records read from a flat file which
+   * overlap the specified chromosomal positions. Call close() on the iterator
+   * when finished with it!
+   * <p>
+   * Records on other chromosomes are skipped, whatever their position. The
+   * scan stops early only on reaching a record on the queried chromosome that
+   * starts beyond the query end; this assumes that records are in position
+   * order within a chromosome.
+   * <p>
+   * Note that next() returns null, rather than throwing an exception, if there
+   * are no more matching records, and that remove() does nothing.
+   *
+   * @param chrom
+   *          the chromosome to query
+   * @param start
+   *          query interval start
+   * @param end
+   *          query interval end
+   * @return
+   */
+  protected CloseableIterator<VariantContext> queryUnindexed(
+          final String chrom, final int start, final int end)
+  {
+    final CloseableIterator<VariantContext> it = reader.iterator();
+
+    return new CloseableIterator<VariantContext>()
+    {
+      boolean atEnd = false;
+
+      // prime look-ahead buffer with next matching record
+      private VariantContext next = findNext();
+
+      private VariantContext findNext()
+      {
+        if (atEnd)
+        {
+          return null;
+        }
+        while (it.hasNext())
+        {
+          VariantContext variant = it.next();
+
+          /*
+           * positions are only comparable within the same chromosome, so
+           * skip other chromosomes before testing for 'past the end'
+           */
+          if (!chrom.equals(variant.getContig()))
+          {
+            continue;
+          }
+
+          if (variant.getStart() > end)
+          {
+            atEnd = true;
+            close();
+            return null;
+          }
+
+          if (variant.getEnd() >= start)
+          {
+            return variant;
+          }
+        }
+        atEnd = true;
+        return null;
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        boolean hasNext = !atEnd && (next != null);
+        if (!hasNext)
+        {
+          close();
+        }
+        return hasNext;
+      }
+
+      @Override
+      public VariantContext next()
+      {
+        /*
+         * return the next match, and then re-prime
+         * it with the following one (if any)
+         */
+        VariantContext temp = next;
+        next = findNext();
+        return temp;
+      }
+
+      @Override
+      public void remove()
+      {
+        // not implemented
+      }
+
+      @Override
+      public void close()
+      {
+        it.close();
+      }
+    };
+  }
+
+ /**
+ * Returns an object that models the VCF file headers
+ *
+ * @return
+ */
+ public VCFHeader getFileHeader()
+ {
+ return reader == null ? null : reader.getFileHeader();
+ }
+
+ /**
+ * Answers true if we are processing a tab-indexed VCF file, false if not
+ * (that is, for a plain text file, or for a compressed file for which no
+ * .tbi index file was found when this reader was constructed).
+ *
+ * @return the value of the 'indexed' flag
+ */
+ public boolean isIndex()
+ {
+ return indexed;
+ }
+}
import jalview.io.NewickFile;
import jalview.io.ScoreMatrixFile;
import jalview.io.TCoffeeScoreFile;
+import jalview.io.vcf.VCFLoader;
import jalview.jbgui.GAlignFrame;
import jalview.schemes.ColourSchemeI;
import jalview.schemes.ColourSchemes;
AlignmentI al = getViewport().getAlignment();
boolean nucleotide = al.isNucleotide();
+ loadVcf.setVisible(nucleotide);
showTranslation.setVisible(nucleotide);
showReverse.setVisible(nucleotide);
showReverseComplement.setVisible(nucleotide);
protected void showProductsFor(final SequenceI[] sel, final boolean _odna,
final String source)
{
- new Thread(CrossRefAction.showProductsFor(sel, _odna, source, this))
+ new Thread(CrossRefAction.getHandlerFor(sel, _odna, source, this))
.start();
}
new CalculationChooser(AlignFrame.this);
}
}
+
+ /**
+ * Prompts for a VCF file using a file chooser opened with the
+ * LAST_DIRECTORY property. If a file is chosen, saves its path as the
+ * LAST_DIRECTORY property, and passes it to a new VCFLoader for the
+ * viewport's alignment.
+ */
+ @Override
+ protected void loadVcf_actionPerformed()
+ {
+ JalviewFileChooser chooser = new JalviewFileChooser(
+ Cache.getProperty("LAST_DIRECTORY"));
+ chooser.setFileView(new JalviewFileView());
+ chooser.setDialogTitle(MessageManager.getString("label.load_vcf_file"));
+ chooser.setToolTipText(MessageManager.getString("label.load_vcf_file"));
+
+ int value = chooser.showOpenDialog(null);
+
+ // do nothing unless the user approved a file selection
+ if (value == JalviewFileChooser.APPROVE_OPTION)
+ {
+ String choice = chooser.getSelectedFile().getPath();
+ Cache.setProperty("LAST_DIRECTORY", choice);
+ new VCFLoader(viewport.getAlignment()).loadVCF(choice, this);
+ }
+
+ }
}
class PrintThread extends Thread
* around to the bottom of the window stack (as the original implementation
* does)
*
- * @see com.sun.java.swing.plaf.windows.WindowsDesktopManager
*/
public class AquaInternalFrameManager extends DefaultDesktopManager
{
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblInfo;
+import jalview.ext.ensembl.EnsemblMap;
import jalview.io.gff.SequenceOntologyI;
import jalview.structure.StructureSelectionManager;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
import jalview.util.MessageManager;
import jalview.ws.SequenceFetcher;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
-
-import javax.swing.JOptionPane;
+import java.util.Map;
+import java.util.Set;
/**
* Factory constructor and runnable for discovering and displaying
private SequenceI[] sel;
- private boolean _odna;
+ private final boolean _odna;
private String source;
- List<AlignmentViewPanel> xrefViews = new ArrayList<AlignmentViewPanel>();
+ List<AlignmentViewPanel> xrefViews = new ArrayList<>();
- public List<jalview.api.AlignmentViewPanel> getXrefViews()
+ List<AlignmentViewPanel> getXrefViews()
{
return xrefViews;
}
{
return;
}
+
+ /*
+ * try to look up chromosomal coordinates for nucleotide
+ * sequences (if not already retrieved)
+ */
+ findGeneLoci(xrefs.getSequences());
+
/*
* get display scheme (if any) to apply to features
*/
if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
{
- boolean copyAlignmentIsAligned = false;
- if (dna)
- {
- copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
- xrefsAlignment.getSequencesArray());
- if (copyAlignment.getHeight() == 0)
- {
- JvOptionPane.showMessageDialog(alignFrame,
- MessageManager.getString("label.cant_map_cds"),
- MessageManager.getString("label.operation_failed"),
- JvOptionPane.OK_OPTION);
- System.err.println("Failed to make CDS alignment");
- }
-
- /*
- * pending getting Embl transcripts to 'align',
- * we are only doing this for Ensembl
- */
- // TODO proper criteria for 'can align as cdna'
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
- || AlignmentUtils.looksLikeEnsembl(alignment))
- {
- copyAlignment.alignAs(alignment);
- copyAlignmentIsAligned = true;
- }
- }
- else
+ copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
+ xrefs, xrefsAlignment);
+ if (copyAlignment == null)
{
- copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
- xrefs.getSequencesArray(), dataset);
- }
- copyAlignment
- .setGapCharacter(alignFrame.viewport.getGapCharacter());
-
- StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
-
- /*
- * register any new mappings for sequence mouseover etc
- * (will not duplicate any previously registered mappings)
- */
- ssm.registerMappings(dataset.getCodonFrames());
-
- if (copyAlignment.getHeight() <= 0)
- {
- System.err.println(
- "No Sequences generated for xRef type " + source);
- return;
- }
- /*
- * align protein to dna
- */
- if (dna && copyAlignmentIsAligned)
- {
- xrefsAlignment.alignAs(copyAlignment);
- }
- else
- {
- /*
- * align cdna to protein - currently only if
- * fetching and aligning Ensembl transcripts!
- */
- // TODO: generalise for other sources of locus/transcript/cds data
- if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
- {
- copyAlignment.alignAs(xrefsAlignment);
- }
+ return; // failed
}
}
+
/*
* build AlignFrame(s) according to available alignment data
*/
xrefViews.add(newFrame.alignPanel);
return; // via finally clause
}
+
AlignFrame copyThis = new AlignFrame(copyAlignment,
AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
copyThis.setTitle(alignFrame.getTitle());
}
/**
+ * Tries to add chromosomal coordinates to any nucleotide sequence which does
+ * not already have them. Coordinates are retrieved from Ensembl given an
+ * Ensembl identifier, either on the sequence itself or on a peptide sequence
+ * it has a reference to.
+ *
+ * <pre>
+ * Example (human):
+ * - fetch EMBLCDS cross-references for Uniprot entry P30419
+ * - the EMBL sequences do not have xrefs to Ensembl
+ * - the Uniprot entry has xrefs to
+ * ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
+ * - either of the transcript ids can be used to retrieve gene loci e.g.
+ * http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
+ * Example (invertebrate):
+ * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
+ * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
+ * - can retrieve gene loci with
+ * http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
+ * </pre>
+ *
+ * @param sequences
+ */
+ public static void findGeneLoci(List<SequenceI> sequences)
+ {
+ Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
+ for (SequenceI seq : sequences)
+ {
+ findGeneLoci(seq, retrievedLoci);
+ }
+ }
+
+ /**
+ * Tries to find chromosomal coordinates for the sequence, by searching its
+ * direct and indirect cross-references for Ensembl. If the loci have already
+ * been retrieved, just reads them out of the map of retrievedLoci; this is
+ * the case of an alternative transcript for the same protein. Otherwise calls
+ * a REST service to retrieve the loci, and if successful, adds them to the
+ * sequence and to the retrievedLoci.
+ *
+ * @param seq
+ * @param retrievedLoci
+ */
+ static void findGeneLoci(SequenceI seq,
+ Map<DBRefEntry, GeneLociI> retrievedLoci)
+ {
+ /*
+ * don't replace any existing chromosomal coordinates
+ */
+ if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
+ || seq.getDBRefs() == null)
+ {
+ return;
+ }
+
+ Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
+
+ /*
+ * first look for direct dbrefs from sequence to Ensembl
+ */
+ String[] divisionsArray = ensemblDivisions
+ .toArray(new String[ensemblDivisions.size()]);
+ DBRefEntry[] seqRefs = seq.getDBRefs();
+ DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
+ divisionsArray);
+ if (directEnsemblRefs != null)
+ {
+ for (DBRefEntry ensemblRef : directEnsemblRefs)
+ {
+ if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
+ {
+ return;
+ }
+ }
+ }
+
+ /*
+ * else look for indirect dbrefs from sequence to Ensembl
+ */
+ for (DBRefEntry dbref : seq.getDBRefs())
+ {
+ if (dbref.getMap() != null && dbref.getMap().getTo() != null)
+ {
+ DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs();
+ DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
+ divisionsArray);
+ if (indirectEnsemblRefs != null)
+ {
+ for (DBRefEntry ensemblRef : indirectEnsemblRefs)
+ {
+ if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
+ {
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+
+  /**
+   * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
+   * identifier in dbref. If successful, and the sequence length matches gene
+   * loci length, then add it to the sequence, and to the retrievedLoci map.
+   * Answers true if successful, else false.
+   * <p>
+   * Loci already held in retrievedLoci for the dbref are reused if their length
+   * matches the sequence; otherwise the REST service is called, trying the CDS
+   * mapping first and then the CDNA mapping.
+   *
+   * @param seq
+   * @param dbref
+   * @param retrievedLoci
+   *          loci retrieved so far, keyed by the dbref they were retrieved for
+   * @return
+   */
+  static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
+          Map<DBRefEntry, GeneLociI> retrievedLoci)
+  {
+    String accession = dbref.getAccessionId();
+    String division = dbref.getSource();
+
+    /*
+     * hack: ignore cross-references to Ensembl protein ids
+     * (or use map/translation perhaps?)
+     * todo: is there an equivalent in EnsemblGenomes?
+     */
+    if (accession == null || accession.startsWith("ENSP"))
+    {
+      return false;
+    }
+
+    int seqLength = seq.getLength();
+
+    /*
+     * reuse loci already retrieved for this dbref if they fit the sequence
+     * (the case of an alternative transcript), else call the REST service
+     */
+    GeneLociI geneLoci = retrievedLoci == null ? null
+            : retrievedLoci.get(dbref);
+    if (!mapsFromLength(geneLoci, seqLength))
+    {
+      EnsemblMap mapper = new EnsemblMap();
+
+      /*
+       * try CDS mapping first
+       */
+      geneLoci = mapper.getCdsMapping(division, accession, 1, seqLength);
+      if (!mapsFromLength(geneLoci, seqLength))
+      {
+        /*
+         * else try CDNA mapping
+         */
+        geneLoci = mapper.getCdnaMapping(division, accession, 1,
+                seqLength);
+      }
+    }
+
+    if (!mapsFromLength(geneLoci, seqLength))
+    {
+      return false;
+    }
+
+    seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
+            geneLoci.getChromosomeId(), geneLoci.getMap());
+    if (retrievedLoci != null)
+    {
+      retrievedLoci.put(dbref, geneLoci);
+    }
+    return true;
+  }
+
+  /**
+   * Answers true if loci is not null and the total length of its map's 'from'
+   * ranges equals the given length
+   *
+   * @param loci
+   * @param length
+   * @return
+   */
+  private static boolean mapsFromLength(GeneLociI loci, int length)
+  {
+    return loci != null && MappingUtils
+            .getLength(loci.getMap().getFromRanges()) == length;
+  }
+
+
+ /**
+ * Makes the 'copy' alignment that accompanies the cross-reference alignment
+ * in a split frame. If dna is true this is a CDS alignment made from the
+ * selected sequences and the sequences of xrefsAlignment; otherwise it is a
+ * copy alignment made from the selected sequences and the sequences of
+ * xrefs. Also sets the gap character from the alignFrame's viewport,
+ * registers the dataset's codon frames with the StructureSelectionManager,
+ * and aligns one alignment to the other in the cases coded below.
+ *
+ * @param alignment
+ * the alignment that a CDS alignment may be aligned to
+ * @param dataset
+ * passed to the alignment factory methods; supplies the codon frames
+ * @param dna
+ * selects the CDS alignment case if true
+ * @param xrefs
+ * supplies the sequences for the copy alignment if dna is false
+ * @param xrefsAlignment
+ * supplies the sequences for the CDS alignment if dna is true
+ * @return the copy alignment, or null if no CDS alignment could be made (a
+ * dialog is shown in this case), or the copy has no sequences
+ */
+ protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
+ AlignmentI dataset, boolean dna, AlignmentI xrefs,
+ AlignmentI xrefsAlignment)
+ {
+ AlignmentI copyAlignment;
+ boolean copyAlignmentIsAligned = false;
+ if (dna)
+ {
+ copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
+ xrefsAlignment.getSequencesArray());
+ if (copyAlignment.getHeight() == 0)
+ {
+ JvOptionPane.showMessageDialog(alignFrame,
+ MessageManager.getString("label.cant_map_cds"),
+ MessageManager.getString("label.operation_failed"),
+ JvOptionPane.OK_OPTION);
+ System.err.println("Failed to make CDS alignment");
+ return null;
+ }
+
+ /*
+ * pending getting Embl transcripts to 'align',
+ * we are only doing this for Ensembl
+ */
+ // TODO proper criteria for 'can align as cdna'
+ if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+ || AlignmentUtils.looksLikeEnsembl(alignment))
+ {
+ copyAlignment.alignAs(alignment);
+ copyAlignmentIsAligned = true;
+ }
+ }
+ else
+ {
+ copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
+ xrefs.getSequencesArray(), dataset);
+ }
+ copyAlignment
+ .setGapCharacter(alignFrame.viewport.getGapCharacter());
+
+ StructureSelectionManager ssm = StructureSelectionManager
+ .getStructureSelectionManager(Desktop.instance);
+
+ /*
+ * register any new mappings for sequence mouseover etc
+ * (will not duplicate any previously registered mappings)
+ */
+ ssm.registerMappings(dataset.getCodonFrames());
+
+ if (copyAlignment.getHeight() <= 0)
+ {
+ System.err.println(
+ "No Sequences generated for xRef type " + source);
+ return null;
+ }
+
+ /*
+ * align protein to dna
+ */
+ if (dna && copyAlignmentIsAligned)
+ {
+ xrefsAlignment.alignAs(copyAlignment);
+ }
+ else
+ {
+ /*
+ * align cdna to protein - currently only if
+ * fetching and aligning Ensembl transcripts!
+ */
+ // TODO: generalise for other sources of locus/transcript/cds data
+ if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
+ {
+ copyAlignment.alignAs(xrefsAlignment);
+ }
+ }
+
+ return copyAlignment;
+ }
+
+
+ /**
* Makes an alignment containing the given sequences, and adds them to the
* given dataset, which is also set as the dataset for the new alignment
*
return al;
}
- public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel,
- boolean _odna, String source)
+ /**
+ * Constructor
+ *
+ * @param af
+ * @param seqs
+ * @param fromDna
+ * @param dbSource
+ */
+ CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
+ String dbSource)
{
- this.alignFrame = alignFrame;
- this.sel = sel;
- this._odna = _odna;
- this.source = source;
+ this.alignFrame = af;
+ this.sel = seqs;
+ this._odna = fromDna;
+ this.source = dbSource;
}
- public static CrossRefAction showProductsFor(final SequenceI[] sel,
- final boolean _odna, final String source,
+ public static CrossRefAction getHandlerFor(final SequenceI[] sel,
+ final boolean fromDna, final String source,
final AlignFrame alignFrame)
{
- return new CrossRefAction(alignFrame, sel, _odna, source);
+ return new CrossRefAction(alignFrame, sel, fromDna, source);
}
}
*/
public void setText(String text)
{
+ textarea.setDocument(textarea.getEditorKit().createDefaultDocument());
textarea.setText(text);
}
* and any non-positional features
*/
List<String> nlinks = Preferences.sequenceUrlLinks.getLinksForMenu();
- for (SequenceFeature sf : sq.getFeatures().getNonPositionalFeatures())
+ List<SequenceFeature> features = sq.getFeatures().getNonPositionalFeatures();
+ for (SequenceFeature sf : features)
{
if (sf.links != null)
{
}
}
- PopupMenu pop = new PopupMenu(alignPanel, sq, nlinks,
+ PopupMenu pop = new PopupMenu(alignPanel, sq, features,
Preferences.getGroupURLLinks());
pop.show(this, e.getX(), e.getY());
}
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.HiddenColumns;
import jalview.datamodel.PDBEntry;
-import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.util.GroupUrlLink;
import jalview.util.GroupUrlLink.UrlStringTooLongException;
import jalview.util.MessageManager;
+import jalview.util.StringUtils;
import jalview.util.UrlLink;
import java.awt.Color;
* Creates a new PopupMenu object.
*
* @param ap
- * DOCUMENT ME!
* @param seq
- * DOCUMENT ME!
+ * @param features
+ * non-positional features (for seq not null), or positional features
+ * at residue (for seq equal to null)
*/
- public PopupMenu(final AlignmentPanel ap, Sequence seq,
- List<String> links)
+ public PopupMenu(final AlignmentPanel ap, SequenceI seq,
+ List<SequenceFeature> features)
{
- this(ap, seq, links, null);
+ this(ap, seq, features, null);
}
/**
+ * Constructor
*
- * @param ap
+ * @param alignPanel
* @param seq
- * @param links
+ * the sequence under the cursor if in the Id panel, null if in the
+ * sequence panel
+ * @param features
+ * non-positional features if in the Id panel, features at the
+ * clicked residue if in the sequence panel
* @param groupLinks
*/
- public PopupMenu(final AlignmentPanel ap, final SequenceI seq,
- List<String> links, List<String> groupLinks)
+ public PopupMenu(final AlignmentPanel alignPanel, final SequenceI seq,
+ List<SequenceFeature> features, List<String> groupLinks)
{
// /////////////////////////////////////////////////////////
// If this is activated from the sequence panel, the user may want to
//
// If from the IDPanel, we must display the sequence menu
// ////////////////////////////////////////////////////////
- this.ap = ap;
+ this.ap = alignPanel;
sequence = seq;
for (String ff : FileFormats.getInstance().getWritableFormats(true))
/*
* And repeat for the current selection group (if there is one):
*/
- final List<SequenceI> selectedGroup = (ap.av.getSelectionGroup() == null
+ final List<SequenceI> selectedGroup = (alignPanel.av.getSelectionGroup() == null
? Collections.<SequenceI> emptyList()
- : ap.av.getSelectionGroup().getSequences());
+ : alignPanel.av.getSelectionGroup().getSequences());
buildAnnotationTypesMenus(groupShowAnnotationsMenu,
groupHideAnnotationsMenu, selectedGroup);
configureReferenceAnnotationsMenu(groupAddReferenceAnnotations,
if (seq != null)
{
sequenceMenu.setText(sequence.getName());
- if (seq == ap.av.getAlignment().getSeqrep())
+ if (seq == alignPanel.av.getAlignment().getSeqrep())
{
makeReferenceSeq.setText(
MessageManager.getString("action.unmark_as_reference"));
MessageManager.getString("action.set_as_reference"));
}
- if (!ap.av.getAlignment().isNucleotide())
+ if (!alignPanel.av.getAlignment().isNucleotide())
{
remove(rnaStructureMenu);
}
* add menu items to 2D-render any alignment or sequence secondary
* structure annotation
*/
- AlignmentAnnotation[] aas = ap.av.getAlignment()
+ AlignmentAnnotation[] aas = alignPanel.av.getAlignment()
.getAlignmentAnnotation();
if (aas != null)
{
@Override
public void actionPerformed(ActionEvent e)
{
- new AppVarna(seq, aa, ap);
+ new AppVarna(seq, aa, alignPanel);
}
});
rnaStructureMenu.add(menuItem);
public void actionPerformed(ActionEvent e)
{
// TODO: VARNA does'nt print gaps in the sequence
- new AppVarna(seq, aa, ap);
+ new AppVarna(seq, aa, alignPanel);
}
});
rnaStructureMenu.add(menuItem);
});
add(menuItem);
- if (ap.av.getSelectionGroup() != null
- && ap.av.getSelectionGroup().getSize() > 1)
+ if (alignPanel.av.getSelectionGroup() != null
+ && alignPanel.av.getSelectionGroup().getSize() > 1)
{
menuItem = new JMenuItem(MessageManager
.formatMessage("label.represent_group_with", new Object[]
sequenceMenu.add(menuItem);
}
- if (ap.av.hasHiddenRows())
+ if (alignPanel.av.hasHiddenRows())
{
- final int index = ap.av.getAlignment().findIndex(seq);
+ final int index = alignPanel.av.getAlignment().findIndex(seq);
- if (ap.av.adjustForHiddenSeqs(index)
- - ap.av.adjustForHiddenSeqs(index - 1) > 1)
+ if (alignPanel.av.adjustForHiddenSeqs(index)
+ - alignPanel.av.adjustForHiddenSeqs(index - 1) > 1)
{
menuItem = new JMenuItem(
MessageManager.getString("action.reveal_sequences"));
@Override
public void actionPerformed(ActionEvent e)
{
- ap.av.showSequence(index);
- if (ap.overviewPanel != null)
+ alignPanel.av.showSequence(index);
+ if (alignPanel.overviewPanel != null)
{
- ap.overviewPanel.updateOverviewImage();
+ alignPanel.overviewPanel.updateOverviewImage();
}
}
});
}
}
// for the case when no sequences are even visible
- if (ap.av.hasHiddenRows())
+ if (alignPanel.av.hasHiddenRows())
{
{
menuItem = new JMenuItem(
@Override
public void actionPerformed(ActionEvent e)
{
- ap.av.showAllHiddenSeqs();
- if (ap.overviewPanel != null)
+ alignPanel.av.showAllHiddenSeqs();
+ if (alignPanel.overviewPanel != null)
{
- ap.overviewPanel.updateOverviewImage();
+ alignPanel.overviewPanel.updateOverviewImage();
}
}
});
}
}
- SequenceGroup sg = ap.av.getSelectionGroup();
+ SequenceGroup sg = alignPanel.av.getSelectionGroup();
boolean isDefinedGroup = (sg != null)
- ? ap.av.getAlignment().getGroups().contains(sg)
+ ? alignPanel.av.getAlignment().getGroups().contains(sg)
: false;
if (sg != null && sg.getSize() > 0)
Hashtable<String, PDBEntry> pdbe = new Hashtable<>(), reppdb = new Hashtable<>();
SequenceI sqass = null;
- for (SequenceI sq : ap.av.getSequenceSelection())
+ for (SequenceI sq : alignPanel.av.getSequenceSelection())
{
Vector<PDBEntry> pes = sq.getDatasetSequence().getAllPDBEntries();
if (pes != null && pes.size() > 0)
rnaStructureMenu.setVisible(false);
}
- if (links != null && links.size() > 0)
+ addLinks(seq, features);
+
+ if (seq == null)
+ {
+ addFeatureDetails(features);
+ }
+ }
+
+ /**
+ * Adds a sub-menu (titled with the 'label.feature_details' message) holding
+ * one menu item per supplied sequence feature. Selecting an item calls
+ * showFeatureDetails for that feature. Nothing is added if the list is null
+ * or empty.
+ * <p>
+ * Each item's text is built as: feature type, then start position (or
+ * start-end if they differ), then the feature description (if any) with html
+ * tags stripped and cut to at most 6 characters, then the feature group (if
+ * any) in brackets.
+ *
+ * @param features
+ * the features to list (may be null or empty)
+ */
+ protected void addFeatureDetails(List<SequenceFeature> features)
+ {
+ if (features == null || features.isEmpty())
+ {
+ return;
+ }
+ JMenu details = new JMenu(
+ MessageManager.getString("label.feature_details"));
+ add(details);
+
+ for (final SequenceFeature sf : features)
 {
- addFeatureLinks(seq, links);
+ int start = sf.getBegin();
+ int end = sf.getEnd();
+ String desc = null;
+ // show a single position for a one-residue feature, else the range
+ if (start == end)
+ {
+ desc = String.format("%s %d", sf.getType(), start);
+ }
+ else
+ {
+ desc = String.format("%s %d-%d", sf.getType(), start, end);
+ }
+ String description = sf.getDescription();
+ if (description != null)
+ {
+ description = StringUtils.stripHtmlTags(description);
+ // keep the menu item short: at most 6 characters of description,
+ // followed by ".." if it was truncated
+ if (description.length() <= 6)
+ {
+ desc = desc + " " + description;
+ }
+ else
+ {
+ desc = desc + " " + description.substring(0, 6) + "..";
+ }
+ }
+ if (sf.getFeatureGroup() != null)
+ {
+ desc = desc + " (" + sf.getFeatureGroup() + ")";
+ }
+ JMenuItem item = new JMenuItem(desc);
+ item.addActionListener(new ActionListener()
+ {
+ @Override
+ public void actionPerformed(ActionEvent e)
+ {
+ showFeatureDetails(sf);
+ }
+ });
+ details.add(item);
 }
 }
 /**
+ * Opens a panel showing a text report of feature details
+ *
+ * @param sf
+ * the feature whose details report (sf.getDetailsReport()) is shown
+ */
+ protected void showFeatureDetails(SequenceFeature sf)
+ {
+ CutAndPasteHtmlTransfer cap = new CutAndPasteHtmlTransfer();
+ // it appears Java's CSS does not support border-collapse :-(
+ cap.addStylesheetRule("table { border-collapse: collapse;}");
+ cap.addStylesheetRule("table, td, th {border: 1px solid black;}");
+ cap.setText(sf.getDetailsReport());
+
+ // show the report in a new 500 x 500 internal frame on the desktop
+ Desktop.addInternalFrame(cap,
+ MessageManager.getString("label.feature_details"), 500, 500);
+ }
+
+ /**
* Adds a 'Link' menu item with a sub-menu item for each hyperlink provided.
+ * When seq is not null, these are links for the sequence id, which may be to
+ * external web sites for the sequence accession, and/or links embedded in
+ * non-positional features. When seq is null, only links embedded in the
+ * provided features are added.
*
* @param seq
- * @param links
+ * @param features
*/
- void addFeatureLinks(final SequenceI seq, List<String> links)
+ void addLinks(final SequenceI seq, List<SequenceFeature> features)
{
JMenu linkMenu = new JMenu(MessageManager.getString("action.link"));
+
+ List<String> nlinks = null;
+ if (seq != null)
+ {
+ nlinks = Preferences.sequenceUrlLinks.getLinksForMenu();
+ }
+ else
+ {
+ nlinks = new ArrayList<>();
+ }
+
+ if (features != null)
+ {
+ for (SequenceFeature sf : features)
+ {
+ if (sf.links != null)
+ {
+ for (String link : sf.links)
+ {
+ nlinks.add(link);
+ }
+ }
+ }
+ }
+
Map<String, List<String>> linkset = new LinkedHashMap<>();
- for (String link : links)
+ for (String link : nlinks)
{
UrlLink urlLink = null;
try
addshowLinks(linkMenu, linkset.values());
- // disable link menu if there are no valid entries
+ // only add link menu if it has entries
if (linkMenu.getItemCount() > 0)
{
- linkMenu.setEnabled(true);
- }
- else
- {
- linkMenu.setEnabled(false);
- }
-
- if (sequence != null)
- {
- sequenceMenu.add(linkMenu);
- }
- else
- {
- add(linkMenu);
+ if (sequence != null)
+ {
+ sequenceMenu.add(linkMenu);
+ }
+ else
+ {
+ add(linkMenu);
+ }
}
-
}
/**
import java.awt.event.MouseMotionListener;
import java.awt.event.MouseWheelEvent;
import java.awt.event.MouseWheelListener;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
final int column = findColumn(evt);
final int seq = findSeq(evt);
SequenceI sequence = av.getAlignment().getSequenceAt(seq);
- List<SequenceFeature> allFeatures = ap.getFeatureRenderer()
+ List<SequenceFeature> features = ap.getFeatureRenderer()
.findFeaturesAtColumn(sequence, column + 1);
- List<String> links = new ArrayList<>();
- for (SequenceFeature sf : allFeatures)
- {
- if (sf.links != null)
- {
- for (String link : sf.links)
- {
- links.add(link);
- }
- }
- }
- PopupMenu pop = new PopupMenu(ap, null, links);
+ PopupMenu pop = new PopupMenu(ap, null, features);
pop.show(this, evt.getX(), evt.getY());
}
import jalview.datamodel.SequenceI;
import jalview.io.gff.GffConstants;
import jalview.util.MessageManager;
+import jalview.util.StringUtils;
import jalview.util.UrlLink;
import java.util.Arrays;
/*
* Comparator to order DBRefEntry by Source + accession id (case-insensitive),
- * with 'Primary' sources placed before others
+ * with 'Primary' sources placed before others, and 'chromosome' first of all
*/
private static Comparator<DBRefEntry> comparator = new Comparator<DBRefEntry>()
{
@Override
public int compare(DBRefEntry ref1, DBRefEntry ref2)
{
+ if (ref1.isChromosome())
+ {
+ return -1;
+ }
+ if (ref2.isChromosome())
+ {
+ return 1;
+ }
String s1 = ref1.getSource();
String s2 = ref2.getSource();
boolean s1Primary = isPrimarySource(s1);
sb.append(" ").append(feature.end);
}
- if (feature.getDescription() != null
- && !feature.description.equals(feature.getType()))
+ String description = feature.getDescription();
+ if (description != null && !description.equals(feature.getType()))
{
- String tmpString = feature.getDescription();
- String tmp2up = tmpString.toUpperCase();
- int startTag = tmp2up.indexOf("<HTML>");
- if (startTag > -1)
- {
- tmpString = tmpString.substring(startTag + 6);
- tmp2up = tmp2up.substring(startTag + 6);
- }
- int endTag = tmp2up.indexOf("</BODY>");
- if (endTag > -1)
- {
- tmpString = tmpString.substring(0, endTag);
- tmp2up = tmp2up.substring(0, endTag);
- }
- endTag = tmp2up.indexOf("</HTML>");
- if (endTag > -1)
- {
- tmpString = tmpString.substring(0, endTag);
- }
-
- if (startTag > -1)
- {
- sb.append("; ").append(tmpString);
- }
- else
- {
- if (tmpString.indexOf("<") > -1 || tmpString.indexOf(">") > -1)
- {
- // The description does not specify html is to
- // be used, so we must remove < > symbols
- tmpString = tmpString.replaceAll("<", "<");
- tmpString = tmpString.replaceAll(">", ">");
-
- sb.append("; ");
- sb.append(tmpString);
- }
- else
- {
- sb.append("; ").append(tmpString);
- }
- }
+ description = StringUtils.stripHtmlTags(description);
+ sb.append("; ").append(description);
}
// check score should be shown
if (!Float.isNaN(feature.getScore()))
*/
public class Gff3Helper extends GffHelperBase
{
+ public static final String ALLELES = "alleles";
+
protected static final String TARGET = "Target";
protected static final String ID = "ID";
/*
* Ensembl returns dna variants as 'alleles'
*/
- desc = StringUtils.listToDelimitedString(attributes.get("alleles"),
+ desc = StringUtils.listToDelimitedString(attributes.get(ALLELES),
",");
}
--- /dev/null
+package jalview.io.vcf;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineCount;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+
+import jalview.analysis.AlignmentUtils;
+import jalview.analysis.Dna;
+import jalview.api.AlignViewControllerGuiI;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblMap;
+import jalview.ext.htsjdk.VCFReader;
+import jalview.io.gff.Gff3Helper;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+import jalview.util.MessageManager;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * A class to read VCF data (using the htsjdk) and add variants as sequence
+ * features on dna and any related protein product sequences
+ *
+ * @author gmcarstairs
+ */
+public class VCFLoader
+{
+ /*
+ * keys to fields of VEP CSQ consequence data
+ * see https://www.ensembl.org/info/docs/tools/vep/vep_formats.html
+ */
+ private static final String ALLELE_KEY = "Allele";
+
+ private static final String ALLELE_NUM_KEY = "ALLELE_NUM"; // 0 (ref), 1...
+ private static final String FEATURE_KEY = "Feature"; // Ensembl stable id
+
+ /*
+ * what comes before column headings in CSQ Description field
+ */
+ private static final String FORMAT = "Format: ";
+
+ /*
+ * default VCF INFO key for VEP consequence data
+ * NB this can be overridden running VEP with --vcf_info_field
+ * - we don't handle this case (require CSQ identifier)
+ */
+ private static final String CSQ = "CSQ";
+
+ /*
+ * separator for fields in consequence data
+ */
+ private static final String PIPE = "|";
+
+ private static final String PIPE_REGEX = "\\" + PIPE;
+
+ /*
+ * key for Allele Frequency output by VEP
+ * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html
+ */
+ private static final String ALLELE_FREQUENCY_KEY = "AF";
+
+ /*
+ * delimiter that separates multiple consequence data blocks
+ */
+ private static final String COMMA = ",";
+
+ /*
+ * the feature group assigned to a VCF variant in Jalview
+ */
+ private static final String FEATURE_GROUP_VCF = "VCF";
+
+ /*
+ * internal delimiter used to build keys for assemblyMappings
+ *
+ */
+ private static final String EXCL = "!";
+
+ /*
+ * the alignment we are associating VCF data with
+ */
+ private AlignmentI al;
+
+ /*
+ * mappings between VCF and sequence reference assembly regions, as
+ * key = "species!chromosome!fromAssembly!toAssembly
+ * value = Map{fromRange, toRange}
+ */
+ private Map<String, Map<int[], int[]>> assemblyMappings;
+
+ /*
+ * holds details of the VCF header lines (metadata)
+ */
+ private VCFHeader header;
+
+ /*
+ * the position (0...) of field in each block of
+ * CSQ (consequence) data (if declared in the VCF INFO header for CSQ)
+ * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html
+ */
+ private int csqAlleleFieldIndex = -1;
+ private int csqAlleleNumberFieldIndex = -1;
+ private int csqFeatureFieldIndex = -1;
+
+ /**
+  * Creates a loader that will associate VCF data with the given alignment
+  *
+  * @param alignment
+  *          the alignment to which VCF variants will be added
+  */
+ public VCFLoader(AlignmentI alignment)
+ {
+   /*
+    * cache of assembly mappings, keyed by
+    * species!chromosome!fromAssembly!toAssembly,
+    * each value being a map of {fromRange, toRange}
+    */
+   this.assemblyMappings = new HashMap<>();
+
+   this.al = alignment;
+ }
+
+ /**
+  * Shows a 'searching' status message (if a gui handler is supplied), then
+  * starts a new thread which reads the VCF file and adds its variant data to
+  * the alignment
+  * <p>
+  * This method is not thread safe - concurrent threads should use separate
+  * instances of this class.
+  *
+  * @param filePath
+  *          path of the VCF file to load
+  * @param gui
+  *          optional callback handler for status messages (may be null)
+  */
+ public void loadVCF(final String filePath,
+         final AlignViewControllerGuiI gui)
+ {
+   if (gui != null)
+   {
+     gui.setStatus(MessageManager.getString("label.searching_vcf"));
+   }
+
+   Runnable loadTask = new Runnable()
+   {
+     @Override
+     public void run()
+     {
+       VCFLoader.this.doLoad(filePath, gui);
+     }
+   };
+   new Thread(loadTask).start();
+ }
+
+ /**
+  * Loads VCF on to an alignment - provided it can be related to one or more
+  * sequence's chromosomal coordinates. On completion, reports the number of
+  * variants and sequences updated via the gui handler (if supplied).
+  * <p>
+  * The VCF must declare a '##reference' header line, since this is used to
+  * decide whether (and how) the VCF data relates to each sequence; if it is
+  * absent an error is reported and nothing is loaded.
+  * <p>
+  * Any error is reported on the console and (if a gui handler is supplied)
+  * the status bar; no exception is thrown to the caller. The VCF reader is
+  * always closed before returning.
+  *
+  * @param filePath
+  *          path of the VCF file to load
+  * @param gui
+  *          optional callback handler for messages
+  */
+ protected void doLoad(String filePath, AlignViewControllerGuiI gui)
+ {
+   VCFReader reader = null;
+   try
+   {
+     reader = new VCFReader(filePath);
+
+     header = reader.getFileHeader();
+     VCFHeaderLine ref = header
+             .getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
+     if (ref == null)
+     {
+       /*
+        * no reference assembly declared, so we can't tell
+        * which sequences (if any) the VCF data applies to
+        */
+       System.err.println("Error processing VCF: no ##"
+               + VCFHeader.REFERENCE_KEY + " header found in "
+               + filePath);
+       if (gui != null)
+       {
+         gui.setStatus("Error occurred - see console for details");
+       }
+       return;
+     }
+
+     /*
+      * get offset of CSQ ALLELE_NUM and Feature if declared
+      */
+     locateCsqFields();
+
+     String vcfAssembly = ref.getValue();
+
+     int varCount = 0;
+     int seqCount = 0;
+
+     /*
+      * query for VCF overlapping each sequence in turn
+      */
+     for (SequenceI seq : al.getSequences())
+     {
+       int added = loadSequenceVCF(seq, reader, vcfAssembly);
+       if (added > 0)
+       {
+         seqCount++;
+         varCount += added;
+         transferAddedFeatures(seq);
+       }
+     }
+     if (gui != null)
+     {
+       String msg = MessageManager.formatMessage("label.added_vcf",
+               varCount, seqCount);
+       gui.setStatus(msg);
+       if (gui.getFeatureSettingsUI() != null)
+       {
+         gui.getFeatureSettingsUI().discoverAllFeatureData();
+       }
+     }
+   } catch (Throwable e)
+   {
+     /*
+      * this runs on a worker thread, so report everything here
+      * rather than let the thread die silently
+      */
+     System.err.println("Error processing VCF: " + e.getMessage());
+     e.printStackTrace();
+     if (gui != null)
+     {
+       gui.setStatus("Error occurred - see console for details");
+     }
+   } finally
+   {
+     if (reader != null)
+     {
+       try
+       {
+         reader.close();
+       } catch (IOException e)
+       {
+         // ignore - nothing useful can be done if close fails
+       }
+     }
+   }
+ }
+
+ /**
+  * Records the position of selected fields defined in the CSQ INFO header (if
+  * there is one). CSQ fields are declared in the CSQ INFO Description e.g.
+  * <p>
+  * Description="Consequence ...from ... VEP. Format: Allele|Consequence|...
+  * <p>
+  * Field positions are first reset to -1 (not found), so that positions
+  * located in a previously loaded VCF header are never applied to the current
+  * one. They remain -1 if there is no CSQ header, or its Description cannot
+  * be parsed.
+  */
+ protected void locateCsqFields()
+ {
+   csqAlleleFieldIndex = -1;
+   csqAlleleNumberFieldIndex = -1;
+   csqFeatureFieldIndex = -1;
+
+   VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ);
+   if (csqInfo == null)
+   {
+     return;
+   }
+
+   String desc = csqInfo.getDescription();
+   int formatPos = desc == null ? -1 : desc.indexOf(FORMAT);
+   if (formatPos == -1)
+   {
+     System.err.println("Parse error, failed to find " + FORMAT
+             + " in " + desc);
+     return;
+   }
+
+   /*
+    * the field names are the pipe-separated values following "Format: "
+    */
+   String[] format = desc.substring(formatPos + FORMAT.length())
+           .split(PIPE_REGEX);
+   for (int index = 0; index < format.length; index++)
+   {
+     String field = format[index];
+     if (ALLELE_NUM_KEY.equals(field))
+     {
+       csqAlleleNumberFieldIndex = index;
+     }
+     else if (ALLELE_KEY.equals(field))
+     {
+       csqAlleleFieldIndex = index;
+     }
+     else if (FEATURE_KEY.equals(field))
+     {
+       csqFeatureFieldIndex = index;
+     }
+   }
+ }
+
+ /**
+  * Propagates VCF features from the given sequence to each sequence it is
+  * mapped to through a database reference. For a 3:1 mapping, peptide
+  * variants are computed from the nucleotide variants; for any other mapping
+  * the sequence's VCF variant features are transferred via the mapping.
+  *
+  * @param seq
+  *          a sequence to which VCF features have been added
+  */
+ protected void transferAddedFeatures(SequenceI seq)
+ {
+   DBRefEntry[] dbrefs = seq.getDBRefs();
+   if (dbrefs == null)
+   {
+     return;
+   }
+
+   for (DBRefEntry dbref : dbrefs)
+   {
+     Mapping mapping = dbref.getMap();
+     SequenceI target = mapping == null ? null : mapping.getTo();
+     if (target == null)
+     {
+       // this dbref has no mapped sequence
+       continue;
+     }
+
+     MapList mapList = mapping.getMap();
+     if (mapList.getFromRatio() == 3)
+     {
+       /*
+        * dna-to-peptide product mapping
+        */
+       AlignmentUtils.computeProteinFeatures(seq, target, mapList);
+       continue;
+     }
+
+     /*
+      * nucleotide-to-nucleotide mapping e.g. transcript to CDS;
+      * only transfer those variant features that came from VCF
+      */
+     for (SequenceFeature variant : seq.getFeatures()
+             .getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT))
+     {
+       if (FEATURE_GROUP_VCF.equals(variant.getFeatureGroup()))
+       {
+         transferFeature(variant, target, mapList);
+       }
+     }
+   }
+ }
+
+ /**
+  * Attempts to add variants from the VCF that overlap the given sequence, and
+  * answers the number of variant features added. This is only possible if the
+  * sequence has gene loci (chromosomal coordinates) and its species is judged
+  * to match the VCF reference; otherwise zero is returned.
+  *
+  * @param seq
+  *          the sequence to add variant features to
+  * @param reader
+  *          the reader to query for variants
+  * @param vcfAssembly
+  *          the VCF reference identifier
+  * @return the count of features added
+  */
+ protected int loadSequenceVCF(SequenceI seq, VCFReader reader,
+         String vcfAssembly)
+ {
+   GeneLociI seqCoords = seq.getGeneLoci();
+   if (seqCoords == null)
+   {
+     System.out.println(String.format(
+             "Can't query VCF for %s as chromosome coordinates not known",
+             seq.getName()));
+     return 0;
+   }
+
+   if (!vcfSpeciesMatchesSequence(vcfAssembly, seqCoords.getSpeciesId()))
+   {
+     return 0;
+   }
+
+   /*
+    * query separately for each chromosomal range the sequence maps to
+    */
+   int total = 0;
+   for (int[] contig : seqCoords.getMap().getToRanges())
+   {
+     total += addVcfVariants(seq, reader, contig, vcfAssembly);
+   }
+   return total;
+ }
+
+ /**
+  * Answers true if the species inferred from the VCF reference identifier
+  * matches that for the sequence. Currently only two hard-coded pairings are
+  * recognised (human and C. elegans).
+  *
+  * @param vcfAssembly
+  *          the VCF reference identifier
+  * @param speciesId
+  *          the species id held for the sequence
+  * @return true if a recognised pairing matches, else false
+  */
+ boolean vcfSpeciesMatchesSequence(String vcfAssembly, String speciesId)
+ {
+   // PROBLEM 1
+   // there are many aliases for species - how to equate one with another?
+   // PROBLEM 2
+   // VCF ##reference header is an unstructured URI - how to extract species?
+   // perhaps check if ref includes any (Ensembl) alias of speciesId??
+   // TODO ask the user to confirm this??
+   // (the hard-coded pairings below are not a sustainable solution)
+
+   // gnomAD exome data example, paired with the Ensembl species id
+   boolean isHuman = vcfAssembly.contains("Homo_sapiens")
+           && "HOMO_SAPIENS".equals(speciesId);
+
+   // VEP VCF response example, paired with the Ensembl species id
+   return isHuman || (vcfAssembly.contains("c_elegans")
+           && "CAENORHABDITIS_ELEGANS".equals(speciesId));
+ }
+
+ /**
+  * Queries the VCF reader for any variants that overlap the given chromosome
+  * region of the sequence, and adds as variant features. Returns the number of
+  * variant features added. The query iterator is always closed, including if
+  * an error occurs while processing variants.
+  *
+  * @param seq
+  *          the sequence to add features to (must have gene loci)
+  * @param reader
+  *          the reader to query for variants
+  * @param range
+  *          start-end range of a sequence region in its chromosomal
+  *          coordinates
+  * @param vcfAssembly
+  *          the '##reference' identifier for the VCF reference assembly
+  * @return the count of features added
+  */
+ protected int addVcfVariants(SequenceI seq, VCFReader reader,
+         int[] range, String vcfAssembly)
+ {
+   GeneLociI seqCoords = seq.getGeneLoci();
+
+   String chromosome = seqCoords.getChromosomeId();
+   String seqRef = seqCoords.getAssemblyId();
+   String species = seqCoords.getSpeciesId();
+
+   /*
+    * map chromosomal coordinates from sequence to VCF if the VCF
+    * data has a different reference assembly to the sequence
+    */
+   // TODO generalise for non-human species
+   // - or get the user to choose in a dialog
+
+   int offset = 0;
+   if ("GRCh38".equalsIgnoreCase(seqRef) // Ensembl
+           && vcfAssembly.contains("Homo_sapiens_assembly19")) // gnomAD
+   {
+     String toRef = "GRCh37";
+     int[] newRange = mapReferenceRange(range, chromosome, "human",
+             seqRef, toRef);
+     if (newRange == null)
+     {
+       System.err.println(String.format(
+               "Failed to map %s:%s:%s:%d:%d to %s", species, chromosome,
+               seqRef, range[0], range[1], toRef));
+       return 0;
+     }
+     // shift to apply to VCF positions to get back to sequence's assembly
+     offset = newRange[0] - range[0];
+     range = newRange;
+   }
+
+   boolean forwardStrand = range[0] <= range[1];
+
+   /*
+    * query the VCF for overlaps
+    * (convert a reverse strand range to forwards)
+    */
+   int count = 0;
+   MapList mapping = seqCoords.getMap();
+
+   int fromLocus = Math.min(range[0], range[1]);
+   int toLocus = Math.max(range[0], range[1]);
+   CloseableIterator<VariantContext> variants = reader.query(chromosome,
+           fromLocus, toLocus);
+   try
+   {
+     while (variants.hasNext())
+     {
+       /*
+        * get variant location in sequence chromosomal coordinates
+        */
+       VariantContext variant = variants.next();
+
+       int start = variant.getStart() - offset;
+       int end = variant.getEnd() - offset;
+
+       /*
+        * convert chromosomal location to sequence coordinates
+        * - may be reverse strand (convert to forward for sequence feature)
+        * - null if a partially overlapping feature
+        */
+       int[] seqLocation = mapping.locateInFrom(start, end);
+       if (seqLocation != null)
+       {
+         int featureStart = Math.min(seqLocation[0], seqLocation[1]);
+         int featureEnd = Math.max(seqLocation[0], seqLocation[1]);
+         count += addAlleleFeatures(seq, variant, featureStart,
+                 featureEnd, forwardStrand);
+       }
+     }
+   } finally
+   {
+     /*
+      * release the query's resources even if processing failed
+      */
+     variants.close();
+   }
+
+   return count;
+ }
+
+ /**
+  * Answers the AF (allele frequency) value held on the variant for the given
+  * alternate allele index, or zero if there is none or it is not a valid
+  * number
+  *
+  * @param variant
+  *          the VCF variant record
+  * @param alleleIndex
+  *          index of the alternate allele
+  * @return the allele frequency, or 0 if not available
+  */
+ protected float getAlleleFrequency(VariantContext variant, int alleleIndex)
+ {
+   String frequency = getAttributeValue(variant, ALLELE_FREQUENCY_KEY,
+           alleleIndex);
+   if (frequency == null)
+   {
+     return 0f;
+   }
+
+   try
+   {
+     return Float.parseFloat(frequency);
+   } catch (NumberFormatException e)
+   {
+     // not parseable as a number
+     return 0f;
+   }
+ }
+
+ /**
+  * A convenience method to get an attribute value for an alternate allele.
+  * If the attribute holds a single String, that is returned whatever the
+  * index. If it holds a list, the string form of the value at the given
+  * index is returned. Answers null if the attribute is absent, is of any
+  * other type, or the list has no (non-null) value at the index.
+  *
+  * @param variant
+  *          the VCF variant record
+  * @param attributeName
+  *          the attribute (INFO field) key
+  * @param alleleIndex
+  *          index of the required value if the attribute is a list
+  * @return the value as a string, or null if not found
+  */
+ protected String getAttributeValue(VariantContext variant,
+         String attributeName, int alleleIndex)
+ {
+   Object att = variant.getAttribute(attributeName);
+
+   if (att instanceof String)
+   {
+     return (String) att;
+   }
+
+   if (att instanceof List<?>)
+   {
+     List<?> values = (List<?>) att;
+
+     /*
+      * a record may hold fewer values than its header declares; report
+      * 'no value' rather than fail the whole load with an exception
+      */
+     if (alleleIndex >= 0 && alleleIndex < values.size())
+     {
+       Object value = values.get(alleleIndex);
+       return value == null ? null : value.toString();
+     }
+   }
+
+   return null;
+ }
+
+ /**
+  * Adds a variant feature for each alternate allele of the VCF variant
+  * record, and answers the number of features added
+  *
+  * @param seq
+  *          the sequence to add features to
+  * @param variant
+  *          the VCF variant record
+  * @param featureStart
+  *          start position of the feature on the sequence
+  * @param featureEnd
+  *          end position of the feature on the sequence
+  * @param forwardStrand
+  *          false if the sequence is on the reverse strand
+  * @return the count of features added
+  */
+ protected int addAlleleFeatures(SequenceI seq, VariantContext variant,
+         int featureStart, int featureEnd, boolean forwardStrand)
+ {
+   /*
+    * Javadoc says getAlternateAlleles() imposes no order on the list returned
+    * so we proceed defensively to get them in strict order (by index)
+    */
+   int total = 0;
+   for (int altIndex = 0, altCount = variant.getAlternateAlleles()
+           .size(); altIndex < altCount; altIndex++)
+   {
+     total += addAlleleFeature(seq, variant, altIndex, featureStart,
+             featureEnd, forwardStrand);
+   }
+   return total;
+ }
+
+ /**
+  * Builds a sequence_variant feature for one alternate allele and adds it to
+  * the sequence. The feature description is "ref,alt" (reverse complemented
+  * if the sequence is on the reverse strand), its score is the allele
+  * frequency, and any further allele-specific VCF data is stored in the
+  * feature's key-value map. Answers the number of features added; this
+  * implementation always answers 1.
+  *
+  * @param seq
+  *          the sequence to add the feature to
+  * @param variant
+  *          the VCF variant record
+  * @param altAlleleIndex
+  *          (0, 1..)
+  * @param featureStart
+  *          start position of the feature on the sequence
+  * @param featureEnd
+  *          end position of the feature on the sequence
+  * @param forwardStrand
+  *          false if the sequence is on the reverse strand
+  * @return 1
+  */
+ protected int addAlleleFeature(SequenceI seq, VariantContext variant,
+         int altAlleleIndex, int featureStart, int featureEnd,
+         boolean forwardStrand)
+ {
+   String refBases = variant.getReference().getBaseString();
+   String altBases = variant.getAlternateAllele(altAlleleIndex)
+           .getBaseString();
+
+   /*
+    * show the base complement if the sequence is on the reverse strand
+    */
+   // TODO check how structural variants are shown on reverse strand
+   if (!forwardStrand)
+   {
+     refBases = Dna.reverseComplement(refBases);
+     altBases = Dna.reverseComplement(altBases);
+   }
+   String alleles = refBases + COMMA + altBases; // e.g. G,A
+
+   float alleleFrequency = getAlleleFrequency(variant, altAlleleIndex);
+
+   SequenceFeature sf = new SequenceFeature(
+           SequenceOntologyI.SEQUENCE_VARIANT, alleles, featureStart,
+           featureEnd, alleleFrequency, FEATURE_GROUP_VCF);
+   sf.setValue(Gff3Helper.ALLELES, alleles);
+
+   addAlleleProperties(variant, seq, sf, altAlleleIndex);
+
+   seq.addSequenceFeature(sf);
+
+   return 1;
+ }
+
+ /**
+  * Copies to the sequence feature those VCF attribute values which can be
+  * identified as belonging to the given alternate allele. CSQ (consequence)
+  * data is delegated to addConsequences. Other attributes are only taken if
+  * their INFO header declares one value per alternate allele (Number 'A') or
+  * one per allele including the reference (Number 'R').
+  *
+  * @param variant
+  *          the VCF variant record
+  * @param seq
+  *          the sequence the feature is for
+  * @param sf
+  *          the feature to add values to
+  * @param altAlelleIndex
+  *          (0, 1..)
+  */
+ protected void addAlleleProperties(VariantContext variant, SequenceI seq,
+         SequenceFeature sf, final int altAlelleIndex)
+ {
+   for (Entry<String, Object> attribute : variant.getAttributes()
+           .entrySet())
+   {
+     String key = attribute.getKey();
+
+     /*
+      * extract Consequence data (if present) that we are able to
+      * associate with the allele for this variant feature
+      */
+     if (CSQ.equals(key))
+     {
+       addConsequences(variant, seq, sf, altAlelleIndex);
+       continue;
+     }
+
+     VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(key);
+     if (infoHeader == null)
+     {
+       /*
+        * can't be sure what data belongs to this allele, so
+        * play safe and don't take any
+        */
+       continue;
+     }
+
+     /*
+      * work out which value (if any) is the one for this allele
+      */
+     VCFHeaderLineCount countType = infoHeader.getCountType();
+     int valueIndex;
+     if (countType == VCFHeaderLineCount.A)
+     {
+       // one value per alternate allele
+       valueIndex = altAlelleIndex;
+     }
+     else if (countType == VCFHeaderLineCount.R)
+     {
+       // one value per allele including reference, so skip the first
+       // e.g. the 3rd value is for the 2nd alternate allele
+       valueIndex = altAlelleIndex + 1;
+     }
+     else
+     {
+       // don't save other values as not allele-related
+       continue;
+     }
+
+     String value = getAttributeValue(variant, key, valueIndex);
+     if (value != null)
+     {
+       sf.setValue(key, value);
+     }
+   }
+ }
+
+ /**
+  * Inspects CSQ data blocks (consequences) and stores on the sequence feature
+  * (as a comma-separated CSQ value) those that apply to the current allele
+  * (and transcript if applicable). Does nothing if the variant's CSQ
+  * attribute is not a list, or no consequence qualifies.
+  * <p>
+  * Allele matching: if field ALLELE_NUM is present, it must match
+  * altAlleleIndex. If not present, then field Allele value must match the VCF
+  * Allele.
+  * <p>
+  * Transcript matching: if the sequence name contains (ignoring case) the
+  * Feature value of at least one consequence, then only consequences with
+  * that Feature value are selected (i.e. those for the current transcript
+  * sequence). If not, all consequences are candidates (this is the case when
+  * adding features to the gene sequence).
+  *
+  * @param variant
+  *          the VCF variant record
+  * @param seq
+  *          the sequence the feature is for
+  * @param sf
+  *          the feature to add the CSQ value to
+  * @param altAlelleIndex
+  *          (0, 1..)
+  */
+ protected void addConsequences(VariantContext variant, SequenceI seq,
+         SequenceFeature sf, int altAlelleIndex)
+ {
+   Object value = variant.getAttribute(CSQ);
+   if (!(value instanceof ArrayList<?>))
+   {
+     // absent (null), or not in the expected list form
+     return;
+   }
+   List<String> consequences = (List<String>) value;
+
+   /*
+    * if CSQ data includes 'Feature', and any value matches the sequence name,
+    * then restrict consequence data to only the matching value (transcript)
+    * i.e. just pick out consequences for the transcript the variant feature is on
+    * (if more than one value matches, the last one found is used)
+    */
+   String name = seq.getName();
+   String seqName = name == null ? "" : name.toLowerCase();
+   String matchFeature = null;
+   if (csqFeatureFieldIndex > -1)
+   {
+     for (String consequence : consequences)
+     {
+       String[] fields = consequence.split(PIPE_REGEX);
+       if (fields.length <= csqFeatureFieldIndex)
+       {
+         continue;
+       }
+       String featureIdentifier = fields[csqFeatureFieldIndex];
+       if (featureIdentifier.length() > 4
+               && seqName.contains(featureIdentifier.toLowerCase()))
+       {
+         matchFeature = featureIdentifier;
+       }
+     }
+   }
+
+   /*
+    * pick out the consequences for this allele (and transcript)
+    */
+   List<String> selected = new ArrayList<>();
+   for (String consequence : consequences)
+   {
+     if (includeConsequence(consequence.split(PIPE_REGEX), matchFeature,
+             variant, altAlelleIndex))
+     {
+       selected.add(consequence);
+     }
+   }
+   if (selected.isEmpty())
+   {
+     return;
+   }
+
+   StringBuilder csq = new StringBuilder(128);
+   csq.append(selected.get(0));
+   for (int i = 1; i < selected.size(); i++)
+   {
+     csq.append(COMMA).append(selected.get(i));
+   }
+   sf.setValue(CSQ, csq.toString());
+ }
+
+ /**
+ * Answers true if we want to associate this block of consequence data with
+ * the specified alternate allele of the VCF variant.
+ * <p>
+ * If consequence data includes the ALLELE_NUM field, then this has to match
+ * altAlleleIndex. Otherwise the Allele field of the consequence data has to
+ * match the allele value.
+ * <p>
+ * Optionally (if matchFeature is not null), restrict to only include
+ * consequences whose Feature value matches. This allows us to attach
+ * consequences to their respective transcripts.
+ * <p>
+ * Answers false if the consequence has too few fields to hold the field
+ * being checked, or if neither the ALLELE_NUM nor the Allele field position
+ * is known.
+ *
+ * @param csqFields
+ * the fields of a single consequence (split on the pipe character
+ * by the caller)
+ * @param matchFeature
+ * if not null, the Feature value the consequence must have (exact,
+ * case-sensitive match)
+ * @param variant
+ * the VCF variant, used to look up the alternate allele's bases
+ * when ALLELE_NUM is not available
+ * @param altAlelleIndex
+ * (0, 1..)
+ * @return true if the consequence should be included, else false
+ */
+ protected boolean includeConsequence(String[] csqFields,
+ String matchFeature, VariantContext variant, int altAlelleIndex)
+ {
+ /*
+ * check consequence is for the current transcript
+ */
+ if (matchFeature != null)
+ {
+ if (csqFields.length <= csqFeatureFieldIndex)
+ {
+ return false;
+ }
+ String featureIdentifier = csqFields[csqFeatureFieldIndex];
+ if (!featureIdentifier.equals(matchFeature))
+ {
+ return false; // consequence is for a different transcript
+ }
+ }
+
+ /*
+ * if ALLELE_NUM is present, it must match altAlleleIndex
+ * NB first alternate allele is 1 for ALLELE_NUM, 0 for altAlleleIndex
+ */
+ if (csqAlleleNumberFieldIndex > -1)
+ {
+ if (csqFields.length <= csqAlleleNumberFieldIndex)
+ {
+ return false;
+ }
+ String alleleNum = csqFields[csqAlleleNumberFieldIndex];
+ return String.valueOf(altAlelleIndex + 1).equals(alleleNum);
+ }
+
+ /*
+ * else consequence allele must match variant allele
+ */
+ if (csqAlleleFieldIndex > -1 && csqFields.length > csqAlleleFieldIndex)
+ {
+ String csqAllele = csqFields[csqAlleleFieldIndex];
+ String vcfAllele = variant.getAlternateAllele(altAlelleIndex)
+ .getBaseString();
+ return csqAllele.equals(vcfAllele);
+ }
+
+ return false;
+ }
+
+ /**
+ * A convenience method to complement a dna base and return the string value
+ * of its complement. Only the first byte of the array is complemented (via
+ * Dna.getComplement); any further bytes are ignored.
+ *
+ * @param reference
+ * base(s) as bytes; assumed to hold at least one element - an empty
+ * array would fail on the access to element 0
+ * @return the complement of the first base, as a one character string
+ */
+ protected String complement(byte[] reference)
+ {
+ return String.valueOf(Dna.getComplement((char) reference[0]));
+ }
+
+ /**
+ * Determines the location of the query range (chromosome positions) in a
+ * different reference assembly.
+ * <p>
+ * If the range is just a subregion of one for which we already have a mapping
+ * (for example, an exon sub-region of a gene), then the mapping is just
+ * computed arithmetically.
+ * <p>
+ * Otherwise, calls the Ensembl REST service that maps from one assembly
+ * reference's coordinates to another's. A mapping obtained from the service
+ * is saved in assemblyMappings for possible re-use by later calls.
+ *
+ * @param queryRange
+ * start-end chromosomal range in 'fromRef' coordinates
+ * @param chromosome
+ * the chromosome the range is on
+ * @param species
+ * the species the chromosome belongs to
+ * @param fromRef
+ * assembly reference for the query coordinates
+ * @param toRef
+ * assembly reference we wish to translate to
+ * @return the start-end range in 'toRef' coordinates, or null if the mapping
+ * service returned no mapping
+ */
+ protected int[] mapReferenceRange(int[] queryRange, String chromosome,
+ String species, String fromRef, String toRef)
+ {
+ /*
+ * first try shortcut of computing the mapping as a subregion of one
+ * we already have (e.g. for an exon, if we have the gene mapping)
+ */
+ int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome,
+ species, fromRef, toRef);
+ if (mappedRange != null)
+ {
+ return mappedRange;
+ }
+
+ /*
+ * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37
+ */
+ EnsemblMap mapper = new EnsemblMap();
+ int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef,
+ toRef, queryRange);
+
+ if (mapping == null)
+ {
+ // mapping service failure
+ return null;
+ }
+
+ /*
+ * save mapping for possible future re-use
+ * NB the inner map is keyed by int[] (so by array identity, not contents);
+ * findSubsumedRangeMapping iterates over its entries rather than looking
+ * up by key
+ */
+ String key = makeRangesKey(chromosome, species, fromRef, toRef);
+ if (!assemblyMappings.containsKey(key))
+ {
+ assemblyMappings.put(key, new HashMap<int[], int[]>());
+ }
+
+ assemblyMappings.get(key).put(queryRange, mapping);
+
+ return mapping;
+ }
+
+ /**
+ * If we already have a 1:1 contiguous mapping which subsumes the given query
+ * range, this method just calculates and returns the subset of that mapping,
+ * else it returns null. In practical terms, if a gene has a contiguous
+ * mapping between (for example) GRCh37 and GRCh38, then we assume that its
+ * subsidiary exons occupy unchanged relative positions, and just compute
+ * these as offsets, rather than do another lookup of the mapping.
+ * <p>
+ * If in future these assumptions prove invalid (e.g. for bacterial dna?!),
+ * simply remove this method or let it always return null.
+ * <p>
+ * Warning: many rapid calls to the /map service may result in a 429 overload
+ * error response
+ *
+ * @param queryRange
+ * start-end range to be mapped, in 'fromRef' coordinates
+ * @param chromosome
+ * chromosome id, used as part of the lookup key for saved mappings
+ * @param species
+ * species id, used as part of the lookup key for saved mappings
+ * @param fromRef
+ * assembly reference of the query coordinates
+ * @param toRef
+ * assembly reference to map to
+ * @return the start-end range in 'toRef' coordinates, computed by offset from
+ * the first saved mapping of equal from/to length whose 'from' range
+ * contains the query range, or null if there is none
+ */
+ protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome,
+ String species, String fromRef, String toRef)
+ {
+ String key = makeRangesKey(chromosome, species, fromRef, toRef);
+ if (assemblyMappings.containsKey(key))
+ {
+ Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
+ for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet())
+ {
+ int[] fromRange = mappedRange.getKey();
+ int[] toRange = mappedRange.getValue();
+ if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0])
+ {
+ /*
+ * mapping is 1:1 in length, so we trust it to have no discontinuities
+ */
+ if (MappingUtils.rangeContains(fromRange, queryRange))
+ {
+ /*
+ * fromRange subsumes our query range
+ * so the mapped range is the query range shifted by the same
+ * amount as the start of fromRange is shifted to the start of toRange
+ */
+ int offset = queryRange[0] - fromRange[0];
+ int mappedRangeFrom = toRange[0] + offset;
+ int mappedRangeTo = mappedRangeFrom + (queryRange[1] - queryRange[0]);
+ return new int[] { mappedRangeFrom, mappedRangeTo };
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Transfers the sequence feature to the target sequence, locating its start
+ * and end range based on the mapping. Features which do not overlap the
+ * target sequence are ignored (that is, nothing is added if the mapping
+ * cannot locate the feature's range). The feature added is a copy of the
+ * original with the new start and end positions, and the same feature group
+ * and score; the original feature is not modified here.
+ *
+ * @param sf
+ * the feature to copy
+ * @param targetSequence
+ * the sequence to add the copied feature to
+ * @param mapping
+ * mapping from the feature's coordinates to the target sequence
+ */
+ protected void transferFeature(SequenceFeature sf,
+ SequenceI targetSequence, MapList mapping)
+ {
+ int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+
+ if (mappedRange != null)
+ {
+ String group = sf.getFeatureGroup();
+ int newBegin = Math.min(mappedRange[0], mappedRange[1]);
+ int newEnd = Math.max(mappedRange[0], mappedRange[1]);
+ SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
+ group, sf.getScore());
+ targetSequence.addSequenceFeature(copy);
+ }
+ }
+
+ /**
+ * Formats a ranges map lookup key, by joining the species, chromosome,
+ * 'from' reference and 'to' reference (in that order, which differs from the
+ * parameter order) with the EXCL separator
+ *
+ * @param chromosome
+ * chromosome id
+ * @param species
+ * species id
+ * @param fromRef
+ * assembly reference being mapped from
+ * @param toRef
+ * assembly reference being mapped to
+ * @return the lookup key
+ */
+ protected static String makeRangesKey(String chromosome, String species,
+ String fromRef, String toRef)
+ {
+ return species + EXCL + chromosome + EXCL + fromRef + EXCL
+ + toRef;
+ }
+}
protected JMenuItem runGroovy = new JMenuItem();
+ protected JMenuItem loadVcf;
+
protected JCheckBoxMenuItem autoCalculate = new JCheckBoxMenuItem();
protected JCheckBoxMenuItem sortByTree = new JCheckBoxMenuItem();
associatedData_actionPerformed(e);
}
});
+ loadVcf = new JMenuItem(MessageManager.getString("label.load_vcf_file"));
+ loadVcf.setToolTipText(MessageManager.getString("label.load_vcf"));
+ loadVcf.addActionListener(new ActionListener()
+ {
+ @Override
+ public void actionPerformed(ActionEvent e)
+ {
+ loadVcf_actionPerformed();
+ }
+ });
autoCalculate.setText(
MessageManager.getString("label.autocalculate_consensus"));
autoCalculate.setState(
fileMenu.add(exportAnnotations);
fileMenu.add(loadTreeMenuItem);
fileMenu.add(associatedData);
+ fileMenu.add(loadVcf);
fileMenu.addSeparator();
fileMenu.add(closeMenuItem);
// selectMenu.add(listenToViewSelections);
}
+ /**
+ * Action performed when the 'load VCF' file menu item is selected. This
+ * implementation does nothing; presumably it is overridden in a subclass
+ * (TODO confirm).
+ */
+ protected void loadVcf_actionPerformed()
+ {
+ }
+
/**
* Constructs the entries on the Colour menu (but does not add them to the
* menu).
import javax.swing.JMenuItem;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
+import javax.swing.text.EditorKit;
+import javax.swing.text.html.HTMLEditorKit;
/**
* DOCUMENT ME!
{
try
{
+ textarea.setEditorKit(new HTMLEditorKit());
setJMenuBar(editMenubar);
jbInit();
} catch (Exception e)
{
}
+
+ /**
+  * Adds the given stylesheet rule to the Html editor. However note that CSS
+  * support is limited.
+  * <p>
+  * Does nothing if the text area's editor kit is not an HTMLEditorKit (or is
+  * null), rather than failing with a ClassCastException.
+  *
+  * @param rule
+  *          a CSS rule, passed to the editor kit's StyleSheet.addRule
+  * @see javax.swing.text.html.CSS
+  */
+ public void addStylesheetRule(String rule)
+ {
+   EditorKit editorKit = textarea.getEditorKit();
+   // instanceof is also false for null, so this covers the 'no kit' case
+   if (editorKit instanceof HTMLEditorKit)
+   {
+     ((HTMLEditorKit) editorKit).getStyleSheet().addRule(rule);
+   }
+ }
}
|| (fromRatio == 3 && toRatio == 1);
}
+ /**
+ * Returns a map which is the composite of this one and the input map. That
+ * is, the output map has the fromRanges of this map, and its toRanges are the
+ * toRanges of this map as transformed by the input map.
+ * <p>
+ * Returns null if the mappings cannot be traversed (not all toRanges of this
+ * map correspond to fromRanges of the input), or if this.toRatio does not
+ * match map.fromRatio.
+ * <p>
+ * NOTE(review): the code below does not compare this.toRatio with
+ * map.fromRatio, and always gives the output this map's fromRanges
+ * unchanged; the 'from' values shown for the outputs in the examples do not
+ * obviously agree with that - confirm the examples and the ratio condition.
+ *
+ * <pre>
+ * Example 1:
+ * this: from [1-100] to [501-600]
+ * input: from [10-40] to [60-90]
+ * output: from [10-40] to [560-590]
+ * Example 2 ('reverse strand exons'):
+ * this: from [1-100] to [2000-1951], [1000-951] // transcript to loci
+ * input: from [1-50] to [41-90] // CDS to transcript
+ * output: from [10-40] to [1960-1951], [1000-971] // CDS to gene loci
+ * </pre>
+ *
+ * @param map
+ * the map to apply to this map's toRanges; may be null
+ * @return the composite map, or null if map is null or if any of this map's
+ * toRanges cannot be located by the input map
+ */
+ public MapList traverse(MapList map)
+ {
+ if (map == null)
+ {
+ return null;
+ }
+
+ /*
+ * compound the ratios by this rule:
+ * A:B with M:N gives A*M:B*N
+ * reduced by greatest common divisor
+ * so 1:3 with 3:3 is 3:9 or 1:3
+ * 1:3 with 3:1 is 3:3 or 1:1
+ * 1:3 with 1:3 is 1:9
+ * 2:5 with 3:7 is 6:35
+ */
+ int outFromRatio = getFromRatio() * map.getFromRatio();
+ int outToRatio = getToRatio() * map.getToRatio();
+ int gcd = MathUtils.gcd(outFromRatio, outToRatio);
+ outFromRatio /= gcd;
+ outToRatio /= gcd;
+
+ List<int[]> toRanges = new ArrayList<>();
+ for (int[] range : getToRanges())
+ {
+ int[] transferred = map.locateInTo(range[0], range[1]);
+ if (transferred == null)
+ {
+ return null;
+ }
+ toRanges.add(transferred);
+ }
+
+ return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio);
+ }
+
}
}
/**
+ * Answers true if range's start-end positions include those of queryRange,
+ * where either range might be in reverse direction, else false. Both ends of
+ * queryRange must lie between the lower and upper ends of range (inclusive).
+ * Also answers false if either argument is null or is not of length 2.
+ *
+ * @param range
+ * a start-end range
+ * @param queryRange
+ * a candidate subrange of range (start2-end2)
+ * @return true if queryRange lies wholly within range, else false
+ */
+ public static boolean rangeContains(int[] range, int[] queryRange)
+ {
+ if (range == null || queryRange == null || range.length != 2
+ || queryRange.length != 2)
+ {
+ /*
+ * invalid arguments
+ */
+ return false;
+ }
+
+ int min = Math.min(range[0], range[1]);
+ int max = Math.max(range[0], range[1]);
+
+ return (min <= queryRange[0] && max >= queryRange[0]
+ && min <= queryRange[1] && max >= queryRange[1]);
+ }
+
+ /**
* Removes the specified number of positions from the given ranges. Provided
* to allow a stop codon to be stripped from a CDS sequence so that it matches
* the peptide translation length.
--- /dev/null
+package jalview.util;
+
+public class MathUtils
+{
+
+ /**
+ * Returns the greatest common divisor of two integers, computed recursively
+ * by Euclid's algorithm: gcd(a, b) is gcd(b, a mod b), until b is zero. The
+ * absolute value is taken when the recursion ends, so a negative argument
+ * does not give a negative result. If both arguments are zero the result is
+ * zero.
+ *
+ * @param a
+ * the first integer
+ * @param b
+ * the second integer
+ * @return the greatest common divisor of a and b
+ */
+ public static int gcd(int a, int b)
+ {
+ if (b == 0)
+ {
+ return Math.abs(a);
+ }
+ return gcd(b, a % b);
+ }
+
+}
}
return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
}
+
+ /**
+  * A helper method that strips off any leading or trailing html and body tags.
+  * If no html tag is found, then also html-encodes angle bracket characters.
+  * <p>
+  * More precisely: everything up to and including the first opening html tag
+  * is removed, then everything from the first closing body tag onwards, then
+  * everything from the first closing html tag onwards. Tags are matched
+  * ignoring case. An opening body tag is not removed.
+  *
+  * @param text
+  *          the text to process; may be null
+  * @return the stripped (and possibly encoded) text, or null if text is null
+  */
+ public static String stripHtmlTags(String text)
+ {
+   if (text == null)
+   {
+     return null;
+   }
+
+   /*
+    * tags are located in the text itself (ignoring case), not in an
+    * upper-cased copy: upper-casing can change the length of a string
+    * (e.g. "ß" becomes "SS"), which would make offsets found in the copy
+    * wrong for the original
+    */
+   int startTag = indexOfIgnoreCase(text, "<HTML>");
+   if (startTag > -1)
+   {
+     text = text.substring(startTag + 6);
+   }
+   // is omission of "<BODY>" intentional here??
+   int endTag = indexOfIgnoreCase(text, "</BODY>");
+   if (endTag > -1)
+   {
+     text = text.substring(0, endTag);
+   }
+   endTag = indexOfIgnoreCase(text, "</HTML>");
+   if (endTag > -1)
+   {
+     text = text.substring(0, endTag);
+   }
+
+   if (startTag == -1 && (text.contains("<") || text.contains(">")))
+   {
+     /*
+      * not html, so encode angle brackets as entities
+      * (literal replacement - no regular expression needed)
+      */
+     text = text.replace("<", "&lt;");
+     text = text.replace(">", "&gt;");
+   }
+   return text;
+ }
+
+ /**
+  * Returns the position of the first occurrence of tag in text, ignoring
+  * case, or -1 if it is not found
+  *
+  * @param text
+  * @param tag
+  * @return
+  */
+ private static int indexOfIgnoreCase(String text, String tag)
+ {
+   int tagLength = tag.length();
+   int lastStart = text.length() - tagLength;
+   for (int i = 0; i <= lastStart; i++)
+   {
+     if (text.regionMatches(true, i, tag, 0, tagLength))
+     {
+       return i;
+     }
+   }
+   return -1;
+ }
}
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.Mapping;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResultsI;
public class AlignmentUtilsTests
{
+ private static Sequence ts = new Sequence("short",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
@BeforeClass(alwaysRun = true)
public void setUpJvOptionPane()
JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
}
- public static Sequence ts = new Sequence("short",
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
-
@Test(groups = { "Functional" })
public void testExpandContext()
{
dna.addCodonFrame(acf);
/*
- * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+ * In this case, mappings originally came from matching Uniprot accessions
+ * - so need an xref on dna involving those regions.
+ * These are normally constructed from CDS annotation
*/
DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
new Mapping(mapfordna1));
- dna1.getDatasetSequence().addDBRef(dna1xref);
+ dna1.addDBRef(dna1xref);
+ assertEquals(2, dna1.getDBRefs().length); // to self and to pep1
DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
new Mapping(mapfordna2));
- dna2.getDatasetSequence().addDBRef(dna2xref);
+ dna2.addDBRef(dna2xref);
+ assertEquals(2, dna2.getDBRefs().length); // to self and to pep2
/*
* execute method under test:
assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
/*
+ * verify cDNA has added a dbref with mapping to CDS
+ */
+ assertEquals(3, dna1.getDBRefs().length);
+ DBRefEntry dbRefEntry = dna1.getDBRefs()[2];
+ assertSame(cds1Dss, dbRefEntry.getMap().getTo());
+ MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
+ new int[] { 1, 6 }, 1, 1);
+ assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
+ assertEquals(3, dna2.getDBRefs().length);
+ dbRefEntry = dna2.getDBRefs()[2];
+ assertSame(cds2Dss, dbRefEntry.getMap().getTo());
+ dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+ new int[] { 1, 9 }, 1, 1);
+ assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
+
+ /*
+ * verify CDS has added a dbref with mapping to cDNA
+ */
+ assertEquals(2, cds1Dss.getDBRefs().length);
+ dbRefEntry = cds1Dss.getDBRefs()[1];
+ assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
+ MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] {
+ 4, 6, 10, 12 }, 1, 1);
+ assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
+ assertEquals(2, cds2Dss.getDBRefs().length);
+ dbRefEntry = cds2Dss.getDBRefs()[1];
+ assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
+ cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7,
+ 9, 13, 15 }, 1, 1);
+ assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
+
+ /*
* Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
* the mappings are on the shared alignment dataset
* 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
assertEquals(s_as3, uas3.getSequenceAsString());
}
+ @Test(groups = { "Functional" })
+ public void testTransferGeneLoci()
+ {
+ SequenceI from = new Sequence("transcript",
+ "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
+ SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
+ MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
+ 1);
+
+ /*
+ * This test exercises AlignmentUtils.transferGeneLoci for a 'transcript'
+ * sequence and a 'CDS' sequence, where map is from CDS positions 1-12 to
+ * transcript positions 10-21.
+ *
+ * first with nothing to transfer - 'to' should be left with no gene loci
+ */
+ AlignmentUtils.transferGeneLoci(from, map, to);
+ assertNull(to.getGeneLoci());
+
+ /*
+ * next with gene loci set on 'from' sequence
+ */
+ int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
+ MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
+ from.setGeneLoci("human", "GRCh38", "7", geneMap);
+ AlignmentUtils.transferGeneLoci(from, map, to);
+
+ GeneLociI toLoci = to.getGeneLoci();
+ assertNotNull(toLoci);
+ // DBRefEntry constructor upper-cases 'source'
+ assertEquals("HUMAN", toLoci.getSpeciesId());
+ assertEquals("GRCh38", toLoci.getAssemblyId());
+ assertEquals("7", toLoci.getChromosomeId());
+
+ /*
+ * transcript 'exons' are 1-6, 7-16, 17-36
+ * CDS 1:12 is transcript 10-21
+ * transcript 'CDS' is 10-16, 17-21
+ * which is 'gene' 158-164, 210-214
+ */
+ MapList toMap = toLoci.getMap();
+ assertEquals(1, toMap.getFromRanges().size());
+ assertEquals(2, toMap.getFromRanges().get(0).length);
+ assertEquals(1, toMap.getFromRanges().get(0)[0]);
+ assertEquals(12, toMap.getFromRanges().get(0)[1]);
+ assertEquals(1, toMap.getToRanges().size());
+ assertEquals(4, toMap.getToRanges().get(0).length);
+ assertEquals(158, toMap.getToRanges().get(0)[0]);
+ assertEquals(164, toMap.getToRanges().get(0)[1]);
+ assertEquals(210, toMap.getToRanges().get(0)[2]);
+ assertEquals(214, toMap.getToRanges().get(0)[3]);
+ // or summarised as (but toString might change in future):
+ assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]",
+ toMap.toString());
+
+ /*
+ * an existing value is not overridden: after changing the loci on 'from'
+ * and transferring again, the loci held for 'to' should be as before
+ */
+ geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
+ from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
+ AlignmentUtils.transferGeneLoci(from, map, to);
+ assertEquals("GRCh38", toLoci.getAssemblyId());
+ assertEquals("7", toLoci.getChromosomeId());
+ toMap = toLoci.getMap();
+ assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]",
+ toMap.toString());
+ }
+
/**
* Tests for the method that maps nucleotide to protein based on CDS features
*/
assertEquals("[[3, 3], [8, 12]]",
Arrays.deepToString(ml.getFromRanges().toArray()));
}
-
}
"group");
assertTrue(sf.isContactFeature());
}
+
+ @Test(groups = { "Functional" })
+ public void testGetDetailsReport()
+ {
+ /*
+ * Checks the html table text returned by SequenceFeature.getDetailsReport()
+ * for several kinds of feature.
+ *
+ * first case: single locus, no group, no score
+ * - Start/end is reported as a single position
+ */
+ SequenceFeature sf = new SequenceFeature("variant", "G,C", 22, 22, null);
+ String expected = "<br><table><tr><td>Type</td><td>variant</td></tr>"
+ + "<tr><td>Start/end</td><td>22</td></tr>"
+ + "<tr><td>Description</td><td>G,C</td></tr></table>";
+ assertEquals(expected, sf.getDetailsReport());
+
+ // contact feature - Start/end is reported as start:end
+ sf = new SequenceFeature("Disulphide Bond", "a description", 28, 31,
+ null);
+ expected = "<br><table><tr><td>Type</td><td>Disulphide Bond</td></tr>"
+ + "<tr><td>Start/end</td><td>28:31</td></tr>"
+ + "<tr><td>Description</td><td>a description</td></tr></table>";
+ assertEquals(expected, sf.getDetailsReport());
+
+ sf = new SequenceFeature("variant", "G,C", 22, 33,
+ 12.5f, "group");
+ sf.setValue("Parent", "ENSG001");
+ sf.setValue("Child", "ENSP002");
+ expected = "<br><table><tr><td>Type</td><td>variant</td></tr>"
+ + "<tr><td>Start/end</td><td>22-33</td></tr>"
+ + "<tr><td>Description</td><td>G,C</td></tr>"
+ + "<tr><td>Score</td><td>12.5</td></tr>"
+ + "<tr><td>Group</td><td>group</td></tr>"
+ + "<tr><td>Child</td><td>ENSP002</td></tr>"
+ + "<tr><td>Parent</td><td>ENSG001</td></tr></table>";
+ assertEquals(expected, sf.getDetailsReport());
+
+ /*
+ * feature with embedded html link in description
+ * - the enclosing html tags are expected to be removed from the
+ * description, and the link left as it is
+ */
+ String desc = "<html>Fer2 Status: True Positive <a href=\"http://pfam.xfam.org/family/PF00111\">Pfam 8_8</a></html>";
+ sf = new SequenceFeature("Pfam", desc, 8, 83, "Uniprot");
+ expected = "<br><table><tr><td>Type</td><td>Pfam</td></tr>"
+ + "<tr><td>Start/end</td><td>8-83</td></tr>"
+ + "<tr><td>Description</td><td>Fer2 Status: True Positive <a href=\"http://pfam.xfam.org/family/PF00111\">Pfam 8_8</a></td></tr>"
+ + "<tr><td>Group</td><td>Uniprot</td></tr></table>";
+ assertEquals(expected, sf.getDetailsReport());
+ }
}
--- /dev/null
+package jalview.ext.htsjdk;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class VCFReaderTest
+{
+  /*
+   * a minimal VCF file: header lines followed by three variants
+   */
+  private static final String[] VCF = new String[] {
+      "##fileformat=VCFv4.2",
+      "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
+      "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G
+      "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA
+      "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A
+
+  // gnomAD exome variant dataset
+  // NB this is a path on a local volume, so testQuery_indexed can only be
+  // run where that file (and its index) is available
+  private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+  // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+  /**
+   * A test to exercise some basic functionality of the htsjdk VCF reader,
+   * reading from a non-indexed VCF file
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testReadVcf_plain() throws IOException
+  {
+    File f = writeVcfFile();
+    VCFReader reader = new VCFReader(f.getAbsolutePath());
+    CloseableIterator<VariantContext> variants = reader.iterator();
+
+    /*
+     * SNP C/G variant
+     */
+    VariantContext vc = variants.next();
+    assertTrue(vc.isSNP());
+    Allele ref = vc.getReference();
+    assertEquals(ref.getBaseString(), "C");
+    List<Allele> alleles = vc.getAlleles();
+    assertEquals(alleles.size(), 2);
+    assertTrue(alleles.get(0).isReference());
+    assertEquals(alleles.get(0).getBaseString(), "C");
+    assertFalse(alleles.get(1).isReference());
+    assertEquals(alleles.get(1).getBaseString(), "G");
+
+    /*
+     * Insertion G -> GA
+     */
+    vc = variants.next();
+    assertFalse(vc.isSNP());
+    assertTrue(vc.isSimpleInsertion());
+    ref = vc.getReference();
+    assertEquals(ref.getBaseString(), "G");
+    alleles = vc.getAlleles();
+    assertEquals(alleles.size(), 2);
+    assertTrue(alleles.get(0).isReference());
+    assertEquals(alleles.get(0).getBaseString(), "G");
+    assertFalse(alleles.get(1).isReference());
+    assertEquals(alleles.get(1).getBaseString(), "GA");
+
+    /*
+     * Deletion ACG -> A
+     */
+    vc = variants.next();
+    assertFalse(vc.isSNP());
+    assertTrue(vc.isSimpleDeletion());
+    ref = vc.getReference();
+    assertEquals(ref.getBaseString(), "ACG");
+    alleles = vc.getAlleles();
+    assertEquals(alleles.size(), 2);
+    assertTrue(alleles.get(0).isReference());
+    assertEquals(alleles.get(0).getBaseString(), "ACG");
+    assertFalse(alleles.get(1).isReference());
+    assertEquals(alleles.get(1).getBaseString(), "A");
+
+    assertFalse(variants.hasNext());
+
+    variants.close();
+    reader.close();
+  }
+
+  /**
+   * Creates a temporary file to be read by the htsjdk VCF reader, holding the
+   * lines of the VCF constant. The file is marked for deletion on exit.
+   * 
+   * @return the file written
+   * @throws IOException
+   */
+  protected File writeVcfFile() throws IOException
+  {
+    File f = File.createTempFile("Test", "vcf");
+    f.deleteOnExit();
+    /*
+     * try-with-resources ensures the writer is closed (releasing the file
+     * handle) even if writing fails part way through
+     */
+    try (PrintWriter pw = new PrintWriter(f))
+    {
+      for (String vcfLine : VCF)
+      {
+        pw.println(vcfLine);
+      }
+    }
+    return f;
+  }
+
+  /**
+   * A 'test' that demonstrates querying an indexed VCF file for features in a
+   * specified interval. It reads the file at VCF_PATH, and has no test group
+   * assigned.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testQuery_indexed() throws IOException
+  {
+    /*
+     * if not specified, assumes index file is filename.tbi
+     */
+    VCFReader reader = new VCFReader(VCF_PATH);
+
+    /*
+     * gene NMT1 (human) is on chromosome 17
+     * GRCh38 (Ensembl): 45051610-45109016
+     * GRCh37 (gnomAD): 43128978-43186384
+     * CDS begins at offset 9720, first CDS variant at offset 9724
+     */
+    CloseableIterator<VariantContext> features = reader.query("17",
+            43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
+
+    assertEquals(printNext(features), 43138702);
+    assertEquals(printNext(features), 43138704);
+    assertEquals(printNext(features), 43138707);
+    assertEquals(printNext(features), 43138708);
+    assertEquals(printNext(features), 43138710);
+    assertEquals(printNext(features), 43138711);
+    assertFalse(features.hasNext());
+
+    features.close();
+    reader.close();
+  }
+
+  /**
+   * Prints the toString value of the next variant, and returns its start
+   * location
+   * 
+   * @param features
+   *          an iterator over variants, which must have a next item
+   * @return the start position of the variant that was printed
+   */
+  protected int printNext(CloseableIterator<VariantContext> features)
+  {
+    VariantContext next = features.next();
+    System.out.println(next.toString());
+    return next.getStart();
+  }
+
+  // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+  /**
+   * Test the query method that wraps a non-indexed VCF file
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testQuery_plain() throws IOException
+  {
+    File f = writeVcfFile();
+    VCFReader reader = new VCFReader(f.getAbsolutePath());
+
+    /*
+     * query for overlap of 5-8 - should find variant at 7
+     */
+    CloseableIterator<VariantContext> variants = reader.query("20", 5, 8);
+
+    /*
+     * INDEL G/GA variant
+     */
+    VariantContext vc = variants.next();
+    assertTrue(vc.isIndel());
+    assertEquals(vc.getStart(), 7);
+    assertEquals(vc.getEnd(), 7);
+    Allele ref = vc.getReference();
+    assertEquals(ref.getBaseString(), "G");
+    List<Allele> alleles = vc.getAlleles();
+    assertEquals(alleles.size(), 2);
+    assertTrue(alleles.get(0).isReference());
+    assertEquals(alleles.get(0).getBaseString(), "G");
+    assertFalse(alleles.get(1).isReference());
+    assertEquals(alleles.get(1).getBaseString(), "GA");
+
+    assertFalse(variants.hasNext());
+
+    variants.close();
+    reader.close();
+  }
+}
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertTrue;
+import jalview.bin.Cache;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
-import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.io.DataSourceType;
import jalview.io.FileFormat;
import jalview.io.FormatAdapter;
+import jalview.urls.api.UrlProviderFactoryI;
+import jalview.urls.desktop.DesktopUrlProviderFactory;
import jalview.util.MessageManager;
+import jalview.util.UrlConstants;
import java.awt.Component;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import javax.swing.JMenu;
@BeforeMethod(alwaysRun = true)
public void setUp() throws IOException
{
+ Cache.loadProperties("test/jalview/io/testProps.jvprops");
+ String inMenuString = ("EMBL-EBI Search | http://www.ebi.ac.uk/ebisearch/search.ebi?db=allebi&query=$"
+ + SEQUENCE_ID
+ + "$"
+ + "|"
+ + "UNIPROT | http://www.uniprot.org/uniprot/$" + DB_ACCESSION + "$")
+ + "|"
+ + ("INTERPRO | http://www.ebi.ac.uk/interpro/entry/$"
+ + DB_ACCESSION + "$")
+ + "|"
+ +
+ // Gene3D entry tests for case (in)sensitivity
+ ("Gene3D | http://gene3d.biochem.ucl.ac.uk/Gene3D/search?sterm=$"
+ + DB_ACCESSION + "$&mode=protein");
+
+ UrlProviderFactoryI factory = new DesktopUrlProviderFactory(
+ UrlConstants.DEFAULT_LABEL, inMenuString, "");
+ Preferences.sequenceUrlLinks = factory.createUrlProvider();
+
alignment = new FormatAdapter().readFile(TEST_DATA,
DataSourceType.PASTE, FileFormat.Fasta);
AlignFrame af = new AlignFrame(alignment, 700, 500);
// add all the dbrefs to the sequences: Uniprot 1 each, Interpro all 3 to
// seq0, Gene3D to seq1
- seqs.get(0).addDBRef(refs.get(0));
+ SequenceI seq = seqs.get(0);
+ seq.addDBRef(refs.get(0));
- seqs.get(0).addDBRef(refs.get(1));
- seqs.get(0).addDBRef(refs.get(2));
- seqs.get(0).addDBRef(refs.get(3));
+ seq.addDBRef(refs.get(1));
+ seq.addDBRef(refs.get(2));
+ seq.addDBRef(refs.get(3));
seqs.get(1).addDBRef(refs.get(4));
seqs.get(1).addDBRef(refs.get(5));
// get the Popup Menu for first sequence
- testee = new PopupMenu(parentPanel, (Sequence) seqs.get(0), links);
+ List<SequenceFeature> noFeatures = Collections.<SequenceFeature> emptyList();
+ testee = new PopupMenu(parentPanel, seq, noFeatures);
Component[] seqItems = testee.sequenceMenu.getMenuComponents();
JMenu linkMenu = (JMenu) seqItems[6];
Component[] linkItems = linkMenu.getMenuComponents();
// sequence id for each link should match corresponding DB accession id
for (int i = 1; i < 4; i++)
{
- assertEquals(refs.get(i - 1).getSource(), ((JMenuItem) linkItems[i])
+ String msg = seq.getName() + " link[" + i + "]";
+ assertEquals(msg, refs.get(i - 1).getSource(),
+ ((JMenuItem) linkItems[i])
.getText().split("\\|")[0]);
- assertEquals(refs.get(i - 1).getAccessionId(),
+ assertEquals(msg, refs.get(i - 1).getAccessionId(),
((JMenuItem) linkItems[i])
.getText().split("\\|")[1]);
}
// get the Popup Menu for second sequence
- testee = new PopupMenu(parentPanel, (Sequence) seqs.get(1), links);
+ seq = seqs.get(1);
+ testee = new PopupMenu(parentPanel, seq, noFeatures);
seqItems = testee.sequenceMenu.getMenuComponents();
linkMenu = (JMenu) seqItems[6];
linkItems = linkMenu.getMenuComponents();
// sequence id for each link should match corresponding DB accession id
for (int i = 1; i < 3; i++)
{
- assertEquals(refs.get(i + 3).getSource(), ((JMenuItem) linkItems[i])
+ String msg = seq.getName() + " link[" + i + "]";
+ assertEquals(msg, refs.get(i + 3).getSource(),
+ ((JMenuItem) linkItems[i])
.getText().split("\\|")[0].toUpperCase());
- assertEquals(refs.get(i + 3).getAccessionId(),
+ assertEquals(msg, refs.get(i + 3).getAccessionId(),
((JMenuItem) linkItems[i]).getText().split("\\|")[1]);
}
nomatchlinks.add("NOMATCH | http://www.uniprot.org/uniprot/$"
+ DB_ACCESSION + "$");
- testee = new PopupMenu(parentPanel, (Sequence) seqs.get(0),
- nomatchlinks);
+ testee = new PopupMenu(parentPanel, seq, noFeatures);
seqItems = testee.sequenceMenu.getMenuComponents();
linkMenu = (JMenu) seqItems[6];
assertFalse(linkMenu.isEnabled());
import org.testng.annotations.Test;
-import sun.swing.SwingUtilities2;
-
public class SeqCanvasTest
{
/**
av.setScaleAboveWrapped(true);
av.setScaleLeftWrapped(true);
av.setScaleRightWrapped(true);
- FontMetrics fm = SwingUtilities2.getFontMetrics(testee, av.getFont());
+ FontMetrics fm = testee.getFontMetrics(av.getFont());
int labelWidth = fm.stringWidth("000") + charWidth;
assertEquals(labelWidth, 39); // 3 x 9 + charWidth
av.setScaleAboveWrapped(true);
av.setScaleLeftWrapped(true);
av.setScaleRightWrapped(true);
- FontMetrics fm = SwingUtilities2.getFontMetrics(testee, av.getFont());
+ FontMetrics fm = testee.getFontMetrics(av.getFont());
int labelWidth = fm.stringWidth("000") + charWidth;
assertEquals(labelWidth, 39); // 3 x 9 + charWidth
int annotationHeight = testee.getAnnotationHeight();
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+
+import junit.extensions.PA;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
// . codonframes
//
//
- HashMap<String, String> dbtoviewBit = new HashMap<>();
+ Map<String, String> dbtoviewBit = new HashMap<>();
List<String> keyseq = new ArrayList<>();
- HashMap<String, File> savedProjects = new HashMap<>();
+ Map<String, File> savedProjects = new HashMap<>();
for (String[] did : new String[][] { { "UNIPROT", "P00338" } })
{
if (pass2 == 0)
{ // retrieve and show cross-refs in this thread
- cra = new CrossRefAction(af, seqs, dna, db);
+ cra = CrossRefAction.getHandlerFor(seqs, dna, db, af);
cra.run();
- if (cra.getXrefViews().size() == 0)
+ cra_views = (List<AlignmentViewPanel>) PA.getValue(cra,
+ "xrefViews");
+ if (cra_views.size() == 0)
{
failedXrefMenuItems.add("No crossrefs retrieved for "
+ first + " -> " + db);
continue;
}
- cra_views = cra.getXrefViews();
assertNucleotide(cra_views.get(0),
"Nucleotide panel included proteins for " + first
+ " -> " + db);
if (pass3 == 0)
{
-
SequenceI[] xrseqs = avp.getAlignment()
.getSequencesArray();
AlignFrame nextaf = Desktop.getAlignFrameFor(avp
.getAlignViewport());
- cra = new CrossRefAction(nextaf, xrseqs, avp
- .getAlignViewport().isNucleotide(), xrefdb);
+ cra = CrossRefAction.getHandlerFor(xrseqs, avp
+ .getAlignViewport().isNucleotide(), xrefdb,
+ nextaf);
cra.run();
- if (cra.getXrefViews().size() == 0)
+ cra_views2 = (List<AlignmentViewPanel>) PA.getValue(
+ cra, "xrefViews");
+ if (cra_views2.size() == 0)
{
failedXrefMenuItems
.add("No crossrefs retrieved for '"
+ " via '" + nextaf.getTitle() + "'");
continue;
}
- cra_views2 = cra.getXrefViews();
assertNucleotide(cra_views2.get(0),
"Nucleotide panel included proteins for '"
+ nextxref + "' to " + xrefdb
* viewpanel needs to be called with a distinct xrefpath to ensure
* each one's strings are compared)
*/
- private void stringify(HashMap<String, String> dbtoviewBit,
- HashMap<String, File> savedProjects, String xrefpath,
+ private void stringify(Map<String, String> dbtoviewBit,
+ Map<String, File> savedProjects, String xrefpath,
AlignmentViewPanel avp)
{
if (savedProjects != null)
--- /dev/null
+package jalview.io.vcf;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
+import jalview.gui.AlignFrame;
+import jalview.io.DataSourceType;
+import jalview.io.FileLoader;
+import jalview.io.gff.Gff3Helper;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.MapList;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class VCFLoaderTest
+{
+ // tolerance used when comparing float feature scores with assertEquals
+ private static final float DELTA = 0.00001f;
+
+ // columns 9717- of gene P30419 from Ensembl (much modified)
+ // FASTA text for three 'gene' sequences, each followed by its gapped
+ // 'transcript' sequence(s); each gene's description line names a chromosome
+ // range and strand (1 or -1)
+ private static final String FASTA = ""
+ +
+ /*
+ * forward strand 'gene' and 'transcript' with two exons
+ */
+ ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
+ + "CAAGCTGGCGGACGAGAGTGTGACA\n"
+ + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
+
+ /*
+ * reverse strand gene and transcript (reverse complement alleles!)
+ */
+ + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
+ + "TGTCACACTCTCGTCCGCCAGCTTG\n"
+ + ">transcript2/1-18\n" + "-GTCACACTCT----CGCCAGCT--\n"
+
+ /*
+ * 'gene' on chromosome 5 with two transcripts
+ */
+ + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n"
+ + "CAAGCTGGCGGACGAGAGTGTGACA\n"
+ + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
+ + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
+
+ // lines of a minimal VCF file: a header declaring the AF (allele frequency)
+ // INFO field, followed by two multi-allelic records on chromosome 17
+ private static final String[] VCF = { "##fileformat=VCFv4.2",
+ "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
+ "##reference=Homo_sapiens/GRCh38",
+ "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
+ // A/T,C variants in position 2 of gene sequence (precedes transcript)
+ // should create 2 variant features with respective scores
+ "17\t45051611\t.\tA\tT,C\t1666.64\tRF\tAC=15;AF=5.0e-03,4.0e-03",
+ // SNP G/C in position 4 of gene sequence, position 2 of transcript
+ // insertion G/GA is transferred to nucleotide but not to peptide
+ "17\t45051613\t.\tG\tGA,C\t1666.64\tRF\tAC=15;AF=3.0e-03,2.0e-03" };
+
+ /**
+  * Loads the VCF test data onto the test alignment, and verifies the variant
+  * features added to the first sequence (forward strand 'gene'), to the
+  * second sequence (its 'transcript'), and the peptide variant computed on
+  * the transcript's protein product.
+  *
+  * @throws IOException
+  *           if the temporary VCF file cannot be written
+  */
+ @Test(groups = "Functional")
+ public void testDoLoad() throws IOException
+ {
+   AlignmentI al = buildAlignment();
+   VCFLoader loader = new VCFLoader(al);
+
+   File f = makeVcf();
+
+   loader.doLoad(f.getPath(), null);
+
+   /*
+    * verify variant feature(s) added to gene
+    * NB alleles at a locus may not be processed, and features added,
+    * in the order in which they appear in the VCF record as method
+    * VariantContext.getAlternateAlleles() does not guarantee order
+    * - order of assertions here matches what we find (is not important)
+    */
+   List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
+           .getSequenceFeatures();
+   SequenceFeatures.sortFeatures(geneFeatures, true);
+   assertEquals(geneFeatures.size(), 4);
+   SequenceFeature sf = geneFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 2);
+   assertEquals(sf.getEnd(), 2);
+   assertEquals(sf.getScore(), 4.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C");
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   sf = geneFeatures.get(1);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 2);
+   assertEquals(sf.getEnd(), 2);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 5.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
+
+   sf = geneFeatures.get(2);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 4);
+   assertEquals(sf.getEnd(), 4);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 2.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
+
+   sf = geneFeatures.get(3);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 4);
+   assertEquals(sf.getEnd(), 4);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 3.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
+
+   /*
+    * verify variant feature(s) added to transcript
+    */
+   List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
+           .getSequenceFeatures();
+   assertEquals(transcriptFeatures.size(), 2);
+   sf = transcriptFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 2);
+   assertEquals(sf.getEnd(), 2);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 2.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
+   sf = transcriptFeatures.get(1);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 2);
+   assertEquals(sf.getEnd(), 2);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 3.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
+
+   /*
+    * verify SNP variant feature(s) computed and added to protein
+    * first codon AGC varies to ACC giving S/T
+    */
+   DBRefEntry[] dbRefs = al.getSequenceAt(1).getDBRefs();
+   SequenceI peptide = null;
+   for (DBRefEntry dbref : dbRefs)
+   {
+     // the protein product is the one reached via a 3:1 (codon) mapping
+     if (dbref.getMap().getMap().getFromRatio() == 3)
+     {
+       peptide = dbref.getMap().getTo();
+     }
+   }
+   // fail with a clear message rather than a NullPointerException below
+   assertTrue(peptide != null,
+           "No protein product (3:1 mapping) found on transcript");
+   List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
+   assertEquals(proteinFeatures.size(), 1);
+   sf = proteinFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 1);
+   assertEquals(sf.getEnd(), 1);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getDescription(), "p.Ser1Thr");
+ }
+
+ /**
+  * Writes the lines of the VCF test data to a temporary file, which is
+  * marked for deletion when the JVM exits.
+  *
+  * @return the temporary file
+  * @throws IOException
+  *           if the file cannot be created or written
+  */
+ private File makeVcf() throws IOException
+ {
+   File f = File.createTempFile("Test", ".vcf");
+   f.deleteOnExit();
+   // try-with-resources so the writer is closed even if writing fails
+   try (PrintWriter pw = new PrintWriter(f))
+   {
+     for (String vcfLine : VCF)
+     {
+       pw.println(vcfLine);
+     }
+     // PrintWriter swallows IOExceptions; surface any write failure here
+     if (pw.checkError())
+     {
+       throw new IOException("Failed to write VCF test data to "
+               + f.getPath());
+     }
+   }
+   return f;
+ }
+
+ /**
+  * Builds the test alignment from the FASTA data: three 'gene' sequences
+  * (gene1 forward strand and gene2 reverse strand on chromosome 17, gene3
+  * on chromosome 5) and four 'transcript' sequences. Each sequence is given
+  * gene loci mapping it to chromosome coordinates, and transcript1,
+  * transcript2 and transcript3 each get a peptide product attached as a
+  * DBRef with a 3:1 mapping.
+  *
+  * @return the alignment held by the loaded alignment frame's viewport
+  */
+ private AlignmentI buildAlignment()
+ {
+   AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
+           DataSourceType.PASTE);
+   AlignmentI alignment = af.getViewport().getAlignment();
+
+   /*
+    * gene1 maps to the chromosome forward strand (normally done when the
+    * sequence is fetched from Ensembl and transcripts computed)
+    */
+   mapToChromosome(alignment.findName("gene1"), "17", 45051610, 45051634);
+
+   /*
+    * transcript1/1-18 is gene1/3-10,15-24
+    * which is chromosome 45051612-45051619,45051624-45051633
+    */
+   SequenceI transcript1 = alignment.findName("transcript1");
+   mapToChromosome(transcript1, "17", 45051612, 45051619, 45051624,
+           45051633);
+
+   /*
+    * gene2 maps to the chromosome reverse strand
+    */
+   mapToChromosome(alignment.findName("gene2"), "17", 45051634, 45051610);
+
+   /*
+    * transcript2/1-18 is gene2/2-11,16-23
+    * which is chromosome 45051633-45051624,45051619-45051612
+    */
+   SequenceI transcript2 = alignment.findName("transcript2");
+   mapToChromosome(transcript2, "17", 45051633, 45051624, 45051619,
+           45051612);
+
+   /*
+    * protein products for transcript1 and transcript2
+    */
+   addProteinProduct(transcript1, "ENSP001", "SWRECD");
+   addProteinProduct(transcript2, "ENSP002", "VTLSPA");
+
+   /*
+    * gene3 and its two transcripts map to chromosome 5
+    */
+   mapToChromosome(alignment.findName("gene3"), "5", 45051610, 45051634);
+   SequenceI transcript3 = alignment.findName("transcript3");
+   mapToChromosome(transcript3, "5", 45051612, 45051619, 45051624,
+           45051633);
+   mapToChromosome(alignment.findName("transcript4"), "5", 45051615,
+           45051617, 45051619, 45051632, 45051634, 45051634);
+
+   /*
+    * protein product for transcript3
+    */
+   addProteinProduct(transcript3, "ENSP003", "SWRECD");
+
+   return alignment;
+ }
+
+ /**
+  * Sets gene loci on the sequence, mapping its start-end range 1:1 to the
+  * given chromosome ranges, for species "homo_sapiens", assembly "GRCh38"
+  *
+  * @param seq
+  * @param chromosome
+  * @param chromosomeRanges
+  *          [start1, end1, start2, end2, ...] in chromosome coordinates
+  */
+ private static void mapToChromosome(SequenceI seq, String chromosome,
+         int... chromosomeRanges)
+ {
+   int[] seqRange = new int[] { seq.getStart(), seq.getEnd() };
+   MapList lociMap = new MapList(seqRange, chromosomeRanges, 1, 1);
+   seq.setGeneLoci("homo_sapiens", "GRCh38", chromosome, lociMap);
+ }
+
+ /**
+  * Adds a DBRef to the transcript whose mapping is to a new peptide
+  * sequence, with transcript positions 1-18 mapped 3:1 to peptide
+  * positions 1-6
+  *
+  * @param transcript
+  * @param peptideName
+  *          used as both peptide sequence name and DBRef accession
+  * @param residues
+  */
+ private static void addProteinProduct(SequenceI transcript,
+         String peptideName, String residues)
+ {
+   SequenceI peptide = new Sequence(peptideName, residues);
+   MapList codonMap = new MapList(new int[] { 1, 18 },
+           new int[] { 1, 6 }, 3, 1);
+   Mapping toPeptide = new Mapping(peptide, codonMap);
+   transcript.addDBRef(new DBRefEntry("", "", peptideName, toPeptide));
+ }
+
+ /**
+  * Test with 'gene' and 'transcript' mapped to the reverse strand of the
+  * chromosome. The VCF variant positions (in forward coordinates) should get
+  * correctly located on sequence positions, with alleles reverse
+  * complemented.
+  *
+  * @throws IOException
+  *           if the temporary VCF file cannot be written
+  */
+ @Test(groups = "Functional")
+ public void testDoLoad_reverseStrand() throws IOException
+ {
+   AlignmentI al = buildAlignment();
+
+   VCFLoader loader = new VCFLoader(al);
+
+   File f = makeVcf();
+
+   loader.doLoad(f.getPath(), null);
+
+   /*
+    * verify variant feature(s) added to gene2
+    * gene/1-25 maps to chromosome 45051634- reverse strand
+    * variants A/T, A/C at 45051611 and G/GA,G/C at 45051613 map to
+    * T/A, T/G and C/TC,C/G at gene positions 24 and 22 respectively
+    */
+   List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
+           .getSequenceFeatures();
+   SequenceFeatures.sortFeatures(geneFeatures, true);
+   assertEquals(geneFeatures.size(), 4);
+   SequenceFeature sf = geneFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 22);
+   assertEquals(sf.getEnd(), 22);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 2.0e-03, DELTA);
+   // TestNG argument order is (actual, expected)
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
+
+   sf = geneFeatures.get(1);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 22);
+   assertEquals(sf.getEnd(), 22);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 3.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,TC");
+
+   sf = geneFeatures.get(2);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 24);
+   assertEquals(sf.getEnd(), 24);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 4.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
+
+   sf = geneFeatures.get(3);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 24);
+   assertEquals(sf.getEnd(), 24);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 5.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
+
+   /*
+    * verify variant feature(s) added to transcript2
+    * variants G/GA,G/C at position 22 of gene overlap and map to
+    * C/TC,C/G at position 17 of transcript
+    */
+   List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
+           .getSequenceFeatures();
+   assertEquals(transcriptFeatures.size(), 2);
+   sf = transcriptFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 17);
+   assertEquals(sf.getEnd(), 17);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 2.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
+
+   sf = transcriptFeatures.get(1);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 17);
+   assertEquals(sf.getEnd(), 17);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getScore(), 3.0e-03, DELTA);
+   assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,TC");
+
+   /*
+    * verify variant feature(s) computed and added to protein
+    * last codon GCT varies to GGT giving A/G in the last peptide position
+    */
+   DBRefEntry[] dbRefs = al.getSequenceAt(3).getDBRefs();
+   SequenceI peptide = null;
+   for (DBRefEntry dbref : dbRefs)
+   {
+     // the protein product is the one reached via a 3:1 (codon) mapping
+     if (dbref.getMap().getMap().getFromRatio() == 3)
+     {
+       peptide = dbref.getMap().getTo();
+     }
+   }
+   // fail with a clear message rather than a NullPointerException below
+   assertTrue(peptide != null,
+           "No protein product (3:1 mapping) found on transcript");
+   List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
+   assertEquals(proteinFeatures.size(), 1);
+   sf = proteinFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 6);
+   assertEquals(sf.getEnd(), 6);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getDescription(), "p.Ala6Gly");
+ }
+
+ /**
+  * Tests that if VEP consequence (CSQ) data is present in the VCF data, then
+  * it is added to the variant feature, but restricted where possible to the
+  * consequences for a specific transcript
+  *
+  * @throws IOException
+  */
+ @Test(groups = "Functional")
+ public void testDoLoad_vepCsq() throws IOException
+ {
+   AlignmentI al = buildAlignment();
+
+   VCFLoader loader = new VCFLoader(al);
+
+   /*
+    * VCF data file with variants at gene3 positions
+    * 1 C/A
+    * 5 C/T
+    * 9 CGG/C (deletion)
+    * 13 C/G, C/T
+    * 17 A/AC (insertion), A/G
+    */
+   loader.doLoad("test/jalview/io/vcf/testVcf.dat", null);
+
+   /*
+    * verify variant feature(s) added to gene3
+    * gene features include Consequence for all (here both) transcripts
+    */
+   List<SequenceFeature> geneFeatures = al.findName("gene3")
+           .getSequenceFeatures();
+   SequenceFeatures.sortFeatures(geneFeatures, true);
+   assertEquals(geneFeatures.size(), 7);
+   assertCsqVariant(geneFeatures.get(0), 1, 1, 0.1f, "C,A", 2, null);
+   assertCsqVariant(geneFeatures.get(1), 5, 5, 0.2f, "C,T", 2, null);
+   // deletion over 3 positions
+   assertCsqVariant(geneFeatures.get(2), 9, 11, 0.3f, "CGG,C", 2, null);
+   assertCsqVariant(geneFeatures.get(3), 13, 13, 0.5f, "C,T", 2, null);
+   assertCsqVariant(geneFeatures.get(4), 13, 13, 0.4f, "C,G", 2, null);
+   assertCsqVariant(geneFeatures.get(5), 17, 17, 0.7f, "A,G", 2, null);
+   // insertion
+   assertCsqVariant(geneFeatures.get(6), 17, 17, 0.6f, "A,AC", 2, null);
+
+   /*
+    * verify variant feature(s) added to transcript3
+    * at columns 5 (1), 17 (2), positions 3, 11
+    * note the deletion at columns 9-11 is not transferred since col 11
+    * has no mapping to transcript 3
+    * transcript features only have Consequence for that transcript
+    */
+   List<SequenceFeature> transcriptFeatures = al.findName("transcript3")
+           .getSequenceFeatures();
+   SequenceFeatures.sortFeatures(transcriptFeatures, true);
+   assertEquals(transcriptFeatures.size(), 3);
+   assertCsqVariant(transcriptFeatures.get(0), 3, 3, 0.2f, "C,T", 1,
+           "transcript3");
+   assertCsqVariant(transcriptFeatures.get(1), 11, 11, 0.7f, "A,G", 1,
+           "transcript3");
+   assertCsqVariant(transcriptFeatures.get(2), 11, 11, 0.6f, "A,AC", 1,
+           "transcript3");
+
+   /*
+    * verify variants computed on protein product for transcript3
+    * peptide is SWRECD
+    * codon variants are AGC/AGT position 1 which is synonymous
+    * and GAG/GGG which is E/G in position 4
+    * the insertion variant is not transferred to the peptide
+    */
+   DBRefEntry[] dbRefs = al.findName("transcript3").getDBRefs();
+   SequenceI peptide = null;
+   for (DBRefEntry dbref : dbRefs)
+   {
+     // the protein product is the one reached via a 3:1 (codon) mapping
+     if (dbref.getMap().getMap().getFromRatio() == 3)
+     {
+       peptide = dbref.getMap().getTo();
+     }
+   }
+   // fail with a clear message rather than a NullPointerException below
+   assertTrue(peptide != null,
+           "No protein product (3:1 mapping) found on transcript3");
+   List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
+   assertEquals(proteinFeatures.size(), 1);
+   SequenceFeature sf = proteinFeatures.get(0);
+   assertEquals(sf.getFeatureGroup(), "VCF");
+   assertEquals(sf.getBegin(), 4);
+   assertEquals(sf.getEnd(), 4);
+   assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+   assertEquals(sf.getDescription(), "p.Glu4Gly");
+
+   /*
+    * verify variant feature(s) added to transcript4
+    * at columns 13 (2) and 17 (2), positions 7 and 11
+    */
+   transcriptFeatures = al.findName("transcript4").getSequenceFeatures();
+   SequenceFeatures.sortFeatures(transcriptFeatures, true);
+   assertEquals(transcriptFeatures.size(), 4);
+   assertCsqVariant(transcriptFeatures.get(0), 7, 7, 0.5f, "C,T", 1,
+           "transcript4");
+   assertCsqVariant(transcriptFeatures.get(1), 7, 7, 0.4f, "C,G", 1,
+           "transcript4");
+   assertCsqVariant(transcriptFeatures.get(2), 11, 11, 0.7f, "A,G", 1,
+           "transcript4");
+   assertCsqVariant(transcriptFeatures.get(3), 11, 11, 0.6f, "A,AC", 1,
+           "transcript4");
+ }
+
+ /**
+  * Asserts the location, score, alleles and CSQ data of a variant feature
+  *
+  * @param sf
+  *          the feature to check
+  * @param begin
+  *          expected start position
+  * @param end
+  *          expected end position
+  * @param score
+  *          expected score (compared to within DELTA)
+  * @param alleles
+  *          expected value of the "alleles" attribute
+  * @param csqCount
+  *          expected number of comma-separated entries in the "CSQ"
+  *          attribute
+  * @param csqTranscript
+  *          if not null, a transcript name the "CSQ" attribute must contain
+  */
+ private static void assertCsqVariant(SequenceFeature sf, int begin,
+         int end, float score, String alleles, int csqCount,
+         String csqTranscript)
+ {
+   assertEquals(sf.getBegin(), begin);
+   assertEquals(sf.getEnd(), end);
+   assertEquals(sf.getScore(), score, DELTA);
+   assertEquals(sf.getValue("alleles"), alleles);
+   assertEquals(((String) sf.getValue("CSQ")).split(",").length, csqCount);
+   if (csqTranscript != null)
+   {
+     assertTrue(sf.getValue("CSQ").toString().contains(csqTranscript));
+   }
+ }
+}
--- /dev/null
+##fileformat=VCFv4.2
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF_Female,Number=R,Type=Float,Description="Allele Frequency among Female genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|PolyPhen">
+##reference=/Homo_sapiens/GRCh38
+#CHROM POS ID REF ALT QUAL FILTER INFO
+5 45051610 . C A 81.96 RF;AC0 AC=1;AF=0.1;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=A|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,A|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
+5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
+5 45051618 . CGG C 41.94 AC0 AC=1;AF=0.3;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=C|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,C|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,CSQ=CGT|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,CGT|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
+5 45051622 . C G,T 224.23 RF;AC0 AC=1,2;AF=0.4,0.5;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
+5 45051626 . A AC,G 433.35 RF;AC0 AC=3,4;AF=0.6,0.7;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,AC|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,AC|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
assertEquals(1, merged.size());
assertArrayEquals(new int[] { 9, 0 }, merged.get(0));
}
+
+ /**
+  * Test the method that compounds ('traverses') two mappings: the 'to'
+  * ranges of the first mapping are mapped through the second mapping, giving
+  * a single mapping from the first mapping's 'from' ranges. Declared in the
+  * "Functional" group, like the other tests, so that it is not skipped by a
+  * group-filtered test run.
+  */
+ @Test(groups = { "Functional" })
+ public void testTraverse()
+ {
+   /*
+    * simple 1:1 plus 1:1 forwards
+    */
+   MapList ml1 = new MapList(new int[] { 3, 4, 8, 12 },
+           new int[] { 5, 8, 11, 13 }, 1, 1);
+   MapList ml2 = new MapList(new int[] { 1, 50 },
+           new int[] { 40, 45, 70, 75, 90, 127 }, 1, 1);
+   MapList compound = ml1.traverse(ml2);
+
+   assertEquals(compound.getFromRatio(), 1);
+   assertEquals(compound.getToRatio(), 1);
+   List<int[]> fromRanges = compound.getFromRanges();
+   assertEquals(fromRanges.size(), 2);
+   assertArrayEquals(new int[] { 3, 4 }, fromRanges.get(0));
+   assertArrayEquals(new int[] { 8, 12 }, fromRanges.get(1));
+   List<int[]> toRanges = compound.getToRanges();
+   assertEquals(toRanges.size(), 2);
+   // 5-8 maps to 44-45,70-71
+   // 11-13 maps to 74-75,90
+   assertArrayEquals(new int[] { 44, 45, 70, 71 }, toRanges.get(0));
+   assertArrayEquals(new int[] { 74, 75, 90, 90 }, toRanges.get(1));
+
+   /*
+    * 1:1 over 1:1 backwards ('reverse strand')
+    */
+   ml1 = new MapList(new int[] { 1, 50 }, new int[] { 70, 119 }, 1, 1);
+   ml2 = new MapList(new int[] { 1, 500 },
+           new int[] { 1000, 901, 600, 201 }, 1, 1);
+   compound = ml1.traverse(ml2);
+
+   assertEquals(compound.getFromRatio(), 1);
+   assertEquals(compound.getToRatio(), 1);
+   fromRanges = compound.getFromRanges();
+   assertEquals(fromRanges.size(), 1);
+   assertArrayEquals(new int[] { 1, 50 }, fromRanges.get(0));
+   // 70-119 maps to 931-901,600-582
+   toRanges = compound.getToRanges();
+   assertEquals(toRanges.size(), 1);
+   assertArrayEquals(new int[] { 931, 901, 600, 582 }, toRanges.get(0));
+
+   /*
+    * 1:1 plus 1:3 should result in 1:3
+    */
+   ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 40 }, 1, 1);
+   ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 50, 91, 340 },
+           1, 3);
+   compound = ml1.traverse(ml2);
+
+   assertEquals(compound.getFromRatio(), 1);
+   assertEquals(compound.getToRatio(), 3);
+   fromRanges = compound.getFromRanges();
+   assertEquals(fromRanges.size(), 1);
+   assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0));
+   // 11-40 maps to 31-50,91-160
+   toRanges = compound.getToRanges();
+   assertEquals(toRanges.size(), 1);
+   assertArrayEquals(new int[] { 31, 50, 91, 160 }, toRanges.get(0));
+
+   /*
+    * 3:1 plus 1:1 should result in 3:1
+    */
+   ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 20 }, 3, 1);
+   ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 15, 91, 175 },
+           1, 1);
+   compound = ml1.traverse(ml2);
+
+   assertEquals(compound.getFromRatio(), 3);
+   assertEquals(compound.getToRatio(), 1);
+   fromRanges = compound.getFromRanges();
+   assertEquals(fromRanges.size(), 1);
+   assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0));
+   // 11-20 maps to 11-15, 91-95
+   toRanges = compound.getToRanges();
+   assertEquals(toRanges.size(), 1);
+   assertArrayEquals(new int[] { 11, 15, 91, 95 }, toRanges.get(0));
+
+   /*
+    * 1:3 plus 3:1 should result in 1:1
+    */
+   ml1 = new MapList(new int[] { 21, 40 }, new int[] { 13, 72 }, 1, 3);
+   ml2 = new MapList(new int[] { 1, 300 }, new int[] { 51, 70, 121, 200 },
+           3, 1);
+   compound = ml1.traverse(ml2);
+
+   assertEquals(compound.getFromRatio(), 1);
+   assertEquals(compound.getToRatio(), 1);
+   fromRanges = compound.getFromRanges();
+   assertEquals(fromRanges.size(), 1);
+   assertArrayEquals(new int[] { 21, 40 }, fromRanges.get(0));
+   // 13-72 maps 3:1 to 55-70, 121-124
+   toRanges = compound.getToRanges();
+   assertEquals(toRanges.size(), 1);
+   assertArrayEquals(new int[] { 55, 70, 121, 124 }, toRanges.get(0));
+
+   /*
+    * 3:1 plus 1:3 should result in 1:1
+    */
+   ml1 = new MapList(new int[] { 31, 90 }, new int[] { 13, 32 }, 3, 1);
+   ml2 = new MapList(new int[] { 11, 40 }, new int[] { 41, 50, 71, 150 },
+           1, 3);
+   compound = ml1.traverse(ml2);
+
+   assertEquals(compound.getFromRatio(), 1);
+   assertEquals(compound.getToRatio(), 1);
+   fromRanges = compound.getFromRanges();
+   assertEquals(fromRanges.size(), 1);
+   assertArrayEquals(new int[] { 31, 90 }, fromRanges.get(0));
+   // 13-32 maps to 47-50,71-126
+   toRanges = compound.getToRanges();
+   assertEquals(toRanges.size(), 1);
+   assertArrayEquals(new int[] { 47, 50, 71, 126 }, toRanges.get(0));
+
+   /*
+    * method returns null if not all regions are mapped through
+    */
+   ml1 = new MapList(new int[] { 1, 50 }, new int[] { 101, 150 }, 1, 1);
+   ml2 = new MapList(new int[] { 131, 180 }, new int[] { 201, 250 }, 1, 3);
+   compound = ml1.traverse(ml2);
+   assertNull(compound);
+ }
}
assertEquals("[12, 11, 8, 4]", Arrays.toString(ranges));
}
+ /**
+  * Tests MappingUtils.rangeContains for each combination of forward and
+  * reverse (start greater than end) containing range and queried range, and
+  * for malformed or null arguments
+  */
+ @Test(groups = { "Functional" })
+ public void testRangeContains()
+ {
+   // shared by several calls; assumes rangeContains does not modify its
+   // arguments
+   int[] forward = new int[] { 1, 10 };
+   int[] reverse = new int[] { 10, 1 };
+
+   /*
+    * both forward ranges
+    */
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 1, 10 }));
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 2, 10 }));
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 1, 9 }));
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 4, 5 }));
+   assertFalse(MappingUtils.rangeContains(forward, new int[] { 0, 9 }));
+   assertFalse(MappingUtils.rangeContains(forward, new int[] { -10, -9 }));
+   assertFalse(MappingUtils.rangeContains(forward, new int[] { 1, 11 }));
+   assertFalse(MappingUtils.rangeContains(forward, new int[] { 11, 12 }));
+
+   /*
+    * forward range, reverse query
+    */
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 10, 1 }));
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 9, 1 }));
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 10, 2 }));
+   assertTrue(MappingUtils.rangeContains(forward, new int[] { 5, 5 }));
+   assertFalse(MappingUtils.rangeContains(forward, new int[] { 11, 1 }));
+   assertFalse(MappingUtils.rangeContains(forward, new int[] { 10, 0 }));
+
+   /*
+    * reverse range, forward query
+    */
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 1, 10 }));
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 1, 9 }));
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 2, 10 }));
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 6, 6 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { 6, 11 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { 11, 20 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { -3, -2 }));
+
+   /*
+    * both reverse
+    */
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 10, 1 }));
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 9, 1 }));
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 10, 2 }));
+   assertTrue(MappingUtils.rangeContains(reverse, new int[] { 3, 3 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { 11, 1 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { 10, 0 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { 12, 11 }));
+   assertFalse(MappingUtils.rangeContains(reverse, new int[] { -5, -8 }));
+
+   /*
+    * bad arguments: wrong array length, or null
+    */
+   int[] tooLong = new int[] { 1, 10, 12 };
+   int[] tooShort = new int[] { 1 };
+   assertFalse(MappingUtils.rangeContains(tooLong, new int[] { 1, 10 }));
+   assertFalse(MappingUtils.rangeContains(forward, tooShort));
+   assertFalse(MappingUtils.rangeContains(forward, null));
+   assertFalse(MappingUtils.rangeContains(null, new int[] { 1, 10 }));
+ }
+
@Test(groups = "Functional")
public void testRemoveEndPositions()
{
--- /dev/null
+package jalview.util;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for MathUtils
+ */
+public class MathUtilsTest
+{
+  /**
+   * Tests the greatest common divisor method with zero, negative, coprime
+   * and composite arguments. Declared in the "Functional" group so that it
+   * is not skipped by a group-filtered test run.
+   */
+  @Test(groups = "Functional")
+  public void testGcd()
+  {
+    // zero arguments
+    assertEquals(MathUtils.gcd(0, 0), 0);
+    assertEquals(MathUtils.gcd(0, 1), 1);
+    assertEquals(MathUtils.gcd(1, 0), 1);
+
+    // result is positive whatever the signs of the arguments
+    assertEquals(MathUtils.gcd(1, 1), 1);
+    assertEquals(MathUtils.gcd(1, -1), 1);
+    assertEquals(MathUtils.gcd(-1, 1), 1);
+    assertEquals(MathUtils.gcd(2, 3), 1);
+    assertEquals(MathUtils.gcd(4, 2), 2);
+    assertEquals(MathUtils.gcd(2, 4), 2);
+    assertEquals(MathUtils.gcd(2, -4), 2);
+    assertEquals(MathUtils.gcd(-2, 4), 2);
+    assertEquals(MathUtils.gcd(-2, -4), 2);
+
+    // common prime factors are 3 and 7
+    assertEquals(MathUtils.gcd(2 * 3 * 5 * 7 * 11, 3 * 7 * 13 * 17), 3 * 7);
+  }
+}
assertEquals("", StringUtils.toSentenceCase(""));
assertNull(StringUtils.toSentenceCase(null));
}
+
+ /**
+  * Tests the method that strips enclosing html (and closing body) tags from
+  * text. The expected values record current behaviour, including the
+  * html-encoding of angle brackets when the text has no "<html>" tag.
+  */
+ @Test(groups = { "Functional" })
+ public void testStripHtmlTags()
+ {
+   assertNull(StringUtils.stripHtmlTags(null));
+   assertEquals("", StringUtils.stripHtmlTags(""));
+   assertEquals(
+           "<a href=\"something\">label</href>",
+           StringUtils
+                   .stripHtmlTags("<html><a href=\"something\">label</href></html>"));
+
+   // if no "<html>" tag, < and > get html-encoded (not sure why)
+   assertEquals("&lt;a href=\"something\"&gt;label&lt;/href&gt;",
+           StringUtils.stripHtmlTags("<a href=\"something\">label</href>"));
+
+   // </body> gets removed but not <body> (is this intentional?)
+   assertEquals("<body><p>hello",
+           StringUtils.stripHtmlTags("<html><body><p>hello</body></html>"));
+
+   // a lone '<' in text without an "<html>" tag is also encoded
+   assertEquals("kdHydro &lt; 12.53",
+           StringUtils.stripHtmlTags("kdHydro < 12.53"));
+ }
}