From: gmungoc Date: Tue, 31 Oct 2017 18:12:36 +0000 (+0000) Subject: Merge branch 'bug/JAL-2791selectVisibleFeatures' into X-Git-Tag: Release_2_11_0~164 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d2fbfb17ae43c983123e18a0ef1074c2aba907ff;hp=19bf96b1bcedc7c00f97e1947d889c24fec48ae7;p=jalview.git Merge branch 'bug/JAL-2791selectVisibleFeatures' into features/JAL-1793VCF Conflicts: src/jalview/controller/AlignViewController.java --- diff --git a/.classpath b/.classpath index c4a2832..441ba60 100644 --- a/.classpath +++ b/.classpath @@ -48,11 +48,9 @@ - - @@ -69,5 +67,7 @@ + + diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs index 8a5e7a7..5908bb2 100644 --- a/.settings/org.eclipse.jdt.core.prefs +++ b/.settings/org.eclipse.jdt.core.prefs @@ -1,15 +1,15 @@ eclipse.preferences.version=1 org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve -org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.compliance=1.8 org.eclipse.jdt.core.compiler.debug.lineNumber=generate org.eclipse.jdt.core.compiler.debug.localVariable=generate org.eclipse.jdt.core.compiler.debug.sourceFile=generate org.eclipse.jdt.core.compiler.problem.assertIdentifier=error org.eclipse.jdt.core.compiler.problem.enumIdentifier=error -org.eclipse.jdt.core.compiler.source=1.7 +org.eclipse.jdt.core.compiler.source=1.8 org.eclipse.jdt.core.formatter.align_type_members_on_columns=false org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=52 diff --git a/build.xml b/build.xml index f39fdf3..436148a 100755 --- a/build.xml +++ b/build.xml @@ -106,8 +106,8 @@ - - + + @@ -425,7 +425,7 @@ 
- + @@ -451,8 +451,8 @@ - - j2se version="1.7+" + + j2se version="1.8+" @@ -624,7 +624,7 @@ - + diff --git a/lib/htsjdk-2.12.0.jar b/lib/htsjdk-2.12.0.jar new file mode 100644 index 0000000..1df12b2 Binary files /dev/null and b/lib/htsjdk-2.12.0.jar differ diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 5d9bdff..c950bbc 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -490,6 +490,10 @@ label.settings_for_type = Settings for {0} label.view_full_application = View in Full Application label.load_associated_tree = Load Associated Tree... label.load_features_annotations = Load Features/Annotations... +label.load_vcf = Load SNP variants from plain text or indexed VCF data +label.load_vcf_file = Load VCF File +label.searching_vcf = Loading VCF variants... +label.added_vcf = Added {0} VCF variants to {1} sequence(s) label.export_features = Export Features... label.export_annotations = Export Annotations... label.to_upper_case = To Upper Case @@ -1319,3 +1323,14 @@ label.select_hidden_colour = Select hidden colour label.overview = Overview label.reset_to_defaults = Reset to defaults label.oview_calc = Recalculating overview... 
+label.feature_details = Feature details +label.matchCondition_contains = Contains +label.matchCondition_notcontains = Does not contain +label.matchCondition_matches = Matches +label.matchCondition_notmatches = Does not match +label.matchCondition_eq = = +label.matchCondition_ne = not = +label.matchCondition_lt = < +label.matchCondition_le = <= +label.matchCondition_gt = > +label.matchCondition_ge = >= diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 90d9197..bef667d 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -29,6 +29,7 @@ import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.IncompleteCodonException; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -36,6 +37,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.features.SequenceFeatures; +import jalview.io.gff.Gff3Helper; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; @@ -105,6 +107,15 @@ public class AlignmentUtils { return variant == null ? null : variant.getFeatureGroup(); } + + /** + * toString for aid in the debugger only + */ + @Override + public String toString() + { + return base + ":" + (variant == null ? "" : variant.getDescription()); + } } /** @@ -384,7 +395,7 @@ public class AlignmentUtils * Answers true if the mappings include one between the given (dataset) * sequences. 
*/ - public static boolean mappingExists(List mappings, + protected static boolean mappingExists(List mappings, SequenceI aaSeq, SequenceI cdnaSeq) { if (mappings != null) @@ -1636,8 +1647,8 @@ public class AlignmentUtils productSeqs = new HashSet(); for (SequenceI seq : products) { - productSeqs.add(seq.getDatasetSequence() == null ? seq - : seq.getDatasetSequence()); + productSeqs.add(seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence()); } } @@ -1730,9 +1741,8 @@ public class AlignmentUtils /* * add a mapping from CDS to the (unchanged) mapped to range */ - List cdsRange = Collections - .singletonList(new int[] - { 1, cdsSeq.getLength() }); + List cdsRange = Collections.singletonList(new int[] { 1, + cdsSeq.getLength() }); MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(), mapList.getFromRatio(), mapList.getToRatio()); @@ -1754,7 +1764,7 @@ public class AlignmentUtils * add another mapping from original 'from' range to CDS */ AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame(); - MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), + final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), cdsRange, 1, 1); dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss, dnaToCdsMap); @@ -1764,6 +1774,13 @@ public class AlignmentUtils } /* + * transfer dna chromosomal loci (if known) to the CDS + * sequence (via the mapping) + */ + final MapList cdsToDnaMap = dnaToCdsMap.getInverse(); + transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq); + + /* * add DBRef with mapping from protein to CDS * (this enables Get Cross-References from protein alignment) * This is tricky because we can't have two DBRefs with the @@ -1782,26 +1799,30 @@ public class AlignmentUtils for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs()) { - // creates a complementary cross-reference to the source sequence's - // primary reference. 
- - DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(), - primRef.getSource() + ":" + primRef.getVersion(), - primRef.getAccessionId()); - cdsCrossRef - .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap))); + /* + * create a cross-reference from CDS to the source sequence's + * primary reference and vice versa + */ + String source = primRef.getSource(); + String version = primRef.getVersion(); + DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":" + + version, primRef.getAccessionId()); + cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap))); cdsSeqDss.addDBRef(cdsCrossRef); + dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq + .getName(), new Mapping(cdsSeqDss, dnaToCdsMap))); + // problem here is that the cross-reference is synthesized - // cdsSeq.getName() may be like 'CDS|dnaaccession' or // 'CDS|emblcdsacc' // assuming cds version same as dna ?!? - DBRefEntry proteinToCdsRef = new DBRefEntry(primRef.getSource(), - primRef.getVersion(), cdsSeq.getName()); + DBRefEntry proteinToCdsRef = new DBRefEntry(source, version, + cdsSeq.getName()); // - proteinToCdsRef.setMap( - new Mapping(cdsSeqDss, cdsToProteinMap.getInverse())); + proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap + .getInverse())); proteinProduct.addDBRef(proteinToCdsRef); } @@ -1814,14 +1835,46 @@ public class AlignmentUtils } } - AlignmentI cds = new Alignment( - cdsSeqs.toArray(new SequenceI[cdsSeqs.size()])); + AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs + .size()])); cds.setDataset(dataset); return cds; } /** + * Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to + * toSeq, mediated by the given mapping between the sequences + * + * @param fromSeq + * @param targetToFrom + * Map + * @param targetSeq + */ + protected static void transferGeneLoci(SequenceI fromSeq, + MapList targetToFrom, SequenceI targetSeq) + { + if (targetSeq.getGeneLoci() != null) + { + // already have - don't override + return; + 
} + GeneLociI fromLoci = fromSeq.getGeneLoci(); + if (fromLoci == null) + { + return; + } + + MapList newMap = targetToFrom.traverse(fromLoci.getMap()); + + if (newMap != null) + { + targetSeq.setGeneLoci(fromLoci.getSpeciesId(), + fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap); + } + } + + /** * A helper method that finds a CDS sequence in the alignment dataset that is * mapped to the given protein sequence, and either is, or has a mapping from, * the given dna sequence. @@ -1989,19 +2042,19 @@ public class AlignmentUtils } /** - * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to + * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to * the given mapping. * * @param cdsSeq * @param contig + * @param proteinProduct * @param mapping - * @return list of DBRefEntrys added. + * @return list of DBRefEntrys added */ - public static List propagateDBRefsToCDS(SequenceI cdsSeq, + protected static List propagateDBRefsToCDS(SequenceI cdsSeq, SequenceI contig, SequenceI proteinProduct, Mapping mapping) { - - // gather direct refs from contig congrent with mapping + // gather direct refs from contig congruent with mapping List direct = new ArrayList(); HashSet directSources = new HashSet(); if (contig.getDBRefs() != null) @@ -2081,7 +2134,7 @@ public class AlignmentUtils * subtypes in the Sequence Ontology) * @param omitting */ - public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, + protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, MapList mapping, String select, String... 
omitting) { SequenceI copyTo = toSeq; @@ -2235,7 +2288,7 @@ public class AlignmentUtils * @param dnaSeq * @return */ - public static List findCdsPositions(SequenceI dnaSeq) + protected static List findCdsPositions(SequenceI dnaSeq) { List result = new ArrayList(); @@ -2370,7 +2423,7 @@ public class AlignmentUtils { if (var.variant != null) { - String alleles = (String) var.variant.getValue("alleles"); + String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); if (alleles != null) { for (String base : alleles.split(",")) @@ -2392,7 +2445,7 @@ public class AlignmentUtils { if (var.variant != null) { - String alleles = (String) var.variant.getValue("alleles"); + String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); if (alleles != null) { for (String base : alleles.split(",")) @@ -2414,7 +2467,7 @@ public class AlignmentUtils { if (var.variant != null) { - String alleles = (String) var.variant.getValue("alleles"); + String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); if (alleles != null) { for (String base : alleles.split(",")) @@ -2509,7 +2562,10 @@ public class AlignmentUtils /** * Builds a map whose key is position in the protein sequence, and value is a - * list of the base and all variants for each corresponding codon position + * list of the base and all variants for each corresponding codon position. + *

+ * This depends on dna variants being held as a comma-separated list as + * property "alleles" on variant features. * * @param dnaSeq * @param dnaToProtein @@ -2547,6 +2603,30 @@ public class AlignmentUtils // not handling multi-locus variant features continue; } + + /* + * ignore variant if not a SNP + */ + String alls = (String) sf.getValue(Gff3Helper.ALLELES); + if (alls == null) + { + continue; // non-SNP VCF variant perhaps - can't process this + } + + String[] alleles = alls.toUpperCase().split(","); + boolean isSnp = true; + for (String allele : alleles) + { + if (allele.trim().length() > 1) + { + isSnp = false; + } + } + if (!isSnp) + { + continue; + } + int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol); if (mapsTo == null) { @@ -2565,21 +2645,6 @@ public class AlignmentUtils } /* - * extract dna variants to a string array - */ - String alls = (String) sf.getValue("alleles"); - if (alls == null) - { - continue; - } - String[] alleles = alls.toUpperCase().split(","); - int i = 0; - for (String allele : alleles) - { - alleles[i++] = allele.trim(); // lose any space characters "A, G" - } - - /* * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10] */ int[] codon = peptidePosition == lastPeptidePostion ? lastCodon diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index a10b037..f3088ea 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -851,6 +851,23 @@ public class Dna } /** + * Answers the reverse complement of the input string + * + * @see #getComplement(char) + * @param s + * @return + */ + public static String reverseComplement(String s) + { + StringBuilder sb = new StringBuilder(s.length()); + for (int i = s.length() - 1; i >= 0; i--) + { + sb.append(Dna.getComplement(s.charAt(i))); + } + return sb.toString(); + } + + /** * Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes * are treated as on http://reverse-complement.com/. Anything else is left * unchanged. 
diff --git a/src/jalview/api/FeatureColourI.java b/src/jalview/api/FeatureColourI.java index 0ded079..3b2313d 100644 --- a/src/jalview/api/FeatureColourI.java +++ b/src/jalview/api/FeatureColourI.java @@ -21,6 +21,7 @@ package jalview.api; import jalview.datamodel.SequenceFeature; +import jalview.util.matcher.KeyedMatcherSetI; import java.awt.Color; @@ -169,4 +170,20 @@ public interface FeatureColourI * @return */ String toJalviewFormat(String featureType); + + /** + * Sets the attribute filter conditions, or removes them if the argument is + * null + * + * @param filter + */ + public void setAttributeFilters(KeyedMatcherSetI filter); + + /** + * Answers the attribute value filters for the colour scheme, or null if no + * filters are set + * + * @return + */ + public KeyedMatcherSetI getAttributeFilters(); } diff --git a/src/jalview/controller/AlignViewController.java b/src/jalview/controller/AlignViewController.java index 7bb295e..8b001b3 100644 --- a/src/jalview/controller/AlignViewController.java +++ b/src/jalview/controller/AlignViewController.java @@ -247,12 +247,11 @@ public class AlignViewController implements AlignViewControllerI boolean found = false; for (SequenceFeature sf : sfs) { - { - if (!visibleFeatures.contains(sf.getType()) - || fr.getColour(sf) == null) // could pull up getColour to - // FeatureRenderer interface - { - continue; + { + if (!visibleFeatures.contains(sf.getType()) + || fr.getColour(sf) == null) // could pull up getColour to FeatureRenderer interface + { + continue; } } if (!found) @@ -260,6 +259,7 @@ public class AlignViewController implements AlignViewControllerI nseq++; } found = true; + int sfStartCol = sq.findIndex(sf.getBegin()); int sfEndCol = sq.findIndex(sf.getEnd()); diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index f7837f7..98868ce 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -27,7 +27,20 @@ import java.util.List; public 
class DBRefEntry implements DBRefEntryI { - String source = "", version = "", accessionId = ""; + /* + * the mapping to chromosome (genome) is held as an instance with + * source = speciesId + * version = assemblyId + * accessionId = "chromosome:" + chromosomeId + * map = mapping from sequence to reference assembly + */ + public static final String CHROMOSOME = "chromosome"; + + String source = ""; + + String version = ""; + + String accessionId = ""; /** * maps from associated sequence to the database sequence's coordinate system @@ -331,4 +344,14 @@ public class DBRefEntry implements DBRefEntryI } return true; } + + /** + * Mappings to chromosome are held with accessionId as "chromosome:id" + * + * @return + */ + public boolean isChromosome() + { + return accessionId != null && accessionId.startsWith(CHROMOSOME + ":"); + } } diff --git a/src/jalview/datamodel/GeneLociI.java b/src/jalview/datamodel/GeneLociI.java new file mode 100644 index 0000000..f8c7ec5 --- /dev/null +++ b/src/jalview/datamodel/GeneLociI.java @@ -0,0 +1,38 @@ +package jalview.datamodel; + +import jalview.util.MapList; + +/** + * An interface to model one or more contiguous regions on one chromosome + */ +public interface GeneLociI +{ + /** + * Answers the species identifier + * + * @return + */ + String getSpeciesId(); + + /** + * Answers the reference assembly identifier + * + * @return + */ + String getAssemblyId(); + + /** + * Answers the chromosome identifier e.g. "2", "Y", "II" + * + * @return + */ + String getChromosomeId(); + + /** + * Answers the mapping from sequence to chromosome loci. For a reverse strand + * mapping, the chromosomal ranges will have start > end. 
+ * + * @return + */ + MapList getMap(); +} diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 96b0757..1905f42 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -662,10 +662,10 @@ public class Sequence extends ASequence implements SequenceI } /** - * DOCUMENT ME! + * Sets the sequence description, and also parses out any special formats of + * interest * * @param desc - * DOCUMENT ME! */ @Override public void setDescription(String desc) @@ -673,10 +673,67 @@ public class Sequence extends ASequence implements SequenceI this.description = desc; } + @Override + public void setGeneLoci(String speciesId, String assemblyId, + String chromosomeId, MapList map) + { + addDBRef(new DBRefEntry(speciesId, assemblyId, DBRefEntry.CHROMOSOME + + ":" + chromosomeId, new Mapping(map))); + } + /** - * DOCUMENT ME! + * Returns the gene loci mapping for the sequence (may be null) * - * @return DOCUMENT ME! + * @return + */ + @Override + public GeneLociI getGeneLoci() + { + DBRefEntry[] refs = getDBRefs(); + if (refs != null) + { + for (final DBRefEntry ref : refs) + { + if (ref.isChromosome()) + { + return new GeneLociI() + { + @Override + public String getSpeciesId() + { + return ref.getSource(); + } + + @Override + public String getAssemblyId() + { + return ref.getVersion(); + } + + @Override + public String getChromosomeId() + { + // strip off "chromosome:" prefix to chrId + return ref.getAccessionId().substring( + DBRefEntry.CHROMOSOME.length() + 1); + } + + @Override + public MapList getMap() + { + return ref.getMap().getMap(); + } + }; + } + } + } + return null; + } + + /** + * Answers the description + * + * @return */ @Override public String getDescription() diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 9c4087e..5029da5 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ 
-20,18 +20,23 @@ */ package jalview.datamodel; +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureAttributes; import jalview.datamodel.features.FeatureLocationI; +import jalview.datamodel.features.FeatureSourceI; +import jalview.datamodel.features.FeatureSources; +import jalview.util.StringUtils; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; +import java.util.TreeMap; import java.util.Vector; /** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ + * A class that models a single contiguous feature on a sequence. If flag + * 'contactFeature' is true, the start and end positions are interpreted instead + * as two contact points. */ public class SequenceFeature implements FeatureLocationI { @@ -51,6 +56,20 @@ public class SequenceFeature implements FeatureLocationI // private key for ENA location designed not to conflict with real GFF data private static final String LOCATION = "!Location"; + private static final String ROW_DATA = "%s%s%s"; + + /* + * map of otherDetails special keys, and their value fields' delimiter + */ + private static final Map INFO_KEYS = new HashMap<>(); + + static + { + INFO_KEYS.put("CSQ", ","); + // todo capture second level metadata (CSQ FORMAT) + // and delimiter "|" so as to report in a table within a table? + } + /* * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as * name1=value1;name2=value2,value3;...etc @@ -84,6 +103,12 @@ public class SequenceFeature implements FeatureLocationI public Vector links; + /* + * the identifier (if known) for the FeatureSource held in FeatureSources, + * as a provider of metadata about feature attributes + */ + private String source; + /** * Constructs a duplicate feature. 
Note: Uses makes a shallow copy of the * otherDetails map, so the new and original SequenceFeature may reference the @@ -155,6 +180,8 @@ public class SequenceFeature implements FeatureLocationI this(newType, sf.getDescription(), newBegin, newEnd, newScore, newGroup); + this.source = sf.source; + if (sf.otherDetails != null) { otherDetails = new HashMap(); @@ -398,6 +425,7 @@ public class SequenceFeature implements FeatureLocationI } otherDetails.put(key, value); + FeatureAttributes.getInstance().addAttribute(this.type, key); } } @@ -535,4 +563,135 @@ public class SequenceFeature implements FeatureLocationI { return begin == 0 && end == 0; } + + /** + * Answers an html-formatted report of feature details + * + * @return + */ + public String getDetailsReport() + { + FeatureSourceI metadata = FeatureSources.getInstance() + .getSource(source); + + StringBuilder sb = new StringBuilder(128); + sb.append("
"); + sb.append(""); + sb.append(String.format(ROW_DATA, "Type", type, "")); + sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin + : begin + (isContactFeature() ? ":" : "-") + end, "")); + String desc = StringUtils.stripHtmlTags(description); + sb.append(String.format(ROW_DATA, "Description", desc, "")); + if (!Float.isNaN(score) && score != 0f) + { + sb.append(String.format(ROW_DATA, "Score", score, "")); + } + if (featureGroup != null) + { + sb.append(String.format(ROW_DATA, "Group", featureGroup, "")); + } + + if (otherDetails != null) + { + TreeMap ordered = new TreeMap<>( + String.CASE_INSENSITIVE_ORDER); + ordered.putAll(otherDetails); + + for (Entry entry : ordered.entrySet()) + { + String key = entry.getKey(); + if (ATTRIBUTES.equals(key)) + { + continue; // to avoid double reporting + } + if (INFO_KEYS.containsKey(key)) + { + /* + * split selected INFO data by delimiter over multiple lines + */ + String delimiter = INFO_KEYS.get(key); + String[] values = entry.getValue().toString().split(delimiter); + for (String value : values) + { + sb.append(String.format(ROW_DATA, key, "", value)); + } + } + else + { // tried
but it failed to provide a tooltip :-( + String attDesc = null; + if (metadata != null) + { + attDesc = metadata.getAttributeName(key); + } + String value = entry.getValue().toString(); + if (isValueInteresting(key, value, metadata)) + { + sb.append(String.format(ROW_DATA, key, attDesc == null ? "" + : attDesc, value)); + } + } + } + } + sb.append("
"); + + String text = sb.toString(); + return text; + } + + /** + * Answers true if we judge the value is worth displaying, by some heuristic + * rules, else false + * + * @param key + * @param value + * @param metadata + * @return + */ + boolean isValueInteresting(String key, String value, + FeatureSourceI metadata) + { + /* + * currently suppressing zero values as well as null or empty + */ + if (value == null || "".equals(value) || ".".equals(value) + || "0".equals(value)) + { + return false; + } + + if (metadata == null) + { + return true; + } + + FeatureAttributeType attType = metadata.getAttributeType(key); + if (attType != null + && (attType == FeatureAttributeType.Float || attType + .equals(FeatureAttributeType.Integer))) + { + try + { + float fval = Float.valueOf(value); + if (fval == 0f) + { + return false; + } + } catch (NumberFormatException e) + { + // ignore + } + } + + return true; // default to interesting + } + + /** + * Sets the feature source identifier + * + * @param theSource + */ + public void setSource(String theSource) + { + source = theSource; + } } diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 6e6d1aa..28be85f 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -21,6 +21,7 @@ package jalview.datamodel; import jalview.datamodel.features.SequenceFeaturesI; +import jalview.util.MapList; import java.util.BitSet; import java.util.List; @@ -534,4 +535,22 @@ public interface SequenceI extends ASequenceI * @param c2 */ public int replace(char c1, char c2); + + /** + * Answers the GeneLociI, or null if not known + * + * @return + */ + GeneLociI getGeneLoci(); + + /** + * Sets the mapping to gene loci for the sequence + * + * @param speciesId + * @param assemblyId + * @param chromosomeId + * @param map + */ + void setGeneLoci(String speciesId, String assemblyId, + String chromosomeId, MapList map); } diff --git 
a/src/jalview/datamodel/features/FeatureAttributeType.java b/src/jalview/datamodel/features/FeatureAttributeType.java new file mode 100644 index 0000000..fd3069d --- /dev/null +++ b/src/jalview/datamodel/features/FeatureAttributeType.java @@ -0,0 +1,12 @@ +package jalview.datamodel.features; + +/** + * A class to model the datatype of feature attributes. + * + * @author gmcarstairs + * + */ +public enum FeatureAttributeType +{ + String, Integer, Float, Character, Flag; +} diff --git a/src/jalview/datamodel/features/FeatureAttributes.java b/src/jalview/datamodel/features/FeatureAttributes.java new file mode 100644 index 0000000..7990f6b --- /dev/null +++ b/src/jalview/datamodel/features/FeatureAttributes.java @@ -0,0 +1,71 @@ +package jalview.datamodel.features; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +/** + * A singleton class to hold the set of attributes known for each feature type + */ +public class FeatureAttributes +{ + private static FeatureAttributes instance = new FeatureAttributes(); + + private Map> attributes; + + /** + * Answers the singleton instance of this class + * + * @return + */ + public static FeatureAttributes getInstance() + { + return instance; + } + + private FeatureAttributes() + { + attributes = new HashMap<>(); + } + + /** + * Answers the attributes known for the given feature type, in alphabetical + * order (not case sensitive), or an empty set if no attributes are known + * + * @param featureType + * @return + */ + public Iterable getAttributes(String featureType) + { + if (!attributes.containsKey(featureType)) + { + return Collections.emptySet(); + } + + return attributes.get(featureType); + } + + /** + * Records the given attribute name for the given feature type + * + * @param featureType + * @param attName + */ + public void addAttribute(String featureType, String attName) + { + if (featureType == null || attName == null) + { + return; + } 
+ + if (!attributes.containsKey(featureType)) + { + attributes.put(featureType, new TreeSet( + String.CASE_INSENSITIVE_ORDER)); + } + + attributes.get(featureType).add(attName); + } +} diff --git a/src/jalview/datamodel/features/FeatureSource.java b/src/jalview/datamodel/features/FeatureSource.java new file mode 100644 index 0000000..a1be1dc --- /dev/null +++ b/src/jalview/datamodel/features/FeatureSource.java @@ -0,0 +1,78 @@ +package jalview.datamodel.features; + +import java.util.HashMap; +import java.util.Map; + +/** + * A class to model one source of feature data, including metadata about + * attributes of features + * + * @author gmcarstairs + * + */ +public class FeatureSource implements FeatureSourceI +{ + private String name; + + private Map attributeNames; + + private Map attributeTypes; + + /** + * Constructor + * + * @param theName + */ + public FeatureSource(String theName) + { + this.name = theName; + attributeNames = new HashMap<>(); + attributeTypes = new HashMap<>(); + } + + /** + * {@inheritDoc} + */ + @Override + public String getName() + { + return name; + } + + /** + * {@inheritDoc} + */ + @Override + public String getAttributeName(String attributeId) + { + return attributeNames.get(attributeId); + } + + /** + * {@inheritDoc} + */ + @Override + public FeatureAttributeType getAttributeType(String attributeId) + { + return attributeTypes.get(attributeId); + } + + /** + * {@inheritDoc} + */ + @Override + public void setAttributeName(String id, String attName) + { + attributeNames.put(id, attName); + } + + /** + * {@inheritDoc} + */ + @Override + public void setAttributeType(String id, FeatureAttributeType type) + { + attributeTypes.put(id, type); + } + +} diff --git a/src/jalview/datamodel/features/FeatureSourceI.java b/src/jalview/datamodel/features/FeatureSourceI.java new file mode 100644 index 0000000..c873593 --- /dev/null +++ b/src/jalview/datamodel/features/FeatureSourceI.java @@ -0,0 +1,45 @@ +package jalview.datamodel.features; + +public 
interface FeatureSourceI +{ + /** + * Answers a name for the feature source (not necessarily unique) + * + * @return + */ + String getName(); + + /** + * Answers the 'long name' of an attribute given its id (short name or + * abbreviation), or null if not known + * + * @param attributeId + * @return + */ + String getAttributeName(String attributeId); + + /** + * Sets the 'long name' of an attribute given its id (short name or + * abbreviation). + * + * @param id + * @param name + */ + void setAttributeName(String id, String name); + + /** + * Answers the datatype of the attribute with given id, or null if not known + * + * @param attributeId + * @return + */ + FeatureAttributeType getAttributeType(String attributeId); + + /** + * Sets the datatype of the attribute with given id + * + * @param id + * @param type + */ + void setAttributeType(String id, FeatureAttributeType type); +} diff --git a/src/jalview/datamodel/features/FeatureSources.java b/src/jalview/datamodel/features/FeatureSources.java new file mode 100644 index 0000000..1be1b82 --- /dev/null +++ b/src/jalview/datamodel/features/FeatureSources.java @@ -0,0 +1,58 @@ +package jalview.datamodel.features; + +import java.util.HashMap; +import java.util.Map; + +/** + * A singleton to hold metadata about feature attributes, keyed by a unique + * feature source identifier + * + * @author gmcarstairs + * + */ +public class FeatureSources +{ + private static FeatureSources instance = new FeatureSources(); + + private Map sources; + + /** + * Answers the singleton instance of this class + * + * @return + */ + public static FeatureSources getInstance() + { + return instance; + } + + private FeatureSources() + { + sources = new HashMap<>(); + } + + /** + * Answers the FeatureSource with the given unique identifier, or null if not + * known + * + * @param sourceId + * @return + */ + public FeatureSourceI getSource(String sourceId) + { + return sources.get(sourceId); + } + + /** + * Adds the given source under the given 
key. This will replace any existing + * source with the same id, it is the caller's responsibility to ensure keys + * are unique if necessary. + * + * @param sourceId + * @param source + */ + public void addSource(String sourceId, FeatureSource source) + { + sources.put(sourceId, source); + } +} diff --git a/src/jalview/ext/ensembl/EnsemblData.java b/src/jalview/ext/ensembl/EnsemblData.java new file mode 100644 index 0000000..47fe0fc --- /dev/null +++ b/src/jalview/ext/ensembl/EnsemblData.java @@ -0,0 +1,91 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.ext.ensembl; + +/** + * A data class to model the data and rest version of one Ensembl domain, + * currently for rest.ensembl.org and rest.ensemblgenomes.org + * + * @author gmcarstairs + */ +class EnsemblData +{ + /* + * The http domain this object is holding data values for + */ + String domain; + + /* + * The latest version Jalview has tested for, e.g. "4.5"; a minor version change should be + * ok, a major version change may break stuff + */ + String expectedRestVersion; + + /* + * Major / minor / point version e.g. 
"4.5.1" + * @see http://rest.ensembl.org/info/rest/?content-type=application/json + */ + String restVersion; + + /* + * data version + * @see http://rest.ensembl.org/info/data/?content-type=application/json + */ + String dataVersion; + + /* + * true when http://rest.ensembl.org/info/ping/?content-type=application/json + * returns response code 200 and not {"error":"Database is unavailable"} + */ + boolean restAvailable; + + /* + * absolute time when availability was last checked + */ + long lastAvailableCheckTime; + + /* + * absolute time when version numbers were last checked + */ + long lastVersionCheckTime; + + // flag set to true if REST major version is not the one expected + boolean restMajorVersionMismatch; + + /* + * absolute time to wait till if we overloaded the REST service + */ + long retryAfter; + + /** + * Constructor given expected REST version number e.g 4.5 or 3.4.3 + * + * @param restExpected + */ + EnsemblData(String theDomain, String restExpected) + { + domain = theDomain; + expectedRestVersion = restExpected; + lastAvailableCheckTime = -1; + lastVersionCheckTime = -1; + } + +} diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 50dfa90..cdcfa96 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -23,6 +23,8 @@ package jalview.ext.ensembl; import jalview.api.FeatureColourI; import jalview.api.FeatureSettingsModelI; import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -144,8 +146,10 @@ public class EnsemblGene extends EnsemblSeqProxy { continue; } + if (geneAlignment.getHeight() == 1) { + findGeneLoci(geneAlignment.getSequenceAt(0), geneId); getTranscripts(geneAlignment, geneId); } if (al == null) @@ -161,6 +165,67 @@ public class EnsemblGene extends EnsemblSeqProxy } /** + 
* Calls the /lookup/id REST service, parses the response for gene + * coordinates, and if successful, adds these to the sequence. If this fails, + * fall back on trying to parse the sequence description in case it is in + * Ensembl-gene format e.g. chromosome:GRCh38:17:45051610:45109016:1. + * + * @param seq + * @param geneId + */ + void findGeneLoci(SequenceI seq, String geneId) + { + GeneLociI geneLoci = new EnsemblLookup(getDomain()).getGeneLoci(geneId); + if (geneLoci != null) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), geneLoci.getMap()); + } + else + { + parseChromosomeLocations(seq); + } + } + + /** + * Parses and saves fields of an Ensembl-style description e.g. + * chromosome:GRCh38:17:45051610:45109016:1 + * + * @param seq + */ + boolean parseChromosomeLocations(SequenceI seq) + { + String description = seq.getDescription(); + if (description == null) + { + return false; + } + String[] tokens = description.split(":"); + if (tokens.length == 6 && tokens[0].startsWith(DBRefEntry.CHROMOSOME)) + { + String ref = tokens[1]; + String chrom = tokens[2]; + try + { + int chStart = Integer.parseInt(tokens[3]); + int chEnd = Integer.parseInt(tokens[4]); + boolean forwardStrand = "1".equals(tokens[5]); + String species = ""; // not known here + int[] from = new int[] { seq.getStart(), seq.getEnd() }; + int[] to = new int[] { forwardStrand ? chStart : chEnd, + forwardStrand ? chEnd : chStart }; + MapList map = new MapList(from, to, 1, 1); + seq.setGeneLoci(species, ref, chrom, map); + return true; + } catch (NumberFormatException e) + { + System.err.println("Bad integers in description " + description); + } + } + return false; + } + + /** * Converts a query, which may contain one or more gene, transcript, or * external (to Ensembl) identifiers, into a non-redundant list of gene * identifiers. 
@@ -354,6 +419,8 @@ public class EnsemblGene extends EnsemblSeqProxy cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(), transcript.getDatasetSequence(), mapping, parentId); + mapTranscriptToChromosome(transcript, gene, mapping); + /* * fetch and save cross-references */ @@ -368,6 +435,42 @@ public class EnsemblGene extends EnsemblSeqProxy } /** + * If the gene has a mapping to chromosome coordinates, derive the transcript + * chromosome regions and save on the transcript sequence + * + * @param transcript + * @param gene + * @param mapping + * the mapping from gene to transcript positions + */ + protected void mapTranscriptToChromosome(SequenceI transcript, + SequenceI gene, MapList mapping) + { + GeneLociI loci = gene.getGeneLoci(); + if (loci == null) + { + return; + } + + MapList geneMapping = loci.getMap(); + + List exons = mapping.getFromRanges(); + List transcriptLoci = new ArrayList<>(); + + for (int[] exon : exons) + { + transcriptLoci.add(geneMapping.locateInTo(exon[0], exon[1])); + } + + List transcriptRange = Arrays.asList(new int[] { + transcript.getStart(), transcript.getEnd() }); + MapList mapList = new MapList(transcriptRange, transcriptLoci, 1, 1); + + transcript.setGeneLoci(loci.getSpeciesId(), loci.getAssemblyId(), + loci.getChromosomeId(), mapList); + } + + /** * Returns the 'transcript_id' property of the sequence feature (or null) * * @param feature diff --git a/src/jalview/ext/ensembl/EnsemblInfo.java b/src/jalview/ext/ensembl/EnsemblInfo.java index 7668941..de55a53 100644 --- a/src/jalview/ext/ensembl/EnsemblInfo.java +++ b/src/jalview/ext/ensembl/EnsemblInfo.java @@ -1,86 +1,185 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. 
- * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ package jalview.ext.ensembl; -/** - * A data class to model the data and rest version of one Ensembl domain, - * currently for rest.ensembl.org and rest.ensemblgenomes.org - * - * @author gmcarstairs - */ -class EnsemblInfo +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; + +import java.io.BufferedReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.json.simple.JSONArray; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class EnsemblInfo extends EnsemblRestClient { - /* - * The http domain this object is holding data values for - */ - String domain; /* - * The latest version Jalview has tested for, e.g. "4.5"; a minor version change should be - * ok, a major version change may break stuff + * cached results of REST /info/divisions service, currently + *

+   * { 
+   *  { "ENSEMBLFUNGI", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLBACTERIA", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLPROTISTS", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLMETAZOA", "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBLPLANTS",  "http://rest.ensemblgenomes.org"},
+   *  { "ENSEMBL", "http://rest.ensembl.org" }
+   *  }
+   * 
+ * The values for EnsemblGenomes are retrieved by a REST call, that for + * Ensembl is added programmatically for convenience of lookup */ - String expectedRestVersion; + private static Map divisions; - /* - * Major / minor / point version e.g. "4.5.1" - * @see http://rest.ensembl.org/info/rest/?content-type=application/json - */ - String restVersion; + @Override + public String getDbName() + { + return "ENSEMBL"; + } - /* - * data version - * @see http://rest.ensembl.org/info/data/?content-type=application/json - */ - String dataVersion; + @Override + public AlignmentI getSequenceRecords(String queries) throws Exception + { + return null; + } - /* - * true when http://rest.ensembl.org/info/ping/?content-type=application/json - * returns response code 200 and not {"error":"Database is unavailable"} + @Override + protected URL getUrl(List ids) throws MalformedURLException + { + return null; + } + + @Override + protected boolean useGetRequest() + { + return true; + } + + @Override + protected String getRequestMimeType(boolean multipleIds) + { + return "application/json"; + } + + @Override + protected String getResponseMimeType() + { + return "application/json"; + } + + /** + * Answers the domain (http://rest.ensembl.org or + * http://rest.ensemblgenomes.org) for the given division, or null if not + * recognised by Ensembl. + * + * @param division + * @return */ - boolean restAvailable; + public String getDomain(String division) + { + if (divisions == null) + { + fetchDivisions(); + } + return divisions.get(division.toUpperCase()); + } - /* - * absolute time when availability was last checked + /** + * On first request only, populate the lookup map by fetching the list of + * divisions known to EnsemblGenomes. 
*/ - long lastAvailableCheckTime; + void fetchDivisions() + { + divisions = new HashMap<>(); - /* - * absolute time when version numbers were last checked + /* + * for convenience, pre-fill ensembl.org as the domain for "ENSEMBL" + */ + divisions.put(DBRefSource.ENSEMBL.toUpperCase(), ENSEMBL_REST); + + BufferedReader br = null; + try + { + URL url = getDivisionsUrl(ENSEMBL_GENOMES_REST); + if (url != null) + { + br = getHttpResponse(url, null); + } + parseResponse(br, ENSEMBL_GENOMES_REST); + } catch (IOException e) + { + // ignore + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } + } + } + + /** + * Parses the JSON response to /info/divisions, and add each to the lookup map + * + * @param br + * @param domain */ - long lastVersionCheckTime; + void parseResponse(BufferedReader br, String domain) + { + JSONParser jp = new JSONParser(); + + try + { + JSONArray parsed = (JSONArray) jp.parse(br); - // flag set to true if REST major version is not the one expected - boolean restMajorVersionMismatch; + Iterator rvals = parsed.iterator(); + while (rvals.hasNext()) + { + String division = rvals.next().toString(); + divisions.put(division.toUpperCase(), domain); + } + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + } /** - * Constructor given expected REST version number e.g 4.5 or 3.4.3 + * Constructs the URL for the EnsemblGenomes /info/divisions REST service + * @param domain TODO * - * @param restExpected + * @return + * @throws MalformedURLException */ - EnsemblInfo(String theDomain, String restExpected) + URL getDivisionsUrl(String domain) throws MalformedURLException { - domain = theDomain; - expectedRestVersion = restExpected; - lastAvailableCheckTime = -1; - lastVersionCheckTime = -1; + return new URL(domain + + "/info/divisions?content-type=application/json"); } + /** + * Returns the set of 'divisions' recognised by Ensembl or EnsemblGenomes + * + * @return + */ + 
public Set getDivisions() { + if (divisions == null) + { + fetchDivisions(); + } + + return divisions.keySet(); + } } diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index 31da9c0..0d1b554 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -20,31 +20,36 @@ */ package jalview.ext.ensembl; +import jalview.bin.Cache; import jalview.datamodel.AlignmentI; +import jalview.datamodel.GeneLociI; +import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.function.Function; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; /** - * A client for the Ensembl lookup REST endpoint; used to find the Parent gene - * identifier given a transcript identifier. + * A client for the Ensembl lookup REST endpoint * * @author gmcarstairs - * */ public class EnsemblLookup extends EnsemblRestClient { + private static final String SPECIES = "species"; - private static final String OBJECT_TYPE_TRANSLATION = "Translation"; private static final String PARENT = "Parent"; + + private static final String OBJECT_TYPE_TRANSLATION = "Translation"; private static final String OBJECT_TYPE_TRANSCRIPT = "Transcript"; private static final String ID = "id"; private static final String OBJECT_TYPE_GENE = "Gene"; @@ -123,14 +128,45 @@ public class EnsemblLookup extends EnsemblRestClient } /** - * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the - * given identifier, or null if not found + * Calls the Ensembl lookup REST endpoint and returns + *
    + *
  • the 'id' for the identifier if its type is "Gene"
  • + *
  • the 'Parent' if its type is 'Transcript'
  • + *
      + * If the type is 'Translation', does a recursive call to this method, passing + * in the 'Parent' (transcript id). * * @param identifier * @return */ public String getGeneId(String identifier) { + return (String) getResult(identifier, br -> parseGeneId(br)); + } + + /** + * Calls the Ensembl lookup REST endpoint and retrieves the 'species' for the + * given identifier, or null if not found + * + * @param identifier + * @return + */ + public String getSpecies(String identifier) + { + return (String) getResult(identifier, br -> getAttribute(br, SPECIES)); + } + + /** + * Calls the /lookup/id rest service and delegates parsing of the JSON + * response to the supplied parser + * + * @param identifier + * @param parser + * @return + */ + protected Object getResult(String identifier, + Function parser) + { List ids = Arrays.asList(new String[] { identifier }); BufferedReader br = null; @@ -141,7 +177,7 @@ public class EnsemblLookup extends EnsemblRestClient { br = getHttpResponse(url, ids); } - return br == null ? null : parseResponse(br); + return br == null ? null : parser.apply(br); } catch (IOException e) { // ignore @@ -162,6 +198,29 @@ public class EnsemblLookup extends EnsemblRestClient } /** + * Answers the value of 'attribute' from the JSON response, or null if not + * found + * + * @param br + * @param attribute + * @return + */ + protected String getAttribute(BufferedReader br, String attribute) + { + String value = null; + JSONParser jp = new JSONParser(); + try + { + JSONObject val = (JSONObject) jp.parse(br); + value = val.get(attribute).toString(); + } catch (ParseException | NullPointerException | IOException e) + { + // ignore + } + return value; + } + + /** * Parses the JSON response and returns the gene identifier, or null if not * found. If the returned object_type is Gene, returns the id, if Transcript * returns the Parent. 
If it is Translation (peptide identifier), then the @@ -169,9 +228,8 @@ public class EnsemblLookup extends EnsemblRestClient * * @param br * @return - * @throws IOException */ - protected String parseResponse(BufferedReader br) throws IOException + protected String parseGeneId(BufferedReader br) { String geneId = null; JSONParser jp = new JSONParser(); @@ -204,11 +262,87 @@ public class EnsemblLookup extends EnsemblRestClient + " looping on Parent!"); } } - } catch (ParseException e) + } catch (ParseException | IOException e) { // ignore } return geneId; } + /** + * Calls the /lookup/id rest service for the given id, and if successful, + * parses and returns the gene's chromosomal coordinates + * + * @param geneId + * @return + */ + public GeneLociI getGeneLoci(String geneId) + { + return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br)); + } + + /** + * Parses the /lookup/id response for species, asssembly_name, + * seq_region_name, start, end and returns an object that wraps them, or null + * if unsuccessful + * + * @param br + * @return + */ + GeneLociI parseGeneLoci(BufferedReader br) + { + JSONParser jp = new JSONParser(); + try + { + JSONObject val = (JSONObject) jp.parse(br); + final String species = val.get("species").toString(); + final String assembly = val.get("assembly_name").toString(); + final String chromosome = val.get("seq_region_name").toString(); + String strand = val.get("strand").toString(); + int start = Integer.parseInt(val.get("start").toString()); + int end = Integer.parseInt(val.get("end").toString()); + int fromEnd = end - start + 1; + boolean reverseStrand = "-1".equals(strand); + int toStart = reverseStrand ? end : start; + int toEnd = reverseStrand ? 
start : end; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + List toRange = Collections.singletonList(new int[] { toStart, + toEnd }); + final MapList map = new MapList(fromRange, toRange, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + return species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return assembly; + } + + @Override + public String getChromosomeId() + { + return chromosome; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (ParseException | NullPointerException | IOException + | NumberFormatException | ClassCastException e) + { + Cache.log.error("Error looking up gene loci: " + e.getMessage()); + } + return null; + } + } diff --git a/src/jalview/ext/ensembl/EnsemblMap.java b/src/jalview/ext/ensembl/EnsemblMap.java new file mode 100644 index 0000000..56657e0 --- /dev/null +++ b/src/jalview/ext/ensembl/EnsemblMap.java @@ -0,0 +1,422 @@ +package jalview.ext.ensembl; + +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.GeneLociI; +import jalview.util.MapList; + +import java.io.BufferedReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class EnsemblMap extends EnsemblRestClient +{ + private static final String MAPPED = "mapped"; + + private static final String MAPPINGS = "mappings"; + + private static final String CDS = "cds"; + + private static final String CDNA = "cdna"; + + /** + * Default constructor (to use rest.ensembl.org) + */ + public EnsemblMap() + { + super(); + } + + /** + * Constructor given the target domain to fetch data from + * + * 
@param + */ + public EnsemblMap(String domain) + { + super(domain); + } + + @Override + public String getDbName() + { + return DBRefSource.ENSEMBL; + } + + @Override + public AlignmentI getSequenceRecords(String queries) throws Exception + { + return null; // not used + } + + /** + * Constructs a URL of the format + * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json + * + * + * @param species + * @param chromosome + * @param fromRef + * @param toRef + * @param startPos + * @param endPos + * @return + * @throws MalformedURLException + */ + protected URL getAssemblyMapUrl(String species, String chromosome, String fromRef, + String toRef, int startPos, int endPos) + throws MalformedURLException + { + /* + * start-end might be reverse strand - present forwards to the service + */ + boolean forward = startPos <= endPos; + int start = forward ? startPos : endPos; + int end = forward ? endPos : startPos; + String strand = forward ? "1" : "-1"; + String url = String.format( + "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json", + getDomain(), species, fromRef, chromosome, start, end, strand, + toRef); + return new URL(url); + } + + @Override + protected boolean useGetRequest() + { + return true; + } + + @Override + protected String getRequestMimeType(boolean multipleIds) + { + return "application/json"; + } + + @Override + protected String getResponseMimeType() + { + return "application/json"; + } + + @Override + protected URL getUrl(List ids) throws MalformedURLException + { + return null; // not used + } + + /** + * Calls the REST /map service to get the chromosomal coordinates (start/end) + * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef' + * + * @param species + * @param chromosome + * @param fromRef + * @param toRef + * @param queryRange + * @return + * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map + */ + public int[] getAssemblyMapping(String species, String 
chromosome, + String fromRef, String toRef, int[] queryRange) + { + URL url = null; + BufferedReader br = null; + + try + { + url = getAssemblyMapUrl(species, chromosome, fromRef, toRef, queryRange[0], + queryRange[1]); + br = getHttpResponse(url, null); + return (parseAssemblyMappingResponse(br)); + } catch (Throwable t) + { + System.out.println("Error calling " + url + ": " + t.getMessage()); + return null; + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } + } + } + + /** + * Parses the JSON response from the /map/<species>/ REST service. The + * format is (with some fields omitted) + * + *
      +   *  {"mappings": 
      +   *    [{
      +   *       "original": {"end":45109016,"start":45051610},
      +   *       "mapped"  : {"end":43186384,"start":43128978} 
      +   *  }] }
      +   * 
      + * + * @param br + * @return + */ + protected int[] parseAssemblyMappingResponse(BufferedReader br) + { + int[] result = null; + JSONParser jp = new JSONParser(); + + try + { + JSONObject parsed = (JSONObject) jp.parse(br); + JSONArray mappings = (JSONArray) parsed.get(MAPPINGS); + + Iterator rvals = mappings.iterator(); + while (rvals.hasNext()) + { + // todo check for "mapped" + JSONObject val = (JSONObject) rvals.next(); + JSONObject mapped = (JSONObject) val.get(MAPPED); + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String strand = mapped.get("strand").toString(); + if ("1".equals(strand)) + { + result = new int[] { start, end }; + } + else + { + result = new int[] { end, start }; + } + } + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + return result; + } + + /** + * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the + * returned chromosomal coordinates, or returns null if the call fails + * + * @param division + * e.g. Ensembl, EnsemblMetazoa + * @param accession + * e.g. ENST00000592782, Y55B1AR.1.1 + * @param start + * @param end + * @return + */ + public GeneLociI getCdsMapping(String division, String accession, + int start, int end) + { + return getIdMapping(division, accession, start, end, CDS); + } + + /** + * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the + * returned chromosomal coordinates, or returns null if the call fails + * + * @param division + * e.g. Ensembl, EnsemblMetazoa + * @param accession + * e.g. 
ENST00000592782, Y55B1AR.1.1 + * @param start + * @param end + * @return + */ + public GeneLociI getCdnaMapping(String division, String accession, + int start, int end) + { + return getIdMapping(division, accession, start, end, CDNA); + } + + GeneLociI getIdMapping(String division, String accession, int start, + int end, String cdsOrCdna) + { + URL url = null; + BufferedReader br = null; + + try + { + String domain = new EnsemblInfo().getDomain(division); + if (domain != null) + { + url = getIdMapUrl(domain, accession, start, end, cdsOrCdna); + br = getHttpResponse(url, null); + return (parseIdMappingResponse(br, accession, domain)); + } + return null; + } catch (Throwable t) + { + System.out.println("Error calling " + url + ": " + t.getMessage()); + return null; + } finally + { + if (br != null) + { + try + { + br.close(); + } catch (IOException e) + { + // ignore + } + } + } + } + + /** + * Constructs a URL to the /map/cds/ or /map/cdna/ REST service. The + * REST call is to either ensembl or ensemblgenomes, as determined from the + * division, e.g. Ensembl or EnsemblProtists. + * + * @param domain + * @param accession + * @param start + * @param end + * @param cdsOrCdna + * @return + * @throws MalformedURLException + */ + URL getIdMapUrl(String domain, String accession, int start, int end, + String cdsOrCdna) throws MalformedURLException + { + String url = String + .format("%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json", + domain, cdsOrCdna, accession, start, end); + return new URL(url); + } + + /** + * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The + * format is + * + *
      +   * {"mappings":
      +   *   [
      +   *    {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
      +   *     "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
      +   *    {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
      +   *     "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
      +   *   ]
      +   * }
      +   * 
      + * + * @param br + * @param accession + * @param domain + * @return + */ + GeneLociI parseIdMappingResponse(BufferedReader br, String accession, + String domain) + { + JSONParser jp = new JSONParser(); + + try + { + JSONObject parsed = (JSONObject) jp.parse(br); + JSONArray mappings = (JSONArray) parsed.get(MAPPINGS); + + Iterator rvals = mappings.iterator(); + String assembly = null; + String chromosome = null; + int fromEnd = 0; + List regions = new ArrayList<>(); + + while (rvals.hasNext()) + { + JSONObject val = (JSONObject) rvals.next(); + JSONObject original = (JSONObject) val.get("original"); + fromEnd = Integer.parseInt(original.get("end").toString()); + + JSONObject mapped = (JSONObject) val.get(MAPPED); + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String ass = mapped.get("assembly_name").toString(); + if (assembly != null && !assembly.equals(ass)) + { + System.err + .println("EnsemblMap found multiple assemblies - can't resolve"); + return null; + } + assembly = ass; + String chr = mapped.get("seq_region_name").toString(); + if (chromosome != null && !chromosome.equals(chr)) + { + System.err + .println("EnsemblMap found multiple chromosomes - can't resolve"); + return null; + } + chromosome = chr; + String strand = mapped.get("strand").toString(); + if ("-1".equals(strand)) + { + regions.add(new int[] { end, start }); + } + else + { + regions.add(new int[] { start, end }); + } + } + + /* + * processed all mapped regions on chromosome, assemble the result, + * having first fetched the species id for the accession + */ + final String species = new EnsemblLookup(domain) + .getSpecies(accession); + final String as = assembly; + final String chr = chromosome; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + final MapList map = new MapList(fromRange, regions, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + 
return species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return as; + } + + @Override + public String getChromosomeId() + { + return chr; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (IOException | ParseException | NumberFormatException e) + { + // ignore + } + + return null; + } + +} diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java index b1bc8e5..e3d1215 100644 --- a/src/jalview/ext/ensembl/EnsemblRestClient.java +++ b/src/jalview/ext/ensembl/EnsemblRestClient.java @@ -72,7 +72,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher private static final String REST_CHANGE_LOG = "https://github.com/Ensembl/ensembl-rest/wiki/Change-log"; - private static Map domainData; + private static Map domainData; // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats private static final String PING_URL = "http://rest.ensembl.org/info/ping.json"; @@ -87,8 +87,8 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher { domainData = new HashMap<>(); domainData.put(ENSEMBL_REST, - new EnsemblInfo(ENSEMBL_REST, LATEST_ENSEMBL_REST_VERSION)); - domainData.put(ENSEMBL_GENOMES_REST, new EnsemblInfo( + new EnsemblData(ENSEMBL_REST, LATEST_ENSEMBL_REST_VERSION)); + domainData.put(ENSEMBL_GENOMES_REST, new EnsemblData( ENSEMBL_GENOMES_REST, LATEST_ENSEMBLGENOMES_REST_VERSION)); } @@ -381,7 +381,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher */ protected boolean isEnsemblAvailable() { - EnsemblInfo info = domainData.get(getDomain()); + EnsemblData info = domainData.get(getDomain()); long now = System.currentTimeMillis(); @@ -455,7 +455,7 @@ abstract class EnsemblRestClient extends EnsemblSequenceFetcher */ private void checkEnsemblRestVersion() { - EnsemblInfo info = domainData.get(getDomain()); + EnsemblData info = domainData.get(getDomain()); JSONParser jp = new JSONParser(); URL url = null; diff 
--git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 577111e..35ceea3 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -34,6 +34,7 @@ import jalview.datamodel.features.SequenceFeatures; import jalview.exceptions.JalviewException; import jalview.io.FastaFile; import jalview.io.FileParse; +import jalview.io.gff.Gff3Helper; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.util.Comparison; @@ -59,8 +60,6 @@ import java.util.Map.Entry; */ public abstract class EnsemblSeqProxy extends EnsemblRestClient { - private static final String ALLELES = "alleles"; - protected static final String PARENT = "Parent"; protected static final String ID = "ID"; @@ -717,7 +716,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient */ static void reverseComplementAlleles(SequenceFeature sf) { - final String alleles = (String) sf.getValue(ALLELES); + final String alleles = (String) sf.getValue(Gff3Helper.ALLELES); if (alleles == null) { return; @@ -728,7 +727,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient reverseComplementAllele(complement, allele); } String comp = complement.toString(); - sf.setValue(ALLELES, comp); + sf.setValue(Gff3Helper.ALLELES, comp); sf.setDescription(comp); /* @@ -738,7 +737,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient String atts = sf.getAttributes(); if (atts != null) { - atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp); + atts = atts.replace(Gff3Helper.ALLELES + "=" + alleles, + Gff3Helper.ALLELES + "=" + comp); sf.setAttributes(atts); } } diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 75598a0..65be906 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -152,7 +152,6 @@ public class EnsemblSymbol extends 
EnsemblXref if (br != null) { String geneId = parseSymbolResponse(br); - System.out.println(url + " returned " + geneId); if (geneId != null && !result.contains(geneId)) { result.add(geneId); diff --git a/src/jalview/ext/htsjdk/VCFReader.java b/src/jalview/ext/htsjdk/VCFReader.java new file mode 100644 index 0000000..14c057f --- /dev/null +++ b/src/jalview/ext/htsjdk/VCFReader.java @@ -0,0 +1,214 @@ +package jalview.ext.htsjdk; + +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; + +/** + * A thin wrapper for htsjdk classes to read either plain, or compressed, or + * compressed and indexed VCF files + */ +public class VCFReader implements Closeable, Iterable +{ + private static final String GZ = "gz"; + + private static final String TBI_EXTENSION = ".tbi"; + + private boolean indexed; + + private VCFFileReader reader; + + /** + * Constructor given a raw or compressed VCF file or a (tabix) index file + *

      + * For now, file type is inferred from its suffix: .gz or .bgz for compressed + * data, .tbi for an index file, anything else is assumed to be plain text + * VCF. + * + * @param filePath + * @throws IOException + */ + public VCFReader(String filePath) throws IOException + { + if (filePath.endsWith(GZ)) + { + if (new File(filePath + TBI_EXTENSION).exists()) + { + indexed = true; + } + } + else if (filePath.endsWith(TBI_EXTENSION)) + { + indexed = true; + filePath = filePath.substring(0, filePath.length() - 4); + } + + reader = new VCFFileReader(new File(filePath), indexed); + } + + @Override + public void close() throws IOException + { + if (reader != null) + { + reader.close(); + } + } + + /** + * Returns an iterator over VCF variants in the file. The client should call + * close() on the iterator when finished with it. + */ + @Override + public CloseableIterator iterator() + { + return reader == null ? null : reader.iterator(); + } + + /** + * Queries for records overlapping the region specified. Note that this method + * is performant if the VCF file is indexed, and may be very slow if it is + * not. + *

      + * Client code should call close() on the iterator when finished with it. + * + * @param chrom + * the chromosome to query + * @param start + * query interval start + * @param end + * query interval end + * @return + */ + public CloseableIterator query(final String chrom, + final int start, final int end) + { + if (reader == null) { + return null; + } + if (indexed) + { + return reader.query(chrom, start, end); + } + else + { + return queryUnindexed(chrom, start, end); + } + } + + /** + * Returns an iterator over variant records read from a flat file which + * overlap the specified chromosomal positions. Call close() on the iterator + * when finished with it! + * + * @param chrom + * @param start + * @param end + * @return + */ + protected CloseableIterator queryUnindexed( + final String chrom, final int start, final int end) + { + final CloseableIterator it = reader.iterator(); + + return new CloseableIterator() + { + boolean atEnd = false; + + // prime look-ahead buffer with next matching record + private VariantContext next = findNext(); + + private VariantContext findNext() + { + if (atEnd) + { + return null; + } + VariantContext variant = null; + while (it.hasNext()) + { + variant = it.next(); + int vstart = variant.getStart(); + + if (vstart > end) + { + atEnd = true; + close(); + return null; + } + + int vend = variant.getEnd(); + // todo what is the undeprecated way to get + // the chromosome for the variant? 
+ if (chrom.equals(variant.getChr()) && (vstart <= end) + && (vend >= start)) + { + return variant; + } + } + return null; + } + + @Override + public boolean hasNext() + { + boolean hasNext = !atEnd && (next != null); + if (!hasNext) + { + close(); + } + return hasNext; + } + + @Override + public VariantContext next() + { + /* + * return the next match, and then re-prime + * it with the following one (if any) + */ + VariantContext temp = next; + next = findNext(); + return temp; + } + + @Override + public void remove() + { + // not implemented + } + + @Override + public void close() + { + it.close(); + } + }; + } + + /** + * Returns an object that models the VCF file headers + * + * @return + */ + public VCFHeader getFileHeader() + { + return reader == null ? null : reader.getFileHeader(); + } + + /** + * Answers true if we are processing a tab-indexed VCF file, false if it is a + * plain text (uncompressed) file. + * + * @return + */ + public boolean isIndex() + { + return indexed; + } +} diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 298688b..5b812c2 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -81,6 +81,7 @@ import jalview.io.JnetAnnotationMaker; import jalview.io.NewickFile; import jalview.io.ScoreMatrixFile; import jalview.io.TCoffeeScoreFile; +import jalview.io.vcf.VCFLoader; import jalview.jbgui.GAlignFrame; import jalview.schemes.ColourSchemeI; import jalview.schemes.ColourSchemes; @@ -839,6 +840,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, AlignmentI al = getViewport().getAlignment(); boolean nucleotide = al.isNucleotide(); + loadVcf.setVisible(nucleotide); showTranslation.setVisible(nucleotide); showReverse.setVisible(nucleotide); showReverseComplement.setVisible(nucleotide); @@ -4258,7 +4260,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, protected void showProductsFor(final SequenceI[] sel, final boolean 
_odna, final String source) { - new Thread(CrossRefAction.showProductsFor(sel, _odna, source, this)) + new Thread(CrossRefAction.getHandlerFor(sel, _odna, source, this)) .start(); } @@ -5585,6 +5587,26 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, new CalculationChooser(AlignFrame.this); } } + + @Override + protected void loadVcf_actionPerformed() + { + JalviewFileChooser chooser = new JalviewFileChooser( + Cache.getProperty("LAST_DIRECTORY")); + chooser.setFileView(new JalviewFileView()); + chooser.setDialogTitle(MessageManager.getString("label.load_vcf_file")); + chooser.setToolTipText(MessageManager.getString("label.load_vcf_file")); + + int value = chooser.showOpenDialog(null); + + if (value == JalviewFileChooser.APPROVE_OPTION) + { + String choice = chooser.getSelectedFile().getPath(); + Cache.setProperty("LAST_DIRECTORY", choice); + new VCFLoader(viewport.getAlignment()).loadVCF(choice, this); + } + + } } class PrintThread extends Thread diff --git a/src/jalview/gui/AquaInternalFrameManager.java b/src/jalview/gui/AquaInternalFrameManager.java index ea809eb..829135b 100644 --- a/src/jalview/gui/AquaInternalFrameManager.java +++ b/src/jalview/gui/AquaInternalFrameManager.java @@ -60,7 +60,6 @@ import javax.swing.JInternalFrame; * around to the bottom of the window stack (as the original implementation * does) * - * @see com.sun.java.swing.plaf.windows.WindowsDesktopManager */ public class AquaInternalFrameManager extends DefaultDesktopManager { diff --git a/src/jalview/gui/CrossRefAction.java b/src/jalview/gui/CrossRefAction.java index 2d1dfd4..285e574 100644 --- a/src/jalview/gui/CrossRefAction.java +++ b/src/jalview/gui/CrossRefAction.java @@ -27,17 +27,25 @@ import jalview.api.FeatureSettingsModelI; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; +import jalview.datamodel.GeneLociI; import 
jalview.datamodel.SequenceI; +import jalview.ext.ensembl.EnsemblInfo; +import jalview.ext.ensembl.EnsemblMap; import jalview.io.gff.SequenceOntologyI; import jalview.structure.StructureSelectionManager; +import jalview.util.DBRefUtils; +import jalview.util.MapList; +import jalview.util.MappingUtils; import jalview.util.MessageManager; import jalview.ws.SequenceFetcher; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; - -import javax.swing.JOptionPane; +import java.util.Map; +import java.util.Set; /** * Factory constructor and runnable for discovering and displaying @@ -52,13 +60,13 @@ public class CrossRefAction implements Runnable private SequenceI[] sel; - private boolean _odna; + private final boolean _odna; private String source; - List xrefViews = new ArrayList(); + List xrefViews = new ArrayList<>(); - public List getXrefViews() + List getXrefViews() { return xrefViews; } @@ -90,6 +98,13 @@ public class CrossRefAction implements Runnable { return; } + + /* + * try to look up chromosomal coordinates for nucleotide + * sequences (if not already retrieved) + */ + findGeneLoci(xrefs.getSequences()); + /* * get display scheme (if any) to apply to features */ @@ -113,75 +128,14 @@ public class CrossRefAction implements Runnable if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) { - boolean copyAlignmentIsAligned = false; - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, - xrefsAlignment.getSequencesArray()); - if (copyAlignment.getHeight() == 0) - { - JvOptionPane.showMessageDialog(alignFrame, - MessageManager.getString("label.cant_map_cds"), - MessageManager.getString("label.operation_failed"), - JvOptionPane.OK_OPTION); - System.err.println("Failed to make CDS alignment"); - } - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || 
AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } - } - else + copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna, + xrefs, xrefsAlignment); + if (copyAlignment == null) { - copyAlignment = AlignmentUtils.makeCopyAlignment(sel, - xrefs.getSequencesArray(), dataset); - } - copyAlignment - .setGapCharacter(alignFrame.viewport.getGapCharacter()); - - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - - /* - * register any new mappings for sequence mouseover etc - * (will not duplicate any previously registered mappings) - */ - ssm.registerMappings(dataset.getCodonFrames()); - - if (copyAlignment.getHeight() <= 0) - { - System.err.println( - "No Sequences generated for xRef type " + source); - return; - } - /* - * align protein to dna - */ - if (dna && copyAlignmentIsAligned) - { - xrefsAlignment.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! - */ - // TODO: generalise for other sources of locus/transcript/cds data - if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(xrefsAlignment); - } + return; // failed } } + /* * build AlignFrame(s) according to available alignment data */ @@ -207,6 +161,7 @@ public class CrossRefAction implements Runnable xrefViews.add(newFrame.alignPanel); return; // via finally clause } + AlignFrame copyThis = new AlignFrame(copyAlignment, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); copyThis.setTitle(alignFrame.getTitle()); @@ -263,6 +218,260 @@ public class CrossRefAction implements Runnable } /** + * Tries to add chromosomal coordinates to any nucleotide sequence which does + * not already have them. Coordinates are retrieved from Ensembl given an + * Ensembl identifier, either on the sequence itself or on a peptide sequence + * it has a reference to. + * + *

      +   * Example (human):
      +   * - fetch EMBLCDS cross-references for Uniprot entry P30419
      +   * - the EMBL sequences do not have xrefs to Ensembl
      +   * - the Uniprot entry has xrefs to 
      +   *    ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
      +   * - either of the transcript ids can be used to retrieve gene loci e.g.
      +   *    http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
      +   * Example (non-vertebrate):
      +   * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
      +   * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
      +   * - can retrieve gene loci with
      +   *    http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
      +   * 
      + * + * @param sequences + */ + public static void findGeneLoci(List sequences) + { + Map retrievedLoci = new HashMap<>(); + for (SequenceI seq : sequences) + { + findGeneLoci(seq, retrievedLoci); + } + } + + /** + * Tries to find chromosomal coordinates for the sequence, by searching its + * direct and indirect cross-references for Ensembl. If the loci have already + * been retrieved, just reads them out of the map of retrievedLoci; this is + * the case of an alternative transcript for the same protein. Otherwise calls + * a REST service to retrieve the loci, and if successful, adds them to the + * sequence and to the retrievedLoci. + * + * @param seq + * @param retrievedLoci + */ + static void findGeneLoci(SequenceI seq, + Map retrievedLoci) + { + /* + * don't replace any existing chromosomal coordinates + */ + if (seq == null || seq.isProtein() || seq.getGeneLoci() != null + || seq.getDBRefs() == null) + { + return; + } + + Set ensemblDivisions = new EnsemblInfo().getDivisions(); + + /* + * first look for direct dbrefs from sequence to Ensembl + */ + String[] divisionsArray = ensemblDivisions + .toArray(new String[ensemblDivisions.size()]); + DBRefEntry[] seqRefs = seq.getDBRefs(); + DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs, + divisionsArray); + if (directEnsemblRefs != null) + { + for (DBRefEntry ensemblRef : directEnsemblRefs) + { + if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) + { + return; + } + } + } + + /* + * else look for indirect dbrefs from sequence to Ensembl + */ + for (DBRefEntry dbref : seq.getDBRefs()) + { + if (dbref.getMap() != null && dbref.getMap().getTo() != null) + { + DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs(); + DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs, + divisionsArray); + if (indirectEnsemblRefs != null) + { + for (DBRefEntry ensemblRef : indirectEnsemblRefs) + { + if (fetchGeneLoci(seq, ensemblRef, retrievedLoci)) + { + return; + } + } + } + } + } + } + + /** + * 
Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes) + * identifier in dbref. If successful, and the sequence length matches gene + * loci length, then add it to the sequence, and to the retrievedLoci map. + * Answers true if successful, else false. + * + * @param seq + * @param dbref + * @param retrievedLoci + * @return + */ + static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref, + Map retrievedLoci) + { + String accession = dbref.getAccessionId(); + String division = dbref.getSource(); + + /* + * hack: ignore cross-references to Ensembl protein ids + * (or use map/translation perhaps?) + * todo: is there an equivalent in EnsemblGenomes? + */ + if (accession.startsWith("ENSP")) + { + return false; + } + EnsemblMap mapper = new EnsemblMap(); + + /* + * try CDS mapping first + */ + GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1, + seq.getLength()); + if (geneLoci != null) + { + MapList map = geneLoci.getMap(); + int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); + if (mappedFromLength == seq.getLength()) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), geneLoci.getMap()); + retrievedLoci.put(dbref, geneLoci); + return true; + } + } + + /* + * else try CDNA mapping + */ + geneLoci = mapper.getCdnaMapping(division, accession, 1, + seq.getLength()); + if (geneLoci != null) + { + MapList map = geneLoci.getMap(); + int mappedFromLength = MappingUtils.getLength(map.getFromRanges()); + if (mappedFromLength == seq.getLength()) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), geneLoci.getMap()); + retrievedLoci.put(dbref, geneLoci); + return true; + } + } + + return false; + } + + /** + * @param alignment + * @param dataset + * @param dna + * @param xrefs + * @param xrefsAlignment + * @return + */ + protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment, + AlignmentI dataset, boolean dna, 
AlignmentI xrefs, + AlignmentI xrefsAlignment) + { + AlignmentI copyAlignment; + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, + xrefsAlignment.getSequencesArray()); + if (copyAlignment.getHeight() == 0) + { + JvOptionPane.showMessageDialog(alignFrame, + MessageManager.getString("label.cant_map_cds"), + MessageManager.getString("label.operation_failed"), + JvOptionPane.OK_OPTION); + System.err.println("Failed to make CDS alignment"); + return null; + } + + /* + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl + */ + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) + { + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; + } + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment(sel, + xrefs.getSequencesArray(), dataset); + } + copyAlignment + .setGapCharacter(alignFrame.viewport.getGapCharacter()); + + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println( + "No Sequences generated for xRef type " + source); + return null; + } + + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { + /* + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! 
+ */ + // TODO: generalise for other sources of locus/transcript/cds data + if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) + { + copyAlignment.alignAs(xrefsAlignment); + } + } + + return copyAlignment; + } + + /** * Makes an alignment containing the given sequences, and adds them to the * given dataset, which is also set as the dataset for the new alignment * @@ -291,20 +500,28 @@ public class CrossRefAction implements Runnable return al; } - public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel, - boolean _odna, String source) + /** + * Constructor + * + * @param af + * @param seqs + * @param fromDna + * @param dbSource + */ + CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna, + String dbSource) { - this.alignFrame = alignFrame; - this.sel = sel; - this._odna = _odna; - this.source = source; + this.alignFrame = af; + this.sel = seqs; + this._odna = fromDna; + this.source = dbSource; } - public static CrossRefAction showProductsFor(final SequenceI[] sel, - final boolean _odna, final String source, + public static CrossRefAction getHandlerFor(final SequenceI[] sel, + final boolean fromDna, final String source, final AlignFrame alignFrame) { - return new CrossRefAction(alignFrame, sel, _odna, source); + return new CrossRefAction(alignFrame, sel, fromDna, source); } } diff --git a/src/jalview/gui/CutAndPasteHtmlTransfer.java b/src/jalview/gui/CutAndPasteHtmlTransfer.java index 71a1520..2e51bce 100644 --- a/src/jalview/gui/CutAndPasteHtmlTransfer.java +++ b/src/jalview/gui/CutAndPasteHtmlTransfer.java @@ -141,6 +141,7 @@ public class CutAndPasteHtmlTransfer extends GCutAndPasteHtmlTransfer */ public void setText(String text) { + textarea.setDocument(textarea.getEditorKit().createDefaultDocument()); textarea.setText(text); } diff --git a/src/jalview/gui/FeatureColourChooser.java b/src/jalview/gui/FeatureColourChooser.java index d8db546..fbe8437 100644 --- a/src/jalview/gui/FeatureColourChooser.java +++ 
b/src/jalview/gui/FeatureColourChooser.java @@ -22,19 +22,29 @@ package jalview.gui; import jalview.api.FeatureColourI; import jalview.datamodel.GraphLine; +import jalview.datamodel.features.FeatureAttributes; import jalview.schemes.FeatureColour; import jalview.util.MessageManager; +import jalview.util.matcher.Condition; +import jalview.util.matcher.KeyedMatcher; +import jalview.util.matcher.KeyedMatcherI; +import jalview.util.matcher.KeyedMatcherSet; +import jalview.util.matcher.KeyedMatcherSetI; import java.awt.BorderLayout; import java.awt.Color; import java.awt.Dimension; import java.awt.FlowLayout; +import java.awt.GridLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.awt.event.FocusAdapter; import java.awt.event.FocusEvent; +import java.awt.event.ItemEvent; +import java.awt.event.ItemListener; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; +import java.util.Iterator; import javax.swing.BorderFactory; import javax.swing.JCheckBox; @@ -94,6 +104,18 @@ public class FeatureColourChooser extends JalviewDialog private ActionListener colourEditor = null; + private JComboBox filterAttribute; + + private JComboBox filterCondition; + + private JTextField filterValue; + + private JComboBox filterAttribute2; + + private JComboBox filterCondition2; + + private JTextField filterValue2; + /** * Constructor * @@ -213,62 +235,90 @@ public class FeatureColourChooser extends JalviewDialog threshline.value = cs.getThreshold(); } + setInitialFilters(cs.getAttributeFilters()); + adjusting = false; changeColour(false); waitForInput(); } - private void jbInit() throws Exception + /** + * Populates the attribute filter fields for the initial display + * + * @param filters + */ + void setInitialFilters(KeyedMatcherSetI filters) { + // todo generalise to populate N conditions - minColour.setFont(JvSwingUtils.getLabelFont()); - minColour.setBorder(BorderFactory.createLineBorder(Color.black)); - 
minColour.setPreferredSize(new Dimension(40, 20)); - minColour.setToolTipText(MessageManager.getString("label.min_colour")); - minColour.addMouseListener(new MouseAdapter() + if (filters == null) { - @Override - public void mousePressed(MouseEvent e) - { - if (minColour.isEnabled()) - { - minColour_actionPerformed(); - } - } - }); - maxColour.setFont(JvSwingUtils.getLabelFont()); - maxColour.setBorder(BorderFactory.createLineBorder(Color.black)); - maxColour.setPreferredSize(new Dimension(40, 20)); - maxColour.setToolTipText(MessageManager.getString("label.max_colour")); - maxColour.addMouseListener(new MouseAdapter() + return; + } + + Iterator theFilters = filters.getMatchers(); + if (theFilters.hasNext()) { - @Override - public void mousePressed(MouseEvent e) - { - if (maxColour.isEnabled()) - { - maxColour_actionPerformed(); - } - } - }); - maxColour.setBorder(new LineBorder(Color.black)); - JLabel minText = new JLabel(MessageManager.getString("label.min")); - minText.setFont(JvSwingUtils.getLabelFont()); - JLabel maxText = new JLabel(MessageManager.getString("label.max")); - maxText.setFont(JvSwingUtils.getLabelFont()); - this.setLayout(new BorderLayout()); - JPanel jPanel1 = new JPanel(); - jPanel1.setBackground(Color.white); - JPanel jPanel2 = new JPanel(); - jPanel2.setLayout(new FlowLayout()); - jPanel2.setBackground(Color.white); + KeyedMatcherI filter = theFilters.next(); + filterAttribute.setSelectedItem(filter.getKey()); + filterCondition.setSelectedItem(filter.getMatcher().getCondition()); + filterValue.setText(filter.getMatcher().getPattern()); + } + if (theFilters.hasNext()) + { + KeyedMatcherI filter = theFilters.next(); + boolean anded = filters.isAnded(); + // todo add OR/AND condition to gui + // - user choice for the second condition, fixed thereafter + filterAttribute2.setSelectedItem(filter.getKey()); + filterCondition2.setSelectedItem(filter.getMatcher().getCondition()); + filterValue2.setText(filter.getMatcher().getPattern()); + } + } + + 
private void jbInit() throws Exception + { + this.setLayout(new GridLayout(4, 1)); + + JPanel colourByPanel = initColoursPanel(); + + JPanel thresholdPanel = initThresholdPanel(); + + JPanel okCancelPanel = initOkCancelPanel(); + + this.add(colourByPanel); + this.add(thresholdPanel); + + /* + * add filter by attributes options only if we know any attributes + */ + Iterator attributes = FeatureAttributes.getInstance() + .getAttributes(type).iterator(); + if (attributes.hasNext()) + { + JPanel filtersPanel = initFiltersPanel(attributes); + this.add(filtersPanel); + } + + this.add(okCancelPanel); + } + + /** + * Lay out fields for threshold options + * + * @return + */ + protected JPanel initThresholdPanel() + { + JPanel thresholdPanel = new JPanel(); + thresholdPanel.setLayout(new FlowLayout()); threshold.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) { - threshold_actionPerformed(); + changeColour(true); } }); threshold.setToolTipText(MessageManager @@ -280,8 +330,6 @@ public class FeatureColourChooser extends JalviewDialog threshold.addItem(MessageManager .getString("label.threshold_feature_below_threshold")); // index 2 - JPanel jPanel3 = new JPanel(); - jPanel3.setLayout(new FlowLayout()); thresholdValue.addActionListener(new ActionListener() { @Override @@ -308,7 +356,7 @@ public class FeatureColourChooser extends JalviewDialog MessageManager.getString("label.adjust_threshold")); thresholdValue.setEnabled(false); thresholdValue.setColumns(7); - jPanel3.setBackground(Color.white); + thresholdPanel.setBackground(Color.white); thresholdIsMin.setBackground(Color.white); thresholdIsMin .setText(MessageManager.getString("label.threshold_minmax")); @@ -319,40 +367,101 @@ public class FeatureColourChooser extends JalviewDialog @Override public void actionPerformed(ActionEvent actionEvent) { - thresholdIsMin_actionPerformed(); + changeColour(true); } }); - colourByLabel.setBackground(Color.white); - colourByLabel - 
.setText(MessageManager.getString("label.colour_by_label")); - colourByLabel.setToolTipText(MessageManager.getString( - "label.display_features_same_type_different_label_using_different_colour")); - colourByLabel.addActionListener(new ActionListener() + thresholdPanel.add(threshold); + thresholdPanel.add(slider); + thresholdPanel.add(thresholdValue); + thresholdPanel.add(thresholdIsMin); + return thresholdPanel; + } + + /** + * Lay out OK and Cancel buttons + * + * @return + */ + protected JPanel initOkCancelPanel() + { + JPanel okCancelPanel = new JPanel(); + okCancelPanel.setBackground(Color.white); + okCancelPanel.add(ok); + okCancelPanel.add(cancel); + return okCancelPanel; + } + + /** + * Lay out Colour by Label and min/max colour widgets + * + * @return + */ + protected JPanel initColoursPanel() + { + JPanel colourByPanel = new JPanel(); + colourByPanel.setLayout(new FlowLayout()); + colourByPanel.setBackground(Color.white); + minColour.setFont(JvSwingUtils.getLabelFont()); + minColour.setBorder(BorderFactory.createLineBorder(Color.black)); + minColour.setPreferredSize(new Dimension(40, 20)); + minColour.setToolTipText(MessageManager.getString("label.min_colour")); + minColour.addMouseListener(new MouseAdapter() { @Override - public void actionPerformed(ActionEvent actionEvent) + public void mousePressed(MouseEvent e) { - colourByLabel_actionPerformed(); + if (minColour.isEnabled()) + { + minColour_actionPerformed(); + } } }); + maxColour.setFont(JvSwingUtils.getLabelFont()); + maxColour.setBorder(BorderFactory.createLineBorder(Color.black)); + maxColour.setPreferredSize(new Dimension(40, 20)); + maxColour.setToolTipText(MessageManager.getString("label.max_colour")); + maxColour.addMouseListener(new MouseAdapter() + { + @Override + public void mousePressed(MouseEvent e) + { + if (maxColour.isEnabled()) + { + maxColour_actionPerformed(); + } + } + }); + maxColour.setBorder(new LineBorder(Color.black)); + JLabel minText = new 
JLabel(MessageManager.getString("label.min")); + minText.setFont(JvSwingUtils.getLabelFont()); + JLabel maxText = new JLabel(MessageManager.getString("label.max")); + maxText.setFont(JvSwingUtils.getLabelFont()); JPanel colourPanel = new JPanel(); colourPanel.setBackground(Color.white); - jPanel1.add(ok); - jPanel1.add(cancel); - jPanel2.add(colourByLabel, BorderLayout.WEST); - jPanel2.add(colourPanel, BorderLayout.EAST); colourPanel.add(minText); colourPanel.add(minColour); colourPanel.add(maxText); colourPanel.add(maxColour); - this.add(jPanel3, BorderLayout.CENTER); - jPanel3.add(threshold); - jPanel3.add(slider); - jPanel3.add(thresholdValue); - jPanel3.add(thresholdIsMin); - this.add(jPanel1, BorderLayout.SOUTH); - this.add(jPanel2, BorderLayout.NORTH); + colourByPanel.add(colourByLabel, BorderLayout.WEST); + colourByPanel.add(colourPanel, BorderLayout.EAST); + + colourByLabel.setBackground(Color.white); + colourByLabel + .setText(MessageManager.getString("label.colour_by_label")); + colourByLabel + .setToolTipText(MessageManager + .getString("label.display_features_same_type_different_label_using_different_colour")); + colourByLabel.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent actionEvent) + { + changeColour(true); + } + }); + + return colourByPanel; } /** @@ -406,6 +515,11 @@ public class FeatureColourChooser extends JalviewDialog return; } + if (!validateInputs()) + { + return; + } + boolean aboveThreshold = false; boolean belowThreshold = false; if (threshold.getSelectedIndex() == 1) @@ -505,11 +619,101 @@ public class FeatureColourChooser extends JalviewDialog maxColour.setForeground(oldmaxColour); minColour.setForeground(oldminColour); } + + /* + * add attribute filters if entered + */ + if (filterAttribute != null) + { + setAttributeFilters(acg); + } + fr.setColour(type, acg); cs = acg; ap.paintAlignment(updateStructsAndOverview, updateStructsAndOverview); } + /** + * Checks inputs are valid, and 
answers true if they are, else false. Also + * sets the colour of invalid inputs to red. + * + * @return + */ + boolean validateInputs() + { + // todo generalise to N filters + return isValidFilter(filterValue, filterCondition) + & isValidFilter(filterValue2, filterCondition2); // '&' not '||': both filters must be valid, and both fields are always checked and coloured + } + + /** + * Answers true unless a numeric condition has been selected with a + * non-numeric value + * + * @param value + * @param condition + */ + protected boolean isValidFilter(JTextField value, JComboBox condition) + { + if (value == null || condition == null) + { + return true; // fields not populated + } + + value.setBackground(Color.white); + String v1 = value.getText().trim(); + if (v1.length() > 0) + { + Condition c1 = (Condition) condition.getSelectedItem(); + if (c1.isNumeric()) + { + try + { + Float.valueOf(v1); + } catch (NumberFormatException e) + { + value.setBackground(Color.red); + return false; + } + } + } + + return true; + } + + /** + * Sets any attribute value filters entered in the dialog as filters on the + * colour scheme + * + * @param acg + */ + protected void setAttributeFilters(FeatureColourI acg) + { + String attribute = (String) filterAttribute.getSelectedItem(); + Condition cond = (Condition) filterCondition.getSelectedItem(); + String pattern = filterValue.getText().trim(); + if (pattern.length() > 0) + { + KeyedMatcherSetI filters = new KeyedMatcherSet(); + KeyedMatcherI km = new KeyedMatcher(attribute, cond, pattern); + filters.and(km); + + /* + * is there a second condition? + * todo: allow N conditions with choice of AND or OR (but not both!)
+ */ + pattern = filterValue2.getText().trim(); + if (pattern.length() > 0) // any non-empty value counts, same test as the first filter + { + attribute = (String) filterAttribute2.getSelectedItem(); + cond = (Condition) filterCondition2.getSelectedItem(); + KeyedMatcherI km2 = new KeyedMatcher(attribute, cond, pattern); + filters.and(km2); + } + acg.setAttributeFilters(filters); + } + } + @Override protected void raiseClosed() { @@ -544,14 +748,6 @@ public class FeatureColourChooser extends JalviewDialog } /** - * Action on change of choice of No / Above / Below Threshold - */ - protected void threshold_actionPerformed() - { - changeColour(true); - } - - /** * Action on text entry of a threshold value */ protected void thresholdValue_actionPerformed() @@ -594,16 +790,6 @@ public class FeatureColourChooser extends JalviewDialog changeColour(false); } - protected void thresholdIsMin_actionPerformed() - { - changeColour(true); - } - - protected void colourByLabel_actionPerformed() - { - changeColour(true); - } - void addActionListener(ActionListener graduatedColorEditor) { if (colourEditor != null) @@ -629,4 +815,130 @@ public class FeatureColourChooser extends JalviewDialog return cs; } + /** + * Lay out fields for attribute value filters + * + * @param attNames + * + * @return + */ + protected JPanel initFiltersPanel(Iterator attNames) + { + JPanel filtersPanel = new JPanel(); + filtersPanel.setLayout(new GridLayout(2, 3)); + filtersPanel.setBackground(Color.white); + + /* + * drop-down choice of attribute + */ + filterAttribute = new JComboBox<>(); + filterAttribute2 = new JComboBox<>(); + while (attNames.hasNext()) + { + String attName = attNames.next(); + filterAttribute.addItem(attName); + filterAttribute2.addItem(attName); + } + filterAttribute.addItemListener(new ItemListener() + { + @Override + public void itemStateChanged(ItemEvent e) + { + changeColour(true); + } + }); + + /* + * drop-down choice of test condition + */ + filterCondition = new JComboBox<>(); + for (Condition cond : Condition.values()) + { +
filterCondition.addItem(cond); + } + filterCondition.addItemListener(new ItemListener() + { + @Override + public void itemStateChanged(ItemEvent e) + { + changeColour(true); + } + }); + + filterValue = new JTextField(12); + filterValue.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + changeColour(true); + } + }); + filterValue.addFocusListener(new FocusAdapter() + { + @Override + public void focusLost(FocusEvent e) + { + changeColour(true); + } + }); + + /* + * repeat for a second filter + * todo: generalise to N filters + */ + filterAttribute2.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + changeColour(true); + } + }); + + /* + * drop-down choice of test condition + */ + filterCondition2 = new JComboBox<>(); + for (Condition cond : Condition.values()) + { + filterCondition2.addItem(cond); + } + filterCondition2.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + changeColour(true); + } + }); + + filterValue2 = new JTextField(12); + filterValue2.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + changeColour(true); + } + }); + filterValue2.addFocusListener(new FocusAdapter() + { + @Override + public void focusLost(FocusEvent e) + { + changeColour(true); + } + }); + + filtersPanel.add(filterAttribute); + filtersPanel.add(filterCondition); + filtersPanel.add(filterValue); + filtersPanel.add(filterAttribute2); + filtersPanel.add(filterCondition2); + filtersPanel.add(filterValue2); + + return filtersPanel; + } + } diff --git a/src/jalview/gui/IdPanel.java b/src/jalview/gui/IdPanel.java index 1f2a3ad..a4f79c2 100755 --- a/src/jalview/gui/IdPanel.java +++ b/src/jalview/gui/IdPanel.java @@ -331,7 +331,8 @@ public class IdPanel extends JPanel * and any non-positional features */ List nlinks = Preferences.sequenceUrlLinks.getLinksForMenu(); - for 
(SequenceFeature sf : sq.getFeatures().getNonPositionalFeatures()) + List features = sq.getFeatures().getNonPositionalFeatures(); + for (SequenceFeature sf : features) { if (sf.links != null) { @@ -342,7 +343,7 @@ public class IdPanel extends JPanel } } - PopupMenu pop = new PopupMenu(alignPanel, sq, nlinks, + PopupMenu pop = new PopupMenu(alignPanel, sq, features, Preferences.getGroupURLLinks()); pop.show(this, e.getX(), e.getY()); } diff --git a/src/jalview/gui/JalviewDialog.java b/src/jalview/gui/JalviewDialog.java index 05f5ffc..1008203 100644 --- a/src/jalview/gui/JalviewDialog.java +++ b/src/jalview/gui/JalviewDialog.java @@ -27,8 +27,8 @@ import java.awt.Dimension; import java.awt.Rectangle; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; +import java.awt.event.WindowAdapter; import java.awt.event.WindowEvent; -import java.awt.event.WindowListener; import javax.swing.JButton; import javax.swing.JDialog; @@ -118,55 +118,14 @@ public abstract class JalviewDialog extends JPanel closeDialog(); } }); - frame.addWindowListener(new WindowListener() + frame.addWindowListener(new WindowAdapter() { - - @Override - public void windowOpened(WindowEvent e) - { - // TODO Auto-generated method stub - - } - - @Override - public void windowIconified(WindowEvent e) - { - // TODO Auto-generated method stub - - } - - @Override - public void windowDeiconified(WindowEvent e) - { - // TODO Auto-generated method stub - - } - - @Override - public void windowDeactivated(WindowEvent e) - { - // TODO Auto-generated method stub - - } - @Override public void windowClosing(WindowEvent e) { // user has cancelled the dialog closeDialog(); } - - @Override - public void windowClosed(WindowEvent e) - { - } - - @Override - public void windowActivated(WindowEvent e) - { - // TODO Auto-generated method stub - - } }); } diff --git a/src/jalview/gui/PopupMenu.java b/src/jalview/gui/PopupMenu.java index 850a09a..6da7d4f 100644 --- a/src/jalview/gui/PopupMenu.java +++ 
b/src/jalview/gui/PopupMenu.java @@ -34,7 +34,6 @@ import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; import jalview.datamodel.HiddenColumns; import jalview.datamodel.PDBEntry; -import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; @@ -50,6 +49,7 @@ import jalview.schemes.PIDColourScheme; import jalview.util.GroupUrlLink; import jalview.util.GroupUrlLink.UrlStringTooLongException; import jalview.util.MessageManager; +import jalview.util.StringUtils; import jalview.util.UrlLink; import java.awt.Color; @@ -176,25 +176,31 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener * Creates a new PopupMenu object. * * @param ap - * DOCUMENT ME! * @param seq - * DOCUMENT ME! + * @param features + * non-positional features (for seq not null), or positional features + * at residue (for seq equal to null) */ - public PopupMenu(final AlignmentPanel ap, Sequence seq, - List links) + public PopupMenu(final AlignmentPanel ap, SequenceI seq, + List features) { - this(ap, seq, links, null); + this(ap, seq, features, null); } /** + * Constructor * - * @param ap + * @param alignPanel * @param seq - * @param links + * the sequence under the cursor if in the Id panel, null if in the + * sequence panel + * @param features + * non-positional features if in the Id panel, features at the + * clicked residue if in the sequence panel * @param groupLinks */ - public PopupMenu(final AlignmentPanel ap, final SequenceI seq, - List links, List groupLinks) + public PopupMenu(final AlignmentPanel alignPanel, final SequenceI seq, + List features, List groupLinks) { // ///////////////////////////////////////////////////////// // If this is activated from the sequence panel, the user may want to @@ -202,7 +208,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener // // If from the IDPanel, we must display the sequence menu // 
//////////////////////////////////////////////////////// - this.ap = ap; + this.ap = alignPanel; sequence = seq; for (String ff : FileFormats.getInstance().getWritableFormats(true)) @@ -237,9 +243,9 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener /* * And repeat for the current selection group (if there is one): */ - final List selectedGroup = (ap.av.getSelectionGroup() == null + final List selectedGroup = (alignPanel.av.getSelectionGroup() == null ? Collections. emptyList() - : ap.av.getSelectionGroup().getSequences()); + : alignPanel.av.getSelectionGroup().getSequences()); buildAnnotationTypesMenus(groupShowAnnotationsMenu, groupHideAnnotationsMenu, selectedGroup); configureReferenceAnnotationsMenu(groupAddReferenceAnnotations, @@ -257,7 +263,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener if (seq != null) { sequenceMenu.setText(sequence.getName()); - if (seq == ap.av.getAlignment().getSeqrep()) + if (seq == alignPanel.av.getAlignment().getSeqrep()) { makeReferenceSeq.setText( MessageManager.getString("action.unmark_as_reference")); @@ -268,7 +274,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener MessageManager.getString("action.set_as_reference")); } - if (!ap.av.getAlignment().isNucleotide()) + if (!alignPanel.av.getAlignment().isNucleotide()) { remove(rnaStructureMenu); } @@ -279,7 +285,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener * add menu items to 2D-render any alignment or sequence secondary * structure annotation */ - AlignmentAnnotation[] aas = ap.av.getAlignment() + AlignmentAnnotation[] aas = alignPanel.av.getAlignment() .getAlignmentAnnotation(); if (aas != null) { @@ -299,7 +305,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener @Override public void actionPerformed(ActionEvent e) { - new AppVarna(seq, aa, ap); + new AppVarna(seq, aa, alignPanel); } }); rnaStructureMenu.add(menuItem); @@ -328,7 +334,7 
@@ public class PopupMenu extends JPopupMenu implements ColourChangeListener public void actionPerformed(ActionEvent e) { // TODO: VARNA does'nt print gaps in the sequence - new AppVarna(seq, aa, ap); + new AppVarna(seq, aa, alignPanel); } }); rnaStructureMenu.add(menuItem); @@ -353,8 +359,8 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener }); add(menuItem); - if (ap.av.getSelectionGroup() != null - && ap.av.getSelectionGroup().getSize() > 1) + if (alignPanel.av.getSelectionGroup() != null + && alignPanel.av.getSelectionGroup().getSize() > 1) { menuItem = new JMenuItem(MessageManager .formatMessage("label.represent_group_with", new Object[] @@ -370,12 +376,12 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener sequenceMenu.add(menuItem); } - if (ap.av.hasHiddenRows()) + if (alignPanel.av.hasHiddenRows()) { - final int index = ap.av.getAlignment().findIndex(seq); + final int index = alignPanel.av.getAlignment().findIndex(seq); - if (ap.av.adjustForHiddenSeqs(index) - - ap.av.adjustForHiddenSeqs(index - 1) > 1) + if (alignPanel.av.adjustForHiddenSeqs(index) + - alignPanel.av.adjustForHiddenSeqs(index - 1) > 1) { menuItem = new JMenuItem( MessageManager.getString("action.reveal_sequences")); @@ -384,10 +390,10 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener @Override public void actionPerformed(ActionEvent e) { - ap.av.showSequence(index); - if (ap.overviewPanel != null) + alignPanel.av.showSequence(index); + if (alignPanel.overviewPanel != null) { - ap.overviewPanel.updateOverviewImage(); + alignPanel.overviewPanel.updateOverviewImage(); } } }); @@ -396,7 +402,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener } } // for the case when no sequences are even visible - if (ap.av.hasHiddenRows()) + if (alignPanel.av.hasHiddenRows()) { { menuItem = new JMenuItem( @@ -406,10 +412,10 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener 
@Override public void actionPerformed(ActionEvent e) { - ap.av.showAllHiddenSeqs(); - if (ap.overviewPanel != null) + alignPanel.av.showAllHiddenSeqs(); + if (alignPanel.overviewPanel != null) { - ap.overviewPanel.updateOverviewImage(); + alignPanel.overviewPanel.updateOverviewImage(); } } }); @@ -418,9 +424,9 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener } } - SequenceGroup sg = ap.av.getSelectionGroup(); + SequenceGroup sg = alignPanel.av.getSelectionGroup(); boolean isDefinedGroup = (sg != null) - ? ap.av.getAlignment().getGroups().contains(sg) + ? alignPanel.av.getAlignment().getGroups().contains(sg) : false; if (sg != null && sg.getSize() > 0) @@ -458,7 +464,7 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener Hashtable pdbe = new Hashtable<>(), reppdb = new Hashtable<>(); SequenceI sqass = null; - for (SequenceI sq : ap.av.getSequenceSelection()) + for (SequenceI sq : alignPanel.av.getSequenceSelection()) { Vector pes = sq.getDatasetSequence().getAllPDBEntries(); if (pes != null && pes.size() > 0) @@ -508,24 +514,130 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener rnaStructureMenu.setVisible(false); } - if (links != null && links.size() > 0) + addLinks(seq, features); + + if (seq == null) + { + addFeatureDetails(features); + } + } + + /** + * Add a link to show feature details for each sequence feature + * + * @param features + */ + protected void addFeatureDetails(List features) + { + if (features == null || features.isEmpty()) + { + return; + } + JMenu details = new JMenu( + MessageManager.getString("label.feature_details")); + add(details); + + for (final SequenceFeature sf : features) { - addFeatureLinks(seq, links); + int start = sf.getBegin(); + int end = sf.getEnd(); + String desc = null; + if (start == end) + { + desc = String.format("%s %d", sf.getType(), start); + } + else + { + desc = String.format("%s %d-%d", sf.getType(), start, end); + } + String description = 
sf.getDescription(); + if (description != null) + { + description = StringUtils.stripHtmlTags(description); + if (description.length() <= 6) + { + desc = desc + " " + description; + } + else + { + desc = desc + " " + description.substring(0, 6) + ".."; + } + } + if (sf.getFeatureGroup() != null) + { + desc = desc + " (" + sf.getFeatureGroup() + ")"; + } + JMenuItem item = new JMenuItem(desc); + item.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + showFeatureDetails(sf); + } + }); + details.add(item); } } /** + * Opens a panel showing a text report of feature details + * + * @param sf + */ + protected void showFeatureDetails(SequenceFeature sf) + { + CutAndPasteHtmlTransfer cap = new CutAndPasteHtmlTransfer(); + // it appears Java's CSS does not support border-collapse :-( + cap.addStylesheetRule("table { border-collapse: collapse;}"); + cap.addStylesheetRule("table, td, th {border: 1px solid black;}"); + cap.setText(sf.getDetailsReport()); + + Desktop.addInternalFrame(cap, + MessageManager.getString("label.feature_details"), 500, 500); + } + + /** * Adds a 'Link' menu item with a sub-menu item for each hyperlink provided. + * When seq is not null, these are links for the sequence id, which may be to + * external web sites for the sequence accession, and/or links embedded in + * non-positional features. When seq is null, only links embedded in the + * provided features are added.
* * @param seq - * @param links + * @param features */ - void addFeatureLinks(final SequenceI seq, List links) + void addLinks(final SequenceI seq, List features) { JMenu linkMenu = new JMenu(MessageManager.getString("action.link")); + + List nlinks = null; + if (seq != null) + { + nlinks = Preferences.sequenceUrlLinks.getLinksForMenu(); + } + else + { + nlinks = new ArrayList<>(); + } + + if (features != null) + { + for (SequenceFeature sf : features) + { + if (sf.links != null) + { + for (String link : sf.links) + { + nlinks.add(link); + } + } + } + } + Map> linkset = new LinkedHashMap<>(); - for (String link : links) + for (String link : nlinks) { UrlLink urlLink = null; try @@ -548,25 +660,18 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener addshowLinks(linkMenu, linkset.values()); - // disable link menu if there are no valid entries + // only add link menu if it has entries if (linkMenu.getItemCount() > 0) { - linkMenu.setEnabled(true); - } - else - { - linkMenu.setEnabled(false); - } - - if (sequence != null) - { - sequenceMenu.add(linkMenu); - } - else - { - add(linkMenu); + if (sequence != null) + { + sequenceMenu.add(linkMenu); + } + else + { + add(linkMenu); + } } - } /** diff --git a/src/jalview/gui/SeqPanel.java b/src/jalview/gui/SeqPanel.java index 2223ee5..6148a2e 100644 --- a/src/jalview/gui/SeqPanel.java +++ b/src/jalview/gui/SeqPanel.java @@ -59,7 +59,6 @@ import java.awt.event.MouseListener; import java.awt.event.MouseMotionListener; import java.awt.event.MouseWheelEvent; import java.awt.event.MouseWheelListener; -import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -1792,21 +1791,10 @@ public class SeqPanel extends JPanel final int column = findColumn(evt); final int seq = findSeq(evt); SequenceI sequence = av.getAlignment().getSequenceAt(seq); - List allFeatures = ap.getFeatureRenderer() + List features = ap.getFeatureRenderer() .findFeaturesAtColumn(sequence, column + 1); - List links = 
new ArrayList<>(); - for (SequenceFeature sf : allFeatures) - { - if (sf.links != null) - { - for (String link : sf.links) - { - links.add(link); - } - } - } - PopupMenu pop = new PopupMenu(ap, null, links); + PopupMenu pop = new PopupMenu(ap, null, features); pop.show(this, evt.getX(), evt.getY()); } diff --git a/src/jalview/io/SequenceAnnotationReport.java b/src/jalview/io/SequenceAnnotationReport.java index f1ebcac..6d819d3 100644 --- a/src/jalview/io/SequenceAnnotationReport.java +++ b/src/jalview/io/SequenceAnnotationReport.java @@ -26,6 +26,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.gff.GffConstants; import jalview.util.MessageManager; +import jalview.util.StringUtils; import jalview.util.UrlLink; import java.util.Arrays; @@ -58,7 +59,7 @@ public class SequenceAnnotationReport /* * Comparator to order DBRefEntry by Source + accession id (case-insensitive), - * with 'Primary' sources placed before others + * with 'Primary' sources placed before others, and 'chromosome' first of all */ private static Comparator comparator = new Comparator() { @@ -66,6 +67,14 @@ public class SequenceAnnotationReport @Override public int compare(DBRefEntry ref1, DBRefEntry ref2) { + if (ref1.isChromosome()) + { + return ref2.isChromosome() ? 0 : -1; // two chromosome refs compare equal, keeping compare() antisymmetric + } + if (ref2.isChromosome()) + { + return 1; + } String s1 = ref1.getSource(); String s2 = ref2.getSource(); boolean s1Primary = isPrimarySource(s1); @@ -175,50 +184,11 @@ public class SequenceAnnotationReport sb.append(" ").append(feature.end); } - if (feature.getDescription() != null - && !feature.description.equals(feature.getType())) + String description = feature.getDescription(); + if (description != null && !description.equals(feature.getType())) { - String tmpString = feature.getDescription(); - String tmp2up = tmpString.toUpperCase(); - int startTag = tmp2up.indexOf(""); - if (startTag > -1) - { - tmpString = tmpString.substring(startTag + 6); - tmp2up = tmp2up.substring(startTag + 6); - } -
int endTag = tmp2up.indexOf(""); - if (endTag > -1) - { - tmpString = tmpString.substring(0, endTag); - tmp2up = tmp2up.substring(0, endTag); - } - endTag = tmp2up.indexOf(""); - if (endTag > -1) - { - tmpString = tmpString.substring(0, endTag); - } - - if (startTag > -1) - { - sb.append("; ").append(tmpString); - } - else - { - if (tmpString.indexOf("<") > -1 || tmpString.indexOf(">") > -1) - { - // The description does not specify html is to - // be used, so we must remove < > symbols - tmpString = tmpString.replaceAll("<", "<"); - tmpString = tmpString.replaceAll(">", ">"); - - sb.append("; "); - sb.append(tmpString); - } - else - { - sb.append("; ").append(tmpString); - } - } + description = StringUtils.stripHtmlTags(description); + sb.append("; ").append(description); } // check score should be shown if (!Float.isNaN(feature.getScore())) diff --git a/src/jalview/io/gff/Gff3Helper.java b/src/jalview/io/gff/Gff3Helper.java index c7e1d7a..a25a014 100644 --- a/src/jalview/io/gff/Gff3Helper.java +++ b/src/jalview/io/gff/Gff3Helper.java @@ -39,6 +39,8 @@ import java.util.Map; */ public class Gff3Helper extends GffHelperBase { + public static final String ALLELES = "alleles"; + protected static final String TARGET = "Target"; protected static final String ID = "ID"; @@ -399,7 +401,7 @@ public class Gff3Helper extends GffHelperBase /* * Ensembl returns dna variants as 'alleles' */ - desc = StringUtils.listToDelimitedString(attributes.get("alleles"), + desc = StringUtils.listToDelimitedString(attributes.get(ALLELES), ","); } diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java new file mode 100644 index 0000000..5adc55c --- /dev/null +++ b/src/jalview/io/vcf/VCFLoader.java @@ -0,0 +1,1070 @@ +package jalview.io.vcf; + +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; +import 
htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFHeaderLineCount; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; + +import jalview.analysis.AlignmentUtils; +import jalview.analysis.Dna; +import jalview.api.AlignViewControllerGuiI; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; +import jalview.datamodel.Mapping; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureSource; +import jalview.datamodel.features.FeatureSources; +import jalview.ext.ensembl.EnsemblMap; +import jalview.ext.htsjdk.VCFReader; +import jalview.io.gff.Gff3Helper; +import jalview.io.gff.SequenceOntologyI; +import jalview.util.MapList; +import jalview.util.MappingUtils; +import jalview.util.MessageManager; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +/** + * A class to read VCF data (using the htsjdk) and add variants as sequence + * features on dna and any related protein product sequences + * + * @author gmcarstairs + */ +public class VCFLoader +{ + /* + * keys to fields of VEP CSQ consequence data + * see https://www.ensembl.org/info/docs/tools/vep/vep_formats.html + */ + private static final String ALLELE_KEY = "Allele"; + + private static final String ALLELE_NUM_KEY = "ALLELE_NUM"; // 0 (ref), 1... 
+ private static final String FEATURE_KEY = "Feature"; // Ensembl stable id + + /* + * what comes before column headings in CSQ Description field + */ + private static final String FORMAT = "Format: "; + + /* + * default VCF INFO key for VEP consequence data + * NB this can be overridden running VEP with --vcf_info_field + * - we don't handle this case (require CSQ identifier) + */ + private static final String CSQ = "CSQ"; + + /* + * separator for fields in consequence data + */ + private static final String PIPE = "|"; + + private static final String PIPE_REGEX = "\\" + PIPE; + + /* + * key for Allele Frequency output by VEP + * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html + */ + private static final String ALLELE_FREQUENCY_KEY = "AF"; + + /* + * delimiter that separates multiple consequence data blocks + */ + private static final String COMMA = ","; + + /* + * the feature group assigned to a VCF variant in Jalview + */ + private static final String FEATURE_GROUP_VCF = "VCF"; + + /* + * internal delimiter used to build keys for assemblyMappings + * + */ + private static final String EXCL = "!"; + + /* + * the alignment we are associating VCF data with + */ + private AlignmentI al; + + /* + * mappings between VCF and sequence reference assembly regions, as + * key = "species!chromosome!fromAssembly!toAssembly + * value = Map{fromRange, toRange} + */ + private Map> assemblyMappings; + + /* + * holds details of the VCF header lines (metadata) + */ + private VCFHeader header; + + /* + * the position (0...) 
of field in each block of + * CSQ (consequence) data (if declared in the VCF INFO header for CSQ) + * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html + */ + private int csqAlleleFieldIndex = -1; + private int csqAlleleNumberFieldIndex = -1; + private int csqFeatureFieldIndex = -1; + + /* + * a unique identifier under which to save metadata about feature + * attributes (selected INFO field data) + */ + private String sourceId; + + /** + * Constructor given an alignment context + * + * @param alignment + */ + public VCFLoader(AlignmentI alignment) + { + al = alignment; + + // map of species!chromosome!fromAssembly!toAssembly to {fromRange, toRange} + assemblyMappings = new HashMap>(); + } + + /** + * Starts a new thread to query and load VCF variant data on to the alignment + *

      + * This method is not thread safe - concurrent threads should use separate + * instances of this class. + * + * @param filePath + * @param gui + */ + public void loadVCF(final String filePath, + final AlignViewControllerGuiI gui) + { + if (gui != null) + { + gui.setStatus(MessageManager.getString("label.searching_vcf")); + } + + new Thread() + { + + @Override + public void run() + { + VCFLoader.this.doLoad(filePath, gui); + } + + }.start(); + } + + /** + * Loads VCF on to an alignment - provided it can be related to one or more + * sequence's chromosomal coordinates + * + * @param filePath + * @param gui + * optional callback handler for messages + */ + protected void doLoad(String filePath, AlignViewControllerGuiI gui) + { + VCFReader reader = null; + try + { + // long start = System.currentTimeMillis(); + reader = new VCFReader(filePath); + + header = reader.getFileHeader(); + + sourceId = filePath; + + saveMetadata(sourceId); + + /* + * get offset of CSQ ALLELE_NUM and Feature if declared + */ + locateCsqFields(); + + VCFHeaderLine ref = header + .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); + String vcfAssembly = ref.getValue(); + + int varCount = 0; + int seqCount = 0; + + /* + * query for VCF overlapping each sequence in turn + */ + for (SequenceI seq : al.getSequences()) + { + int added = loadSequenceVCF(seq, reader, vcfAssembly); + if (added > 0) + { + seqCount++; + varCount += added; + transferAddedFeatures(seq); + } + } + if (gui != null) + { + // long elapsed = System.currentTimeMillis() - start; + String msg = MessageManager.formatMessage("label.added_vcf", + varCount, seqCount); + gui.setStatus(msg); + if (gui.getFeatureSettingsUI() != null) + { + gui.getFeatureSettingsUI().discoverAllFeatureData(); + } + } + } catch (Throwable e) + { + System.err.println("Error processing VCF: " + e.getMessage()); + e.printStackTrace(); + if (gui != null) + { + gui.setStatus("Error occurred - see console for details"); + } + } finally + { + if (reader != null) 
+ { + try + { + reader.close(); + } catch (IOException e) + { + // ignore + } + } + } + } + + /** + * Reads metadata (such as INFO field descriptions and datatypes) and saves + * them for future reference + * + * @param sourceId + */ + void saveMetadata(String sourceId) + { + FeatureSource metadata = new FeatureSource(sourceId); + + for (VCFInfoHeaderLine info : header.getInfoHeaderLines()) + { + String attributeId = info.getID(); + String desc = info.getDescription(); + VCFHeaderLineType type = info.getType(); + FeatureAttributeType attType = null; + switch (type) + { + case Character: + attType = FeatureAttributeType.Character; + break; + case Flag: + attType = FeatureAttributeType.Flag; + break; + case Float: + attType = FeatureAttributeType.Float; + break; + case Integer: + attType = FeatureAttributeType.Integer; + break; + case String: + attType = FeatureAttributeType.String; + break; + } + metadata.setAttributeName(attributeId, desc); + metadata.setAttributeType(attributeId, attType); + } + + FeatureSources.getInstance().addSource(sourceId, metadata); + } + + /** + * Records the position of selected fields defined in the CSQ INFO header (if + * there is one). CSQ fields are declared in the CSQ INFO Description e.g. + *

      + * Description="Consequence ...from ... VEP. Format: Allele|Consequence|... + */ + protected void locateCsqFields() + { + VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ); + if (csqInfo == null) + { + return; + } + + String desc = csqInfo.getDescription(); + int formatPos = desc.indexOf(FORMAT); + if (formatPos == -1) + { + System.err.println("Parse error, failed to find " + FORMAT + + " in " + desc); + return; + } + desc = desc.substring(formatPos + FORMAT.length()); + + if (desc != null) + { + String[] format = desc.split(PIPE_REGEX); + int index = 0; + for (String field : format) + { + if (ALLELE_NUM_KEY.equals(field)) + { + csqAlleleNumberFieldIndex = index; + } + if (ALLELE_KEY.equals(field)) + { + csqAlleleFieldIndex = index; + } + if (FEATURE_KEY.equals(field)) + { + csqFeatureFieldIndex = index; + } + index++; + } + } + } + + /** + * Transfers VCF features to sequences to which this sequence has a mapping. + * If the mapping is 3:1, computes peptide variants from nucleotide variants. + * + * @param seq + */ + protected void transferAddedFeatures(SequenceI seq) + { + DBRefEntry[] dbrefs = seq.getDBRefs(); + if (dbrefs == null) + { + return; + } + for (DBRefEntry dbref : dbrefs) + { + Mapping mapping = dbref.getMap(); + if (mapping == null || mapping.getTo() == null) + { + continue; + } + + SequenceI mapTo = mapping.getTo(); + MapList map = mapping.getMap(); + if (map.getFromRatio() == 3) + { + /* + * dna-to-peptide product mapping + */ + AlignmentUtils.computeProteinFeatures(seq, mapTo, map); + } + else + { + /* + * nucleotide-to-nucleotide mapping e.g. 
transcript to CDS + */ + List features = seq.getFeatures() + .getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT); + for (SequenceFeature sf : features) + { + if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup())) + { + transferFeature(sf, mapTo, map); + } + } + } + } + } + + /** + * Tries to add overlapping variants read from a VCF file to the given + * sequence, and returns the number of variant features added. Note that this + * requires the sequence to hold information as to its species, chromosomal + * positions and reference assembly, in order to be able to map the VCF + * variants to the sequence (or not) + * + * @param seq + * @param reader + * @param vcfAssembly + * @return + */ + protected int loadSequenceVCF(SequenceI seq, VCFReader reader, + String vcfAssembly) + { + int count = 0; + GeneLociI seqCoords = seq.getGeneLoci(); + if (seqCoords == null) + { + System.out.println(String.format( + "Can't query VCF for %s as chromosome coordinates not known", + seq.getName())); + return 0; + } + + if (!vcfSpeciesMatchesSequence(vcfAssembly, seqCoords.getSpeciesId())) + { + return 0; + } + + List seqChromosomalContigs = seqCoords.getMap().getToRanges(); + for (int[] range : seqChromosomalContigs) + { + count += addVcfVariants(seq, reader, range, vcfAssembly); + } + + return count; + } + + /** + * Answers true if the species inferred from the VCF reference identifier + * matches that for the sequence + * + * @param vcfAssembly + * @param speciesId + * @return + */ + boolean vcfSpeciesMatchesSequence(String vcfAssembly, String speciesId) + { + // PROBLEM 1 + // there are many aliases for species - how to equate one with another? + // PROBLEM 2 + // VCF ##reference header is an unstructured URI - how to extract species? + // perhaps check if ref includes any (Ensembl) alias of speciesId?? + // TODO ask the user to confirm this?? 
+ + if (vcfAssembly.contains("Homo_sapiens") // gnomAD exome data example + && "HOMO_SAPIENS".equals(speciesId)) // Ensembl species id + { + return true; + } + + if (vcfAssembly.contains("c_elegans") // VEP VCF response example + && "CAENORHABDITIS_ELEGANS".equals(speciesId)) // Ensembl + { + return true; + } + + // this is not a sustainable solution... + + return false; + } + + /** + * Queries the VCF reader for any variants that overlap the given chromosome + * region of the sequence, and adds as variant features. Returns the number of + * overlapping variants found. + * + * @param seq + * @param reader + * @param range + * start-end range of a sequence region in its chromosomal + * coordinates + * @param vcfAssembly + * the '##reference' identifier for the VCF reference assembly + * @return + */ + protected int addVcfVariants(SequenceI seq, VCFReader reader, + int[] range, String vcfAssembly) + { + GeneLociI seqCoords = seq.getGeneLoci(); + + String chromosome = seqCoords.getChromosomeId(); + String seqRef = seqCoords.getAssemblyId(); + String species = seqCoords.getSpeciesId(); + + /* + * map chromosomal coordinates from sequence to VCF if the VCF + * data has a different reference assembly to the sequence + */ + // TODO generalise for non-human species + // - or get the user to choose in a dialog + + int offset = 0; + if ("GRCh38".equalsIgnoreCase(seqRef) // Ensembl + && vcfAssembly.contains("Homo_sapiens_assembly19")) // gnomAD + { + String toRef = "GRCh37"; + int[] newRange = mapReferenceRange(range, chromosome, "human", + seqRef, toRef); + if (newRange == null) + { + System.err.println(String.format( + "Failed to map %s:%s:%s:%d:%d to %s", species, chromosome, + seqRef, range[0], range[1], toRef)); + return 0; + } + offset = newRange[0] - range[0]; + range = newRange; + } + + boolean forwardStrand = range[0] <= range[1]; + + /* + * query the VCF for overlaps + * (convert a reverse strand range to forwards) + */ + int count = 0; + MapList mapping = 
seqCoords.getMap(); + + int fromLocus = Math.min(range[0], range[1]); + int toLocus = Math.max(range[0], range[1]); + CloseableIterator variants = reader.query(chromosome, + fromLocus, toLocus); + while (variants.hasNext()) + { + /* + * get variant location in sequence chromosomal coordinates + */ + VariantContext variant = variants.next(); + + int start = variant.getStart() - offset; + int end = variant.getEnd() - offset; + + /* + * convert chromosomal location to sequence coordinates + * - may be reverse strand (convert to forward for sequence feature) + * - null if a partially overlapping feature + */ + int[] seqLocation = mapping.locateInFrom(start, end); + if (seqLocation != null) + { + int featureStart = Math.min(seqLocation[0], seqLocation[1]); + int featureEnd = Math.max(seqLocation[0], seqLocation[1]); + count += addAlleleFeatures(seq, variant, featureStart, featureEnd, + forwardStrand); + } + } + + variants.close(); + + return count; + } + + /** + * A convenience method to get the AF value for the given alternate allele + * index + * + * @param variant + * @param alleleIndex + * @return + */ + protected float getAlleleFrequency(VariantContext variant, int alleleIndex) + { + float score = 0f; + String attributeValue = getAttributeValue(variant, + ALLELE_FREQUENCY_KEY, alleleIndex); + if (attributeValue != null) + { + try + { + score = Float.parseFloat(attributeValue); + } catch (NumberFormatException e) + { + // leave as 0 + } + } + + return score; + } + + /** + * A convenience method to get an attribute value for an alternate allele + * + * @param variant + * @param attributeName + * @param alleleIndex + * @return + */ + protected String getAttributeValue(VariantContext variant, + String attributeName, int alleleIndex) + { + Object att = variant.getAttribute(attributeName); + + if (att instanceof String) + { + return (String) att; + } + else if (att instanceof ArrayList) + { + return ((List) att).get(alleleIndex); + } + + return null; + } + + /** + * Adds 
one variant feature for each allele in the VCF variant record, and + * returns the number of features added. + * + * @param seq + * @param variant + * @param featureStart + * @param featureEnd + * @param forwardStrand + * @return + */ + protected int addAlleleFeatures(SequenceI seq, VariantContext variant, + int featureStart, int featureEnd, boolean forwardStrand) + { + int added = 0; + + /* + * Javadoc says getAlternateAlleles() imposes no order on the list returned + * so we proceed defensively to get them in strict order + */ + int altAlleleCount = variant.getAlternateAlleles().size(); + for (int i = 0; i < altAlleleCount; i++) + { + added += addAlleleFeature(seq, variant, i, featureStart, featureEnd, + forwardStrand); + } + return added; + } + + /** + * Inspects one allele and attempts to add a variant feature for it to the + * sequence. We extract as much as possible of the additional data associated + * with this allele to store in the feature's key-value map. Answers the + * number of features added (0 or 1). + * + * @param seq + * @param variant + * @param altAlleleIndex + * (0, 1..) + * @param featureStart + * @param featureEnd + * @param forwardStrand + * @return + */ + protected int addAlleleFeature(SequenceI seq, VariantContext variant, + int altAlleleIndex, int featureStart, int featureEnd, + boolean forwardStrand) + { + String reference = variant.getReference().getBaseString(); + Allele alt = variant.getAlternateAllele(altAlleleIndex); + String allele = alt.getBaseString(); + + /* + * build the ref,alt allele description e.g. "G,A", using the base + * complement if the sequence is on the reverse strand + */ + // TODO check how structural variants are shown on reverse strand + StringBuilder sb = new StringBuilder(); + sb.append(forwardStrand ? reference : Dna.reverseComplement(reference)); + sb.append(COMMA); + sb.append(forwardStrand ? allele : Dna.reverseComplement(allele)); + String alleles = sb.toString(); // e.g. 
G,A + + String type = SequenceOntologyI.SEQUENCE_VARIANT; + float score = getAlleleFrequency(variant, altAlleleIndex); + + SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, + featureEnd, score, FEATURE_GROUP_VCF); + sf.setSource(sourceId); + + sf.setValue(Gff3Helper.ALLELES, alleles); + + addAlleleProperties(variant, seq, sf, altAlleleIndex); + + seq.addSequenceFeature(sf); + + return 1; + } + + /** + * Add any allele-specific VCF key-value data to the sequence feature + * + * @param variant + * @param seq + * @param sf + * @param altAlelleIndex + * (0, 1..) + */ + protected void addAlleleProperties(VariantContext variant, SequenceI seq, + SequenceFeature sf, final int altAlelleIndex) + { + Map atts = variant.getAttributes(); + + for (Entry att : atts.entrySet()) + { + String key = att.getKey(); + + /* + * extract Consequence data (if present) that we are able to + * associate with the allele for this variant feature + */ + if (CSQ.equals(key)) + { + addConsequences(variant, seq, sf, altAlelleIndex); + continue; + } + + /* + * we extract values for other data which are allele-specific; + * these may be per alternate allele (INFO[key].Number = 'A') + * or per allele including reference (INFO[key].Number = 'R') + */ + VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(key); + if (infoHeader == null) + { + /* + * can't be sure what data belongs to this allele, so + * play safe and don't take any + */ + continue; + } + + VCFHeaderLineCount number = infoHeader.getCountType(); + int index = altAlelleIndex; + if (number == VCFHeaderLineCount.R) + { + /* + * one value per allele including reference, so bump index + * e.g. 
the 3rd value is for the 2nd alternate allele + */ + index++; + } + else if (number != VCFHeaderLineCount.A) + { + /* + * don't save other values as not allele-related + */ + continue; + } + + /* + * take the index'th value + */ + String value = getAttributeValue(variant, key, index); + if (value != null) + { + sf.setValue(key, value); + } + } + } + + /** + * Inspects CSQ data blocks (consequences) and adds attributes on the sequence + * feature for the current allele (and transcript if applicable) + *

      + * Allele matching: if field ALLELE_NUM is present, it must match + * altAlleleIndex. If not present, then field Allele value must match the VCF + * Allele. + *

      + * Transcript matching: if sequence name can be identified to at least one of + * the consequences' Feature values, then select only consequences that match + * the value (i.e. consequences for the current transcript sequence). If not, + * take all consequences (this is the case when adding features to the gene + * sequence). + * + * @param variant + * @param seq + * @param sf + * @param altAlelleIndex + * (0, 1..) + */ + protected void addConsequences(VariantContext variant, SequenceI seq, + SequenceFeature sf, int altAlelleIndex) + { + Object value = variant.getAttribute(CSQ); + + if (value == null || !(value instanceof ArrayList)) + { + return; + } + + List consequences = (List) value; + + /* + * if CSQ data includes 'Feature', and any value matches the sequence name, + * then restrict consequence data to only the matching value (transcript) + * i.e. just pick out consequences for the transcript the variant feature is on + */ + String seqName = seq.getName()== null ? "" : seq.getName().toLowerCase(); + String matchFeature = null; + if (csqFeatureFieldIndex > -1) + { + for (String consequence : consequences) + { + String[] csqFields = consequence.split(PIPE_REGEX); + if (csqFields.length > csqFeatureFieldIndex) + { + String featureIdentifier = csqFields[csqFeatureFieldIndex]; + if (featureIdentifier.length() > 4 + && seqName.indexOf(featureIdentifier.toLowerCase()) > -1) + { + matchFeature = featureIdentifier; + } + } + } + } + + StringBuilder sb = new StringBuilder(128); + boolean found = false; + + for (String consequence : consequences) + { + String[] csqFields = consequence.split(PIPE_REGEX); + + if (includeConsequence(csqFields, matchFeature, variant, + altAlelleIndex)) + { + if (found) + { + sb.append(COMMA); + } + found = true; + sb.append(consequence); + } + } + + if (found) + { + sf.setValue(CSQ, sb.toString()); + } + } + + /** + * Answers true if we want to associate this block of consequence data with + * the specified alternate allele of the 
VCF variant. + *

      + * If consequence data includes the ALLELE_NUM field, then this has to match + * altAlleleIndex. Otherwise the Allele field of the consequence data has to + * match the allele value. + *

      + * Optionally (if matchFeature is not null), restrict to only include + * consequences whose Feature value matches. This allows us to attach + * consequences to their respective transcripts. + * + * @param csqFields + * @param matchFeature + * @param variant + * @param altAlelleIndex + * (0, 1..) + * @return + */ + protected boolean includeConsequence(String[] csqFields, + String matchFeature, VariantContext variant, int altAlelleIndex) + { + /* + * check consequence is for the current transcript + */ + if (matchFeature != null) + { + if (csqFields.length <= csqFeatureFieldIndex) + { + return false; + } + String featureIdentifier = csqFields[csqFeatureFieldIndex]; + if (!featureIdentifier.equals(matchFeature)) + { + return false; // consequence is for a different transcript + } + } + + /* + * if ALLELE_NUM is present, it must match altAlleleIndex + * NB first alternate allele is 1 for ALLELE_NUM, 0 for altAlleleIndex + */ + if (csqAlleleNumberFieldIndex > -1) + { + if (csqFields.length <= csqAlleleNumberFieldIndex) + { + return false; + } + String alleleNum = csqFields[csqAlleleNumberFieldIndex]; + return String.valueOf(altAlelleIndex + 1).equals(alleleNum); + } + + /* + * else consequence allele must match variant allele + */ + if (csqAlleleFieldIndex > -1 && csqFields.length > csqAlleleFieldIndex) + { + String csqAllele = csqFields[csqAlleleFieldIndex]; + String vcfAllele = variant.getAlternateAllele(altAlelleIndex) + .getBaseString(); + return csqAllele.equals(vcfAllele); + } + + return false; + } + + /** + * A convenience method to complement a dna base and return the string value + * of its complement + * + * @param reference + * @return + */ + protected String complement(byte[] reference) + { + return String.valueOf(Dna.getComplement((char) reference[0])); + } + + /** + * Determines the location of the query range (chromosome positions) in a + * different reference assembly. + *

      + * If the range is just a subregion of one for which we already have a mapping + * (for example, an exon sub-region of a gene), then the mapping is just + * computed arithmetically. + *

      + * Otherwise, calls the Ensembl REST service that maps from one assembly + * reference's coordinates to another's + * + * @param queryRange + * start-end chromosomal range in 'fromRef' coordinates + * @param chromosome + * @param species + * @param fromRef + * assembly reference for the query coordinates + * @param toRef + * assembly reference we wish to translate to + * @return the start-end range in 'toRef' coordinates + */ + protected int[] mapReferenceRange(int[] queryRange, String chromosome, + String species, String fromRef, String toRef) + { + /* + * first try shortcut of computing the mapping as a subregion of one + * we already have (e.g. for an exon, if we have the gene mapping) + */ + int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome, + species, fromRef, toRef); + if (mappedRange != null) + { + return mappedRange; + } + + /* + * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37 + */ + EnsemblMap mapper = new EnsemblMap(); + int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef, + toRef, queryRange); + + if (mapping == null) + { + // mapping service failure + return null; + } + + /* + * save mapping for possible future re-use + */ + String key = makeRangesKey(chromosome, species, fromRef, toRef); + if (!assemblyMappings.containsKey(key)) + { + assemblyMappings.put(key, new HashMap()); + } + + assemblyMappings.get(key).put(queryRange, mapping); + + return mapping; + } + + /** + * If we already have a 1:1 contiguous mapping which subsumes the given query + * range, this method just calculates and returns the subset of that mapping, + * else it returns null. In practical terms, if a gene has a contiguous + * mapping between (for example) GRCh37 and GRCh38, then we assume that its + * subsidiary exons occupy unchanged relative positions, and just compute + * these as offsets, rather than do another lookup of the mapping. + *

      + * If in future these assumptions prove invalid (e.g. for bacterial dna?!), + * simply remove this method or let it always return null. + *

      + * Warning: many rapid calls to the /map service may result in a 429 overload + * error response + * + * @param queryRange + * @param chromosome + * @param species + * @param fromRef + * @param toRef + * @return + */ + protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome, + String species, String fromRef, String toRef) + { + String key = makeRangesKey(chromosome, species, fromRef, toRef); + if (assemblyMappings.containsKey(key)) + { + Map mappedRanges = assemblyMappings.get(key); + for (Entry mappedRange : mappedRanges.entrySet()) + { + int[] fromRange = mappedRange.getKey(); + int[] toRange = mappedRange.getValue(); + if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0]) + { + /* + * mapping is 1:1 in length, so we trust it to have no discontinuities + */ + if (MappingUtils.rangeContains(fromRange, queryRange)) + { + /* + * fromRange subsumes our query range + */ + int offset = queryRange[0] - fromRange[0]; + int mappedRangeFrom = toRange[0] + offset; + int mappedRangeTo = mappedRangeFrom + (queryRange[1] - queryRange[0]); + return new int[] { mappedRangeFrom, mappedRangeTo }; + } + } + } + } + return null; + } + + /** + * Transfers the sequence feature to the target sequence, locating its start + * and end range based on the mapping. Features which do not overlap the + * target sequence are ignored. 
+ * + * @param sf + * @param targetSequence + * @param mapping + * mapping from the feature's coordinates to the target sequence + */ + protected void transferFeature(SequenceFeature sf, + SequenceI targetSequence, MapList mapping) + { + int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd()); + + if (mappedRange != null) + { + String group = sf.getFeatureGroup(); + int newBegin = Math.min(mappedRange[0], mappedRange[1]); + int newEnd = Math.max(mappedRange[0], mappedRange[1]); + SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, + group, sf.getScore()); + targetSequence.addSequenceFeature(copy); + } + } + + /** + * Formats a ranges map lookup key + * + * @param chromosome + * @param species + * @param fromRef + * @param toRef + * @return + */ + protected static String makeRangesKey(String chromosome, String species, + String fromRef, String toRef) + { + return species + EXCL + chromosome + EXCL + fromRef + EXCL + + toRef; + } +} diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index 86d0c85..1cf482d 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -147,6 +147,8 @@ public class GAlignFrame extends JInternalFrame protected JMenuItem runGroovy = new JMenuItem(); + protected JMenuItem loadVcf; + protected JCheckBoxMenuItem autoCalculate = new JCheckBoxMenuItem(); protected JCheckBoxMenuItem sortByTree = new JCheckBoxMenuItem(); @@ -1308,6 +1310,16 @@ public class GAlignFrame extends JInternalFrame associatedData_actionPerformed(e); } }); + loadVcf = new JMenuItem(MessageManager.getString("label.load_vcf_file")); + loadVcf.setToolTipText(MessageManager.getString("label.load_vcf")); + loadVcf.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + loadVcf_actionPerformed(); + } + }); autoCalculate.setText( MessageManager.getString("label.autocalculate_consensus")); autoCalculate.setState( @@ -1710,6 +1722,7 @@ public 
class GAlignFrame extends JInternalFrame fileMenu.add(exportAnnotations); fileMenu.add(loadTreeMenuItem); fileMenu.add(associatedData); + fileMenu.add(loadVcf); fileMenu.addSeparator(); fileMenu.add(closeMenuItem); @@ -1855,6 +1868,10 @@ public class GAlignFrame extends JInternalFrame // selectMenu.add(listenToViewSelections); } + protected void loadVcf_actionPerformed() + { + } + /** * Constructs the entries on the Colour menu (but does not add them to the * menu). diff --git a/src/jalview/jbgui/GCutAndPasteHtmlTransfer.java b/src/jalview/jbgui/GCutAndPasteHtmlTransfer.java index abc0b3d..a6e0ace 100644 --- a/src/jalview/jbgui/GCutAndPasteHtmlTransfer.java +++ b/src/jalview/jbgui/GCutAndPasteHtmlTransfer.java @@ -39,6 +39,8 @@ import javax.swing.JMenuBar; import javax.swing.JMenuItem; import javax.swing.JPanel; import javax.swing.JScrollPane; +import javax.swing.text.EditorKit; +import javax.swing.text.html.HTMLEditorKit; /** * DOCUMENT ME! @@ -85,6 +87,7 @@ public class GCutAndPasteHtmlTransfer extends JInternalFrame { try { + textarea.setEditorKit(new HTMLEditorKit()); setJMenuBar(editMenubar); jbInit(); } catch (Exception e) @@ -272,4 +275,20 @@ public class GCutAndPasteHtmlTransfer extends JInternalFrame { } + + /** + * Adds the given stylesheet rule to the Html editor. However note that CSS + * support is limited. 
+ * + * @param rule + * @see javax.swing.text.html.CSS + */ + public void addStylesheetRule(String rule) + { + EditorKit editorKit = textarea.getEditorKit(); + if (editorKit != null) + { + ((HTMLEditorKit) editorKit).getStyleSheet().addRule(rule); + } + } } diff --git a/src/jalview/schemes/FeatureColour.java b/src/jalview/schemes/FeatureColour.java index 54d1c6c..2dac7db 100644 --- a/src/jalview/schemes/FeatureColour.java +++ b/src/jalview/schemes/FeatureColour.java @@ -24,9 +24,11 @@ import jalview.api.FeatureColourI; import jalview.datamodel.SequenceFeature; import jalview.util.ColorUtils; import jalview.util.Format; +import jalview.util.matcher.KeyedMatcherSetI; import java.awt.Color; import java.util.StringTokenizer; +import java.util.function.Function; /** * A class that wraps either a simple colour or a graduated colour @@ -73,6 +75,11 @@ public class FeatureColour implements FeatureColourI final private float deltaBlue; + /* + * optional filter(s) by attribute values + */ + private KeyedMatcherSetI attributeFilters; + /** * Parses a Jalview features file format colour descriptor * [label|][mincolour|maxcolour @@ -359,6 +366,7 @@ public class FeatureColour implements FeatureColourI base = fc.base; range = fc.range; isHighToLow = fc.isHighToLow; + attributeFilters = fc.attributeFilters; setAboveThreshold(fc.isAboveThreshold()); setBelowThreshold(fc.isBelowThreshold()); setThreshold(fc.getThreshold()); @@ -540,6 +548,11 @@ public class FeatureColour implements FeatureColourI @Override public Color getColor(SequenceFeature feature) { + if (!matchesFilters(feature)) + { + return null; + } + if (isColourByLabel()) { return ColorUtils.createColourFromName(feature.getDescription()); @@ -589,6 +602,28 @@ public class FeatureColour implements FeatureColourI } /** + * Answers true if either there are no attribute value filters defined, or the + * feature matches all of the filter conditions. Answers false if the feature + * fails the filter conditions. 
+ * + * @param feature + * + * @return + */ + boolean matchesFilters(SequenceFeature feature) + { + if (attributeFilters == null) + { + return true; + } + + Function valueProvider = key -> feature.otherDetails == null ? null + : (feature.otherDetails.containsKey(key) ? feature.otherDetails + .get(key).toString() : null); + return attributeFilters.matches(valueProvider); + } + + /** * Returns the maximum score of the graduated colour range * * @return @@ -674,4 +709,21 @@ public class FeatureColour implements FeatureColourI return String.format("%s\t%s", featureType, colourString); } + /** + * Adds an attribute filter + * + * @param attName + * @param filter + */ + @Override + public void setAttributeFilters(KeyedMatcherSetI matcher) + { + attributeFilters = matcher; + } + + @Override + public KeyedMatcherSetI getAttributeFilters() + { + return attributeFilters; + } } diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 4658724..3ce0bb3 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -1120,4 +1120,63 @@ public class MapList || (fromRatio == 3 && toRatio == 1); } + /** + * Returns a map which is the composite of this one and the input map. That + * is, the output map has the fromRanges of this map, and its toRanges are the + * toRanges of this map as transformed by the input map. + *

      + * Returns null if the mappings cannot be traversed (not all toRanges of this + * map correspond to fromRanges of the input), or if this.toRatio does not + * match map.fromRatio. + * + *

      +   * Example 1:
      +   *    this:   from [10-40] to [60-90]
      +   *    input:  from [1-100] to [501-600]
      +   *    output: from [10-40] to [560-590]
      +   * Example 2 ('reverse strand exons'):
      +   *    this:   from [1-50]  to [41-90] // CDS to transcript
      +   *    input:  from [1-100] to [2000-1951], [1000-951] // transcript to loci
      +   *    output: from [1-50]  to [1960-1951], [1000-961] // CDS to gene loci
      +   * 
      + * + * @param map + * @return + */ + public MapList traverse(MapList map) + { + if (map == null) + { + return null; + } + + /* + * compound the ratios by this rule: + * A:B with M:N gives A*M:B*N + * reduced by greatest common divisor + * so 1:3 with 3:3 is 3:9 or 1:3 + * 1:3 with 3:1 is 3:3 or 1:1 + * 1:3 with 1:3 is 1:9 + * 2:5 with 3:7 is 6:35 + */ + int outFromRatio = getFromRatio() * map.getFromRatio(); + int outToRatio = getToRatio() * map.getToRatio(); + int gcd = MathUtils.gcd(outFromRatio, outToRatio); + outFromRatio /= gcd; + outToRatio /= gcd; + + List toRanges = new ArrayList<>(); + for (int[] range : getToRanges()) + { + int[] transferred = map.locateInTo(range[0], range[1]); + if (transferred == null) + { + return null; + } + toRanges.add(transferred); + } + + return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio); + } + } diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 9c5c109..f5dd883 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -941,6 +941,34 @@ public final class MappingUtils } /** + * Answers true if range's start-end positions include those of queryRange, + * where either range might be in reverse direction, else false + * + * @param range + * a start-end range + * @param queryRange + * a candidate subrange of range (start2-end2) + * @return + */ + public static boolean rangeContains(int[] range, int[] queryRange) + { + if (range == null || queryRange == null || range.length != 2 + || queryRange.length != 2) + { + /* + * invalid arguments + */ + return false; + } + + int min = Math.min(range[0], range[1]); + int max = Math.max(range[0], range[1]); + + return (min <= queryRange[0] && max >= queryRange[0] + && min <= queryRange[1] && max >= queryRange[1]); + } + + /** * Removes the specified number of positions from the given ranges. 
Provided * to allow a stop codon to be stripped from a CDS sequence so that it matches * the peptide translation length. diff --git a/src/jalview/util/MathUtils.java b/src/jalview/util/MathUtils.java new file mode 100644 index 0000000..72d46a2 --- /dev/null +++ b/src/jalview/util/MathUtils.java @@ -0,0 +1,22 @@ +package jalview.util; + +public class MathUtils +{ + + /** + * Returns the greatest common divisor of two integers + * + * @param a + * @param b + * @return + */ + public static int gcd(int a, int b) + { + if (b == 0) + { + return Math.abs(a); + } + return gcd(b, a % b); + } + +} diff --git a/src/jalview/util/StringUtils.java b/src/jalview/util/StringUtils.java index b3456aa..2e8ace8 100644 --- a/src/jalview/util/StringUtils.java +++ b/src/jalview/util/StringUtils.java @@ -403,4 +403,45 @@ public class StringUtils } return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); } + + /** + * A helper method that strips off any leading or trailing html and body tags. + * If no html tag is found, then also html-encodes angle bracket characters. + * + * @param text + * @return + */ + public static String stripHtmlTags(String text) + { + if (text == null) + { + return null; + } + String tmp2up = text.toUpperCase(); + int startTag = tmp2up.indexOf(""); + if (startTag > -1) + { + text = text.substring(startTag + 6); + tmp2up = tmp2up.substring(startTag + 6); + } + // is omission of "" intentional here?? 
+ int endTag = tmp2up.indexOf(""); + if (endTag > -1) + { + text = text.substring(0, endTag); + tmp2up = tmp2up.substring(0, endTag); + } + endTag = tmp2up.indexOf(""); + if (endTag > -1) + { + text = text.substring(0, endTag); + } + + if (startTag == -1 && (text.contains("<") || text.contains(">"))) + { + text = text.replaceAll("<", "<"); + text = text.replaceAll(">", ">"); + } + return text; + } } diff --git a/src/jalview/util/matcher/Condition.java b/src/jalview/util/matcher/Condition.java new file mode 100644 index 0000000..455f805 --- /dev/null +++ b/src/jalview/util/matcher/Condition.java @@ -0,0 +1,57 @@ +package jalview.util.matcher; + +import jalview.util.MessageManager; + +import java.util.HashMap; +import java.util.Map; + +/** + * An enumeration for binary conditions that a user might choose from when + * setting filter or match conditions for values + */ +public enum Condition +{ + Contains(false), NotContains(false), Matches(false), NotMatches(false), + EQ(true), NE(true), LT(true), LE(true), GT(true), GE(true); + + private static Map displayNames = new HashMap<>(); + + private boolean numeric; + + Condition(boolean isNumeric) + { + numeric = isNumeric; + } + + /** + * Answers true if the condition does a numerical comparison, else false + * (string comparison) + * + * @return + */ + public boolean isNumeric() + { + return numeric; + } + + /** + * Answers a display name for the match condition, suitable for showing in + * drop-down menus. The value may be internationalized using the resource key + * "label.matchCondition_" with the enum name appended. 
+ * + * @return + */ + @Override + public String toString() + { + String name = displayNames.get(this); + if (name != null) + { + return name; + } + name = MessageManager + .getStringOrReturn("label.matchCondition_", name()); + displayNames.put(this, name); + return name; + } +} diff --git a/src/jalview/util/matcher/KeyedMatcher.java b/src/jalview/util/matcher/KeyedMatcher.java new file mode 100644 index 0000000..474dc31 --- /dev/null +++ b/src/jalview/util/matcher/KeyedMatcher.java @@ -0,0 +1,86 @@ +package jalview.util.matcher; + +import java.util.function.Function; + +/** + * An immutable class that models one or more match conditions, each of which is + * applied to the value obtained by lookup given the match key. + *

      + * For example, the value provider could be a SequenceFeature's attributes map, + * and the conditions might be + *

        + *
      • CSQ contains "pathological"
      • + *
      • AND
      • + *
      • AF <= 1.0e-5
      • + *
      + * + * @author gmcarstairs + * + */ +public class KeyedMatcher implements KeyedMatcherI +{ + final private String key; + + final private MatcherI matcher; + + /** + * Constructor given a key, a test condition and a match pattern + * + * @param theKey + * @param cond + * @param pattern + */ + public KeyedMatcher(String theKey, Condition cond, String pattern) + { + key = theKey; + matcher = new Matcher(cond, pattern); + } + + /** + * Constructor given a key, a test condition and a numerical value to compare + * to. Note that if a non-numerical condition is specified, the float will be + * converted to a string. + * + * @param theKey + * @param cond + * @param value + */ + public KeyedMatcher(String theKey, Condition cond, float value) + { + key = theKey; + matcher = new Matcher(cond, value); + } + + @Override + public boolean matches(Function valueProvider) + { + String value = valueProvider.apply(key); + return matcher.matches(value); + } + + @Override + public String getKey() + { + return key; + } + + @Override + public MatcherI getMatcher() + { + return matcher; + } + + /** + * Answers a string description of this matcher, suitable for debugging or + * logging. The format may change in future. + */ + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + sb.append(key).append(" ").append(matcher.getCondition().name()) + .append(" ").append(matcher.getPattern()); + + return sb.toString(); + } +} diff --git a/src/jalview/util/matcher/KeyedMatcherI.java b/src/jalview/util/matcher/KeyedMatcherI.java new file mode 100644 index 0000000..e9fe014 --- /dev/null +++ b/src/jalview/util/matcher/KeyedMatcherI.java @@ -0,0 +1,36 @@ +package jalview.util.matcher; + +import java.util.function.Function; + +/** + * An interface for an object that can apply one or more match conditions, given + * a key-value provider. The match conditions are stored against key values, and + * applied to the value obtained by a key-value lookup. 
+ * + * @author gmcarstairs + */ +public interface KeyedMatcherI +{ + /** + * Answers true if the value provided for this matcher's key passes this + * matcher's match condition + * + * @param valueProvider + * @return + */ + boolean matches(Function valueProvider); + + /** + * Answers the value key this matcher operates on + * + * @return + */ + String getKey(); + + /** + * Answers the match condition that is applied + * + * @return + */ + MatcherI getMatcher(); +} diff --git a/src/jalview/util/matcher/KeyedMatcherSet.java b/src/jalview/util/matcher/KeyedMatcherSet.java new file mode 100644 index 0000000..3c21d50 --- /dev/null +++ b/src/jalview/util/matcher/KeyedMatcherSet.java @@ -0,0 +1,116 @@ +package jalview.util.matcher; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.function.Function; + +public class KeyedMatcherSet implements KeyedMatcherSetI +{ + List matchConditions; + + boolean andConditions; + + /** + * Constructor + */ + public KeyedMatcherSet() + { + matchConditions = new ArrayList<>(); + } + + @Override + public boolean matches(Function valueProvider) + { + /* + * no conditions matches anything + */ + if (matchConditions.isEmpty()) + { + return true; + } + + /* + * AND until failure + */ + if (andConditions) + { + for (KeyedMatcherI m : matchConditions) + { + if (!m.matches(valueProvider)) + { + return false; + } + } + return true; + } + + /* + * OR until match + */ + for (KeyedMatcherI m : matchConditions) + { + if (m.matches(valueProvider)) + { + return true; + } + } + return false; + } + + @Override + public KeyedMatcherSetI and(KeyedMatcherI m) + { + if (!andConditions && matchConditions.size() > 1) + { + throw new IllegalStateException("Can't add an AND to OR conditions"); + } + matchConditions.add(m); + andConditions = true; + + return this; + } + + @Override + public KeyedMatcherSetI or(KeyedMatcherI m) + { + if (andConditions && matchConditions.size() > 1) + { + throw new 
IllegalStateException("Can't add an OR to AND conditions"); + } + matchConditions.add(m); + andConditions = false; + + return this; + } + + @Override + public boolean isAnded() + { + return andConditions; + } + + @Override + public Iterator getMatchers() + { + return matchConditions.iterator(); + } + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (KeyedMatcherI matcher : matchConditions) + { + if (!first) + { + sb.append(andConditions ? " AND " : " OR "); + } + first = false; + sb.append("(").append(matcher.toString()).append(")"); + } + return sb.toString(); + } + +} diff --git a/src/jalview/util/matcher/KeyedMatcherSetI.java b/src/jalview/util/matcher/KeyedMatcherSetI.java new file mode 100644 index 0000000..09532a4 --- /dev/null +++ b/src/jalview/util/matcher/KeyedMatcherSetI.java @@ -0,0 +1,58 @@ +package jalview.util.matcher; + +import java.util.Iterator; +import java.util.function.Function; + +/** + * An interface to describe a set of one or more key-value match conditions, + * where all conditions are combined with either AND or OR + * + * @author gmcarstairs + * + */ +public interface KeyedMatcherSetI +{ + /** + * Answers true if the value provided for this matcher's key passes this + * matcher's match condition + * + * @param valueProvider + * @return + */ + boolean matches(Function valueProvider); + + /** + * Answers a new object that matches the logical AND of this and m + * + * @param m + * @return + * @throws IllegalStateException + * if an attempt is made to AND to existing OR-ed conditions + */ + KeyedMatcherSetI and(KeyedMatcherI m); + + /** + * Answers true if any second condition is AND-ed with this one, false if it + * is OR-ed + * + * @return + */ + boolean isAnded(); + + /** + * Answers a new object that matches the logical OR of this and m + * + * @param m + * @return + * @throws IllegalStateException + * if an attempt is made to OR to existing AND-ed conditions + */ + 
KeyedMatcherSetI or(KeyedMatcherI m); + + /** + * Answers an iterator over the combined match conditions + * + * @return + */ + Iterator getMatchers(); +} diff --git a/src/jalview/util/matcher/Matcher.java b/src/jalview/util/matcher/Matcher.java new file mode 100644 index 0000000..638933d --- /dev/null +++ b/src/jalview/util/matcher/Matcher.java @@ -0,0 +1,218 @@ +package jalview.util.matcher; + +import java.util.Objects; +import java.util.regex.Pattern; + +/** + * A bean to describe one attribute-based filter + */ +public class Matcher implements MatcherI +{ + /* + * the comparison condition + */ + Condition condition; + + /* + * the string value (upper-cased), or the regex, to compare to + * also holds the string form of float value if a numeric condition + */ + String pattern; + + /* + * the compiled regex if using a pattern match condition + * (reserved for possible future enhancement) + */ + Pattern regexPattern; + + /* + * the value to compare to for a numerical condition + */ + float value; + + /** + * Constructor + * + * @param cond + * @param compareTo + * @return + * @throws NumberFormatException + * if a numerical condition is specified with a non-numeric + * comparision value + * @throws NullPointerException + * if a null comparison string is specified + */ + public Matcher(Condition cond, String compareTo) + { + condition = cond; + if (cond.isNumeric()) + { + value = Float.valueOf(compareTo); + pattern = String.valueOf(value); + } + else + { + // pattern matches will be non-case-sensitive + pattern = compareTo.toUpperCase(); + } + + // if we add regex conditions (e.g. matchesPattern), then + // pattern should hold the raw regex, and + // regexPattern = Pattern.compile(compareTo); + } + + /** + * Constructor for a numerical match condition. 
Note that if a string + * comparison condition is specified, this will be converted to a comparison + * with the float value as string + * + * @param cond + * @param compareTo + */ + public Matcher(Condition cond, float compareTo) + { + Objects.requireNonNull(cond); + condition = cond; + value = compareTo; + pattern = String.valueOf(compareTo).toUpperCase(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("incomplete-switch") + @Override + public boolean matches(String val) + { + if (condition.isNumeric()) + { + try + { + /* + * treat a null value (no such attribute) as + * failing any numerical filter condition + */ + return val == null ? false : matches(Float.valueOf(val)); + } catch (NumberFormatException e) + { + return false; + } + } + + /* + * a null value matches a negative condition, fails a positive test + */ + if (val == null) + { + return condition == Condition.NotContains + || condition == Condition.NotMatches; + } + + String upper = val.toUpperCase().trim(); + boolean matched = false; + switch(condition) { + case Matches: + matched = upper.equals(pattern); + break; + case NotMatches: + matched = !upper.equals(pattern); + break; + case Contains: + matched = upper.indexOf(pattern) > -1; + break; + case NotContains: + matched = upper.indexOf(pattern) == -1; + break; + } + return matched; + } + + /** + * Applies a numerical comparison match condition + * + * @param f + * @return + */ + @SuppressWarnings("incomplete-switch") + boolean matches(float f) + { + if (!condition.isNumeric()) + { + return matches(String.valueOf(f)); + } + + boolean matched = false; + switch (condition) { + case LT: + matched = f < value; + break; + case LE: + matched = f <= value; + break; + case EQ: + matched = f == value; + break; + case NE: + matched = f != value; + break; + case GT: + matched = f > value; + break; + case GE: + matched = f >= value; + break; + } + + return matched; + } + + /** + * A simple hash function that guarantees that when two objects are equal, + * 
they have the same hashcode + */ + @Override + public int hashCode() + { + return pattern.hashCode() + condition.hashCode() + (int) value; + } + + /** + * equals is overridden so that we can safely remove Matcher objects from + * collections (e.g. delete an attribut match condition for a feature colour) + */ + @Override + public boolean equals(Object obj) + { + if (obj == null || !(obj instanceof Matcher)) + { + return false; + } + Matcher m = (Matcher) obj; + return condition == m.condition && value == m.value + && pattern.equals(m.pattern); + } + + @Override + public Condition getCondition() + { + return condition; + } + + @Override + public String getPattern() + { + return pattern; + } + + @Override + public float getFloatValue() + { + return value; + } + + @Override + public String toString() + { + return condition.name() + " " + pattern; + } +} diff --git a/src/jalview/util/matcher/MatcherI.java b/src/jalview/util/matcher/MatcherI.java new file mode 100644 index 0000000..ca6d44c --- /dev/null +++ b/src/jalview/util/matcher/MatcherI.java @@ -0,0 +1,18 @@ +package jalview.util.matcher; + +public interface MatcherI +{ + /** + * Answers true if the given value is matched, else false + * + * @param s + * @return + */ + boolean matches(String s); + + Condition getCondition(); + + String getPattern(); + + float getFloatValue(); +} diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 06b51e6..1bff8bf 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -34,6 +34,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResultsI; @@ -63,6 +64,8 @@ import org.testng.annotations.Test; public class 
AlignmentUtilsTests { + private static Sequence ts = new Sequence("short", + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @BeforeClass(alwaysRun = true) public void setUpJvOptionPane() @@ -71,9 +74,6 @@ public class AlignmentUtilsTests JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); } - public static Sequence ts = new Sequence("short", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); - @Test(groups = { "Functional" }) public void testExpandContext() { @@ -1044,14 +1044,18 @@ public class AlignmentUtilsTests dna.addCodonFrame(acf); /* - * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation + * In this case, mappings originally came from matching Uniprot accessions + * - so need an xref on dna involving those regions. + * These are normally constructed from CDS annotation */ DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1", new Mapping(mapfordna1)); - dna1.getDatasetSequence().addDBRef(dna1xref); + dna1.addDBRef(dna1xref); + assertEquals(2, dna1.getDBRefs().length); // to self and to pep1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2", new Mapping(mapfordna2)); - dna2.getDatasetSequence().addDBRef(dna2xref); + dna2.addDBRef(dna2xref); + assertEquals(2, dna2.getDBRefs().length); // to self and to pep2 /* * execute method under test: @@ -1106,6 +1110,38 @@ public class AlignmentUtilsTests assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap()); /* + * verify cDNA has added a dbref with mapping to CDS + */ + assertEquals(3, dna1.getDBRefs().length); + DBRefEntry dbRefEntry = dna1.getDBRefs()[2]; + assertSame(cds1Dss, dbRefEntry.getMap().getTo()); + MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + assertEquals(3, dna2.getDBRefs().length); + dbRefEntry = dna2.getDBRefs()[2]; + assertSame(cds2Dss, 
dbRefEntry.getMap().getTo()); + dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, + new int[] { 1, 9 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + + /* + * verify CDS has added a dbref with mapping to cDNA + */ + assertEquals(2, cds1Dss.getDBRefs().length); + dbRefEntry = cds1Dss.getDBRefs()[1]; + assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo()); + MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] { + 4, 6, 10, 12 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + assertEquals(2, cds2Dss.getDBRefs().length); + dbRefEntry = cds2Dss.getDBRefs()[1]; + assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo()); + cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7, + 9, 13, 15 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + + /* * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide * the mappings are on the shared alignment dataset * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) @@ -2533,6 +2569,70 @@ public class AlignmentUtilsTests assertEquals(s_as3, uas3.getSequenceAsString()); } + @Test(groups = { "Functional" }) + public void testTransferGeneLoci() + { + SequenceI from = new Sequence("transcript", + "aaacccgggTTTAAACCCGGGtttaaacccgggttt"); + SequenceI to = new Sequence("CDS", "TTTAAACCCGGG"); + MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1, + 1); + + /* + * first with nothing to transfer + */ + AlignmentUtils.transferGeneLoci(from, map, to); + assertNull(to.getGeneLoci()); + + /* + * next with gene loci set on 'from' sequence + */ + int[] exons = new int[] { 100, 105, 155, 164, 210, 229 }; + MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1); + from.setGeneLoci("human", "GRCh38", "7", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + + GeneLociI toLoci = to.getGeneLoci(); + assertNotNull(toLoci); + // DBRefEntry constructor upper-cases 
'source' + assertEquals("HUMAN", toLoci.getSpeciesId()); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + + /* + * transcript 'exons' are 1-6, 7-16, 17-36 + * CDS 1:12 is transcript 10-21 + * transcript 'CDS' is 10-16, 17-21 + * which is 'gene' 158-164, 210-214 + */ + MapList toMap = toLoci.getMap(); + assertEquals(1, toMap.getFromRanges().size()); + assertEquals(2, toMap.getFromRanges().get(0).length); + assertEquals(1, toMap.getFromRanges().get(0)[0]); + assertEquals(12, toMap.getFromRanges().get(0)[1]); + assertEquals(1, toMap.getToRanges().size()); + assertEquals(4, toMap.getToRanges().get(0).length); + assertEquals(158, toMap.getToRanges().get(0)[0]); + assertEquals(164, toMap.getToRanges().get(0)[1]); + assertEquals(210, toMap.getToRanges().get(0)[2]); + assertEquals(214, toMap.getToRanges().get(0)[3]); + // or summarised as (but toString might change in future): + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + + /* + * an existing value is not overridden + */ + geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1); + from.setGeneLoci("inhuman", "GRCh37", "6", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + toMap = toLoci.getMap(); + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + } + /** * Tests for the method that maps nucleotide to protein based on CDS features */ @@ -2599,5 +2699,4 @@ public class AlignmentUtilsTests assertEquals("[[3, 3], [8, 12]]", Arrays.deepToString(ml.getFromRanges().toArray())); } - } diff --git a/test/jalview/datamodel/SequenceFeatureTest.java b/test/jalview/datamodel/SequenceFeatureTest.java index fbeb365..c955979 100644 --- a/test/jalview/datamodel/SequenceFeatureTest.java +++ b/test/jalview/datamodel/SequenceFeatureTest.java @@ -273,4 +273,47 @@ public class SequenceFeatureTest 
"group"); assertTrue(sf.isContactFeature()); } + + @Test(groups = { "Functional" }) + public void testGetDetailsReport() + { + // single locus, no group, no score + SequenceFeature sf = new SequenceFeature("variant", "G,C", 22, 22, null); + String expected = "
      " + + "" + + "
      Typevariant
      Start/end22
      DescriptionG,C
      "; + assertEquals(expected, sf.getDetailsReport()); + + // contact feature + sf = new SequenceFeature("Disulphide Bond", "a description", 28, 31, + null); + expected = "
      " + + "" + + "
      TypeDisulphide Bond
      Start/end28:31
      Descriptiona description
      "; + assertEquals(expected, sf.getDetailsReport()); + + sf = new SequenceFeature("variant", "G,C", 22, 33, + 12.5f, "group"); + sf.setValue("Parent", "ENSG001"); + sf.setValue("Child", "ENSP002"); + expected = "
      " + + "" + + "" + + "" + + "" + + "" + + "
      Typevariant
      Start/end22-33
      DescriptionG,C
      Score12.5
      Groupgroup
      ChildENSP002
      ParentENSG001
      "; + assertEquals(expected, sf.getDetailsReport()); + + /* + * feature with embedded html link in description + */ + String desc = "Fer2 Status: True Positive Pfam 8_8"; + sf = new SequenceFeature("Pfam", desc, 8, 83, "Uniprot"); + expected = "
      " + + "" + + "" + + "
      TypePfam
      Start/end8-83
      DescriptionFer2 Status: True Positive Pfam 8_8
      GroupUniprot
      "; + assertEquals(expected, sf.getDetailsReport()); + } } diff --git a/test/jalview/ext/htsjdk/VCFReaderTest.java b/test/jalview/ext/htsjdk/VCFReaderTest.java new file mode 100644 index 0000000..bf617ae --- /dev/null +++ b/test/jalview/ext/htsjdk/VCFReaderTest.java @@ -0,0 +1,200 @@ +package jalview.ext.htsjdk; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.List; + +import org.testng.annotations.Test; + +public class VCFReaderTest +{ + private static final String[] VCF = new String[] { + "##fileformat=VCFv4.2", + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", + "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G + "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA + "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A + + // gnomAD exome variant dataset + private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz"; + + // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz"; + + /** + * A test to exercise some basic functionality of the htsjdk VCF reader, + * reading from a non-index VCF file + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testReadVcf_plain() throws IOException + { + File f = writeVcfFile(); + VCFReader reader = new VCFReader(f.getAbsolutePath()); + CloseableIterator variants = reader.iterator(); + + /* + * SNP C/G variant + */ + VariantContext vc = variants.next(); + assertTrue(vc.isSNP()); + Allele ref = vc.getReference(); + assertEquals(ref.getBaseString(), "C"); + List alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + 
assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "C"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "G"); + + /* + * Insertion G -> GA + */ + vc = variants.next(); + assertFalse(vc.isSNP()); + assertTrue(vc.isSimpleInsertion()); + ref = vc.getReference(); + assertEquals(ref.getBaseString(), "G"); + alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "G"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "GA"); + + /* + * Deletion ACG -> A + */ + vc = variants.next(); + assertFalse(vc.isSNP()); + assertTrue(vc.isSimpleDeletion()); + ref = vc.getReference(); + assertEquals(ref.getBaseString(), "ACG"); + alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "ACG"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "A"); + + assertFalse(variants.hasNext()); + + variants.close(); + reader.close(); + } + + /** + * Creates a temporary file to be read by the htsjdk VCF reader + * + * @return + * @throws IOException + */ + protected File writeVcfFile() throws IOException + { + File f = File.createTempFile("Test", "vcf"); + f.deleteOnExit(); + PrintWriter pw = new PrintWriter(f); + for (String vcfLine : VCF) { + pw.println(vcfLine); + } + pw.close(); + return f; + } + + /** + * A 'test' that demonstrates querying an indexed VCF file for features in a + * specified interval + * + * @throws IOException + */ + @Test + public void testQuery_indexed() throws IOException + { + /* + * if not specified, assumes index file is filename.tbi + */ + VCFReader reader = new VCFReader(VCF_PATH); + + /* + * gene NMT1 (human) is on chromosome 17 + * GCHR38 (Ensembl): 45051610-45109016 + * GCHR37 (gnoMAD): 43128978-43186384 + * 
CDS begins at offset 9720, first CDS variant at offset 9724 + */ + CloseableIterator features = reader.query("17", + 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions + + assertEquals(printNext(features), 43138702); + assertEquals(printNext(features), 43138704); + assertEquals(printNext(features), 43138707); + assertEquals(printNext(features), 43138708); + assertEquals(printNext(features), 43138710); + assertEquals(printNext(features), 43138711); + assertFalse(features.hasNext()); + + features.close(); + reader.close(); + } + + /** + * Prints the toString value of the next variant, and returns its start + * location + * + * @param features + * @return + */ + protected int printNext(CloseableIterator features) + { + VariantContext next = features.next(); + System.out.println(next.toString()); + return next.getStart(); + } + + // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz"; + + /** + * Test the query method that wraps a non-indexed VCF file + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testQuery_plain() throws IOException + { + File f = writeVcfFile(); + VCFReader reader = new VCFReader(f.getAbsolutePath()); + + /* + * query for overlap of 5-8 - should find variant at 7 + */ + CloseableIterator variants = reader.query("20", 5, 8); + + /* + * INDEL G/GA variant + */ + VariantContext vc = variants.next(); + assertTrue(vc.isIndel()); + assertEquals(vc.getStart(), 7); + assertEquals(vc.getEnd(), 7); + Allele ref = vc.getReference(); + assertEquals(ref.getBaseString(), "G"); + List alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "G"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "GA"); + + assertFalse(variants.hasNext()); + + variants.close(); + reader.close(); + } +} diff --git a/test/jalview/gui/PopupMenuTest.java 
b/test/jalview/gui/PopupMenuTest.java index 335240b..40e624d 100644 --- a/test/jalview/gui/PopupMenuTest.java +++ b/test/jalview/gui/PopupMenuTest.java @@ -26,21 +26,26 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertTrue; +import jalview.bin.Cache; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; -import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.DataSourceType; import jalview.io.FileFormat; import jalview.io.FormatAdapter; +import jalview.urls.api.UrlProviderFactoryI; +import jalview.urls.desktop.DesktopUrlProviderFactory; import jalview.util.MessageManager; +import jalview.util.UrlConstants; import java.awt.Component; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import javax.swing.JMenu; @@ -80,6 +85,25 @@ public class PopupMenuTest @BeforeMethod(alwaysRun = true) public void setUp() throws IOException { + Cache.loadProperties("test/jalview/io/testProps.jvprops"); + String inMenuString = ("EMBL-EBI Search | http://www.ebi.ac.uk/ebisearch/search.ebi?db=allebi&query=$" + + SEQUENCE_ID + + "$" + + "|" + + "UNIPROT | http://www.uniprot.org/uniprot/$" + DB_ACCESSION + "$") + + "|" + + ("INTERPRO | http://www.ebi.ac.uk/interpro/entry/$" + + DB_ACCESSION + "$") + + "|" + + + // Gene3D entry tests for case (in)sensitivity + ("Gene3D | http://gene3d.biochem.ucl.ac.uk/Gene3D/search?sterm=$" + + DB_ACCESSION + "$&mode=protein"); + + UrlProviderFactoryI factory = new DesktopUrlProviderFactory( + UrlConstants.DEFAULT_LABEL, inMenuString, ""); + Preferences.sequenceUrlLinks = factory.createUrlProvider(); + alignment = new FormatAdapter().readFile(TEST_DATA, DataSourceType.PASTE, 
FileFormat.Fasta); AlignFrame af = new AlignFrame(alignment, 700, 500); @@ -495,17 +519,19 @@ public class PopupMenuTest // add all the dbrefs to the sequences: Uniprot 1 each, Interpro all 3 to // seq0, Gene3D to seq1 - seqs.get(0).addDBRef(refs.get(0)); + SequenceI seq = seqs.get(0); + seq.addDBRef(refs.get(0)); - seqs.get(0).addDBRef(refs.get(1)); - seqs.get(0).addDBRef(refs.get(2)); - seqs.get(0).addDBRef(refs.get(3)); + seq.addDBRef(refs.get(1)); + seq.addDBRef(refs.get(2)); + seq.addDBRef(refs.get(3)); seqs.get(1).addDBRef(refs.get(4)); seqs.get(1).addDBRef(refs.get(5)); // get the Popup Menu for first sequence - testee = new PopupMenu(parentPanel, (Sequence) seqs.get(0), links); + List noFeatures = Collections. emptyList(); + testee = new PopupMenu(parentPanel, seq, noFeatures); Component[] seqItems = testee.sequenceMenu.getMenuComponents(); JMenu linkMenu = (JMenu) seqItems[6]; Component[] linkItems = linkMenu.getMenuComponents(); @@ -519,15 +545,18 @@ public class PopupMenuTest // sequence id for each link should match corresponding DB accession id for (int i = 1; i < 4; i++) { - assertEquals(refs.get(i - 1).getSource(), ((JMenuItem) linkItems[i]) + String msg = seq.getName() + " link[" + i + "]"; + assertEquals(msg, refs.get(i - 1).getSource(), + ((JMenuItem) linkItems[i]) .getText().split("\\|")[0]); - assertEquals(refs.get(i - 1).getAccessionId(), + assertEquals(msg, refs.get(i - 1).getAccessionId(), ((JMenuItem) linkItems[i]) .getText().split("\\|")[1]); } // get the Popup Menu for second sequence - testee = new PopupMenu(parentPanel, (Sequence) seqs.get(1), links); + seq = seqs.get(1); + testee = new PopupMenu(parentPanel, seq, noFeatures); seqItems = testee.sequenceMenu.getMenuComponents(); linkMenu = (JMenu) seqItems[6]; linkItems = linkMenu.getMenuComponents(); @@ -541,9 +570,11 @@ public class PopupMenuTest // sequence id for each link should match corresponding DB accession id for (int i = 1; i < 3; i++) { - assertEquals(refs.get(i + 
3).getSource(), ((JMenuItem) linkItems[i]) + String msg = seq.getName() + " link[" + i + "]"; + assertEquals(msg, refs.get(i + 3).getSource(), + ((JMenuItem) linkItems[i]) .getText().split("\\|")[0].toUpperCase()); - assertEquals(refs.get(i + 3).getAccessionId(), + assertEquals(msg, refs.get(i + 3).getAccessionId(), ((JMenuItem) linkItems[i]).getText().split("\\|")[1]); } @@ -552,8 +583,7 @@ public class PopupMenuTest nomatchlinks.add("NOMATCH | http://www.uniprot.org/uniprot/$" + DB_ACCESSION + "$"); - testee = new PopupMenu(parentPanel, (Sequence) seqs.get(0), - nomatchlinks); + testee = new PopupMenu(parentPanel, seq, noFeatures); seqItems = testee.sequenceMenu.getMenuComponents(); linkMenu = (JMenu) seqItems[6]; assertFalse(linkMenu.isEnabled()); diff --git a/test/jalview/gui/SeqCanvasTest.java b/test/jalview/gui/SeqCanvasTest.java index a27bc3f..05b9aea 100644 --- a/test/jalview/gui/SeqCanvasTest.java +++ b/test/jalview/gui/SeqCanvasTest.java @@ -13,8 +13,6 @@ import junit.extensions.PA; import org.testng.annotations.Test; -import sun.swing.SwingUtilities2; - public class SeqCanvasTest { /** @@ -48,7 +46,7 @@ public class SeqCanvasTest av.setScaleAboveWrapped(true); av.setScaleLeftWrapped(true); av.setScaleRightWrapped(true); - FontMetrics fm = SwingUtilities2.getFontMetrics(testee, av.getFont()); + FontMetrics fm = testee.getFontMetrics(av.getFont()); int labelWidth = fm.stringWidth("000") + charWidth; assertEquals(labelWidth, 39); // 3 x 9 + charWidth @@ -218,7 +216,7 @@ public class SeqCanvasTest av.setScaleAboveWrapped(true); av.setScaleLeftWrapped(true); av.setScaleRightWrapped(true); - FontMetrics fm = SwingUtilities2.getFontMetrics(testee, av.getFont()); + FontMetrics fm = testee.getFontMetrics(av.getFont()); int labelWidth = fm.stringWidth("000") + charWidth; assertEquals(labelWidth, 39); // 3 x 9 + charWidth int annotationHeight = testee.getAnnotationHeight(); diff --git a/test/jalview/io/CrossRef2xmlTests.java b/test/jalview/io/CrossRef2xmlTests.java 
index 0715857..b3db4de 100644 --- a/test/jalview/io/CrossRef2xmlTests.java +++ b/test/jalview/io/CrossRef2xmlTests.java @@ -39,6 +39,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; + +import junit.extensions.PA; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -90,9 +93,9 @@ public class CrossRef2xmlTests extends Jalview2xmlBase // . codonframes // // - HashMap dbtoviewBit = new HashMap<>(); + Map dbtoviewBit = new HashMap<>(); List keyseq = new ArrayList<>(); - HashMap savedProjects = new HashMap<>(); + Map savedProjects = new HashMap<>(); for (String[] did : new String[][] { { "UNIPROT", "P00338" } }) { @@ -186,15 +189,16 @@ public class CrossRef2xmlTests extends Jalview2xmlBase if (pass2 == 0) { // retrieve and show cross-refs in this thread - cra = new CrossRefAction(af, seqs, dna, db); + cra = CrossRefAction.getHandlerFor(seqs, dna, db, af); cra.run(); - if (cra.getXrefViews().size() == 0) + cra_views = (List) PA.getValue(cra, + "xrefViews"); + if (cra_views.size() == 0) { failedXrefMenuItems.add("No crossrefs retrieved for " + first + " -> " + db); continue; } - cra_views = cra.getXrefViews(); assertNucleotide(cra_views.get(0), "Nucleotide panel included proteins for " + first + " -> " + db); @@ -286,16 +290,18 @@ public class CrossRef2xmlTests extends Jalview2xmlBase if (pass3 == 0) { - SequenceI[] xrseqs = avp.getAlignment() .getSequencesArray(); AlignFrame nextaf = Desktop.getAlignFrameFor(avp .getAlignViewport()); - cra = new CrossRefAction(nextaf, xrseqs, avp - .getAlignViewport().isNucleotide(), xrefdb); + cra = CrossRefAction.getHandlerFor(xrseqs, avp + .getAlignViewport().isNucleotide(), xrefdb, + nextaf); cra.run(); - if (cra.getXrefViews().size() == 0) + cra_views2 = (List) PA.getValue( + cra, "xrefViews"); + if (cra_views2.size() == 0) { failedXrefMenuItems .add("No crossrefs retrieved for '" @@ -303,7 +309,6 @@ public class CrossRef2xmlTests 
extends Jalview2xmlBase + " via '" + nextaf.getTitle() + "'"); continue; } - cra_views2 = cra.getXrefViews(); assertNucleotide(cra_views2.get(0), "Nucleotide panel included proteins for '" + nextxref + "' to " + xrefdb @@ -541,8 +546,8 @@ public class CrossRef2xmlTests extends Jalview2xmlBase * viewpanel needs to be called with a distinct xrefpath to ensure * each one's strings are compared) */ - private void stringify(HashMap dbtoviewBit, - HashMap savedProjects, String xrefpath, + private void stringify(Map dbtoviewBit, + Map savedProjects, String xrefpath, AlignmentViewPanel avp) { if (savedProjects != null) diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java new file mode 100644 index 0000000..5607b4b --- /dev/null +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -0,0 +1,578 @@ +package jalview.io.vcf; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.Mapping; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; +import jalview.gui.AlignFrame; +import jalview.io.DataSourceType; +import jalview.io.FileLoader; +import jalview.io.gff.Gff3Helper; +import jalview.io.gff.SequenceOntologyI; +import jalview.util.MapList; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.List; + +import org.testng.annotations.Test; + +public class VCFLoaderTest +{ + private static final float DELTA = 0.00001f; + + // columns 9717- of gene P30419 from Ensembl (much modified) + private static final String FASTA = "" + + + /* + * forward strand 'gene' and 'transcript' with two exons + */ + ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n" + + "CAAGCTGGCGGACGAGAGTGTGACA\n" + + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n" + + /* + * 
reverse strand gene and transcript (reverse complement alleles!) + */ + + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n" + + "TGTCACACTCTCGTCCGCCAGCTTG\n" + + ">transcript2/1-18\n" + "-GTCACACTCT----CGCCAGCT--\n" + + /* + * 'gene' on chromosome 5 with two transcripts + */ + + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n" + + "CAAGCTGGCGGACGAGAGTGTGACA\n" + + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n" + + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n"; + + private static final String[] VCF = { "##fileformat=VCFv4.2", + "##INFO=", + "##reference=Homo_sapiens/GRCh38", + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", + // A/T,C variants in position 2 of gene sequence (precedes transcript) + // should create 2 variant features with respective scores + "17\t45051611\t.\tA\tT,C\t1666.64\tRF\tAC=15;AF=5.0e-03,4.0e-03", + // SNP G/C in position 4 of gene sequence, position 2 of transcript + // insertion G/GA is transferred to nucleotide but not to peptide + "17\t45051613\t.\tG\tGA,C\t1666.64\tRF\tAC=15;AF=3.0e-03,2.0e-03" }; + + @Test(groups = "Functional") + public void testDoLoad() throws IOException + { + AlignmentI al = buildAlignment(); + VCFLoader loader = new VCFLoader(al); + + File f = makeVcf(); + + loader.doLoad(f.getPath(), null); + + /* + * verify variant feature(s) added to gene + * NB alleles at a locus may not be processed, and features added, + * in the order in which they appear in the VCF record as method + * VariantContext.getAlternateAlleles() does not guarantee order + * - order of assertions here matches what we find (is not important) + */ + List geneFeatures = al.getSequenceAt(0) + .getSequenceFeatures(); + SequenceFeatures.sortFeatures(geneFeatures, true); + assertEquals(geneFeatures.size(), 4); + SequenceFeature sf = geneFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getScore(), 4.0e-03, DELTA); + 
assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C"); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + sf = geneFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 5.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T"); + + sf = geneFeatures.get(2); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 2.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C"); + + sf = geneFeatures.get(3); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA"); + + /* + * verify variant feature(s) added to transcript + */ + List transcriptFeatures = al.getSequenceAt(1) + .getSequenceFeatures(); + assertEquals(transcriptFeatures.size(), 2); + sf = transcriptFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 2.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C"); + sf = transcriptFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA"); + + /* + * verify SNP variant feature(s) computed and added to protein + * first codon AGC varies to ACC giving S/T + */ + DBRefEntry[] 
dbRefs = al.getSequenceAt(1).getDBRefs(); + SequenceI peptide = null; + for (DBRefEntry dbref : dbRefs) + { + if (dbref.getMap().getMap().getFromRatio() == 3) + { + peptide = dbref.getMap().getTo(); + } + } + List proteinFeatures = peptide.getSequenceFeatures(); + assertEquals(proteinFeatures.size(), 1); + sf = proteinFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 1); + assertEquals(sf.getEnd(), 1); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getDescription(), "p.Ser1Thr"); + } + + private File makeVcf() throws IOException + { + File f = File.createTempFile("Test", ".vcf"); + f.deleteOnExit(); + PrintWriter pw = new PrintWriter(f); + for (String vcfLine : VCF) + { + pw.println(vcfLine); + } + pw.close(); + return f; + } + + /** + * Make a simple alignment with one 'gene' and one 'transcript' + * + * @return + */ + private AlignmentI buildAlignment() + { + AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA, + DataSourceType.PASTE); + + /* + * map gene1 sequence to chromosome (normally done when the sequence is fetched + * from Ensembl and transcripts computed) + */ + AlignmentI alignment = af.getViewport().getAlignment(); + SequenceI gene1 = alignment.findName("gene1"); + int[] to = new int[] { 45051610, 45051634 }; + int[] from = new int[] { gene1.getStart(), gene1.getEnd() }; + gene1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to, + 1, 1)); + + /* + * map 'transcript1' to chromosome via 'gene1' + * transcript1/1-18 is gene1/3-10,15-24 + * which is chromosome 45051612-45051619,45051624-45051633 + */ + to = new int[] { 45051612, 45051619, 45051624, 45051633 }; + SequenceI transcript1 = alignment.findName("transcript1"); + from = new int[] { transcript1.getStart(), transcript1.getEnd() }; + transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList( + from, to, + 1, 1)); + + /* + * map gene2 to chromosome reverse strand + */ + SequenceI gene2 = 
alignment.findName("gene2"); + to = new int[] { 45051634, 45051610 }; + from = new int[] { gene2.getStart(), gene2.getEnd() }; + gene2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to, + 1, 1)); + + /* + * map 'transcript2' to chromosome via 'gene2' + * transcript2/1-18 is gene2/2-11,16-23 + * which is chromosome 45051633-45051624,45051619-45051612 + */ + to = new int[] { 45051633, 45051624, 45051619, 45051612 }; + SequenceI transcript2 = alignment.findName("transcript2"); + from = new int[] { transcript2.getStart(), transcript2.getEnd() }; + transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList( + from, to, + 1, 1)); + + /* + * add a protein product as a DBRef on transcript1 + */ + SequenceI peptide1 = new Sequence("ENSP001", "SWRECD"); + MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, + 3, 1); + Mapping map = new Mapping(peptide1, mapList); + DBRefEntry product = new DBRefEntry("", "", "ENSP001", map); + transcript1.addDBRef(product); + + /* + * add a protein product as a DBRef on transcript2 + */ + SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA"); + mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1); + map = new Mapping(peptide2, mapList); + product = new DBRefEntry("", "", "ENSP002", map); + transcript2.addDBRef(product); + + /* + * map gene3 to chromosome + */ + SequenceI gene3 = alignment.findName("gene3"); + to = new int[] { 45051610, 45051634 }; + from = new int[] { gene3.getStart(), gene3.getEnd() }; + gene3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(from, to, + 1, 1)); + + /* + * map 'transcript3' to chromosome + */ + SequenceI transcript3 = alignment.findName("transcript3"); + to = new int[] { 45051612, 45051619, 45051624, 45051633 }; + from = new int[] { transcript3.getStart(), transcript3.getEnd() }; + transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList( + from, to, + 1, 1)); + + /* + * map 'transcript4' to chromosome + */ + SequenceI transcript4 = 
alignment.findName("transcript4"); + to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634, + 45051634 }; + from = new int[] { transcript4.getStart(), transcript4.getEnd() }; + transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList( + from, to, + 1, 1)); + + /* + * add a protein product as a DBRef on transcript3 + */ + SequenceI peptide3 = new Sequence("ENSP003", "SWRECD"); + mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1); + map = new Mapping(peptide3, mapList); + product = new DBRefEntry("", "", "ENSP003", map); + transcript3.addDBRef(product); + + return alignment; + } + + /** + * Test with 'gene' and 'transcript' mapped to the reverse strand of the + * chromosome. The VCF variant positions (in forward coordinates) should get + * correctly located on sequence positions. + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testDoLoad_reverseStrand() throws IOException + { + AlignmentI al = buildAlignment(); + + VCFLoader loader = new VCFLoader(al); + + File f = makeVcf(); + + loader.doLoad(f.getPath(), null); + + /* + * verify variant feature(s) added to gene2 + * gene/1-25 maps to chromosome 45051634- reverse strand + * variants A/T, A/C at 45051611 and G/GA,G/C at 45051613 map to + * T/A, T/G and C/TC,C/G at gene positions 24 and 22 respectively + */ + List geneFeatures = al.getSequenceAt(2) + .getSequenceFeatures(); + SequenceFeatures.sortFeatures(geneFeatures, true); + assertEquals(geneFeatures.size(), 4); + SequenceFeature sf = geneFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 22); + assertEquals(sf.getEnd(), 22); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 2.0e-03, DELTA); + assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); + + sf = geneFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 22); + assertEquals(sf.getEnd(), 22); + 
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES)); + + sf = geneFeatures.get(2); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 24); + assertEquals(sf.getEnd(), 24); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 4.0e-03, DELTA); + assertEquals("T,G", sf.getValue(Gff3Helper.ALLELES)); + + sf = geneFeatures.get(3); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 24); + assertEquals(sf.getEnd(), 24); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 5.0e-03, DELTA); + assertEquals("T,A", sf.getValue(Gff3Helper.ALLELES)); + + /* + * verify variant feature(s) added to transcript2 + * variants G/GA,G/C at position 22 of gene overlap and map to + * C/TC,C/G at position 17 of transcript + */ + List transcriptFeatures = al.getSequenceAt(3) + .getSequenceFeatures(); + assertEquals(transcriptFeatures.size(), 2); + sf = transcriptFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 2.0e-03, DELTA); + assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); + + sf = transcriptFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES)); + + /* + * verify variant feature(s) computed and added to protein + * last codon GCT varies to GGT giving A/G in the last peptide position + */ + DBRefEntry[] dbRefs = al.getSequenceAt(3).getDBRefs(); + SequenceI peptide = null; + for (DBRefEntry dbref : dbRefs) + { + if 
(dbref.getMap().getMap().getFromRatio() == 3) + { + peptide = dbref.getMap().getTo(); + } + } + List proteinFeatures = peptide.getSequenceFeatures(); + assertEquals(proteinFeatures.size(), 1); + sf = proteinFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 6); + assertEquals(sf.getEnd(), 6); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getDescription(), "p.Ala6Gly"); + } + + /** + * Tests that if VEP consequence (CSQ) data is present in the VCF data, then + * it is added to the variant feature, but restricted where possible to the + * consequences for a specific transcript + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testDoLoad_vepCsq() throws IOException + { + AlignmentI al = buildAlignment(); + + VCFLoader loader = new VCFLoader(al); + + /* + * VCF data file with variants at gene3 positions + * 1 C/A + * 5 C/T + * 9 CGT/C (deletion) + * 13 C/G, C/T + * 17 A/AC (insertion), A/G + */ + loader.doLoad("test/jalview/io/vcf/testVcf.dat", null); + + /* + * verify variant feature(s) added to gene3 + */ + List geneFeatures = al.findName("gene3") + .getSequenceFeatures(); + SequenceFeatures.sortFeatures(geneFeatures, true); + assertEquals(geneFeatures.size(), 7); + SequenceFeature sf = geneFeatures.get(0); + assertEquals(sf.getBegin(), 1); + assertEquals(sf.getEnd(), 1); + assertEquals(sf.getScore(), 0.1f, DELTA); + assertEquals(sf.getValue("alleles"), "C,A"); + // gene features include Consequence for all transcripts + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(1); + assertEquals(sf.getBegin(), 5); + assertEquals(sf.getEnd(), 5); + assertEquals(sf.getScore(), 0.2f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(2); + assertEquals(sf.getBegin(), 9); + assertEquals(sf.getEnd(), 11); // deletion over 3 positions + 
assertEquals(sf.getScore(), 0.3f, DELTA); + assertEquals(sf.getValue("alleles"), "CGG,C"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(3); + assertEquals(sf.getBegin(), 13); + assertEquals(sf.getEnd(), 13); + assertEquals(sf.getScore(), 0.5f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(4); + assertEquals(sf.getBegin(), 13); + assertEquals(sf.getEnd(), 13); + assertEquals(sf.getScore(), 0.4f, DELTA); + assertEquals(sf.getValue("alleles"), "C,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(5); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); + assertEquals(sf.getScore(), 0.7f, DELTA); + assertEquals(sf.getValue("alleles"), "A,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + sf = geneFeatures.get(6); + assertEquals(sf.getBegin(), 17); + assertEquals(sf.getEnd(), 17); // insertion + assertEquals(sf.getScore(), 0.6f, DELTA); + assertEquals(sf.getValue("alleles"), "A,AC"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 2); + + /* + * verify variant feature(s) added to transcript3 + * at columns 5 (1), 17 (2), positions 3, 11 + * note the deletion at columns 9-11 is not transferred since col 11 + * has no mapping to transcript 3 + */ + List transcriptFeatures = al.findName("transcript3") + .getSequenceFeatures(); + SequenceFeatures.sortFeatures(transcriptFeatures, true); + assertEquals(transcriptFeatures.size(), 3); + sf = transcriptFeatures.get(0); + assertEquals(sf.getBegin(), 3); + assertEquals(sf.getEnd(), 3); + assertEquals(sf.getScore(), 0.2f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + // transcript features only have Consequence for that transcripts + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript3")); + 
+ sf = transcriptFeatures.get(1); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.7f, DELTA); + assertEquals(sf.getValue("alleles"), "A,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript3")); + + sf = transcriptFeatures.get(2); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.6f, DELTA); + assertEquals(sf.getValue("alleles"), "A,AC"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript3")); + + /* + * verify variants computed on protein product for transcript3 + * peptide is SWRECD + * codon variants are AGC/AGT position 1 which is synonymous + * and GAG/GGG which is E/G in position 4 + * the insertion variant is not transferred to the peptide + */ + DBRefEntry[] dbRefs = al.findName("transcript3").getDBRefs(); + SequenceI peptide = null; + for (DBRefEntry dbref : dbRefs) + { + if (dbref.getMap().getMap().getFromRatio() == 3) + { + peptide = dbref.getMap().getTo(); + } + } + List proteinFeatures = peptide.getSequenceFeatures(); + assertEquals(proteinFeatures.size(), 1); + sf = proteinFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getDescription(), "p.Glu4Gly"); + + /* + * verify variant feature(s) added to transcript4 + * at columns 13 (2) and 17 (2), positions 7 and 11 + */ + transcriptFeatures = al.findName("transcript4").getSequenceFeatures(); + SequenceFeatures.sortFeatures(transcriptFeatures, true); + assertEquals(transcriptFeatures.size(), 4); + sf = transcriptFeatures.get(0); + assertEquals(sf.getBegin(), 7); + assertEquals(sf.getEnd(), 7); + assertEquals(sf.getScore(), 0.5f, DELTA); + assertEquals(sf.getValue("alleles"), "C,T"); + 
assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + + sf = transcriptFeatures.get(1); + assertEquals(sf.getBegin(), 7); + assertEquals(sf.getEnd(), 7); + assertEquals(sf.getScore(), 0.4f, DELTA); + assertEquals(sf.getValue("alleles"), "C,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + + sf = transcriptFeatures.get(2); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.7f, DELTA); + assertEquals(sf.getValue("alleles"), "A,G"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + + sf = transcriptFeatures.get(3); + assertEquals(sf.getBegin(), 11); + assertEquals(sf.getEnd(), 11); + assertEquals(sf.getScore(), 0.6f, DELTA); + assertEquals(sf.getValue("alleles"), "A,AC"); + assertEquals(((String) sf.getValue("CSQ")).split(",").length, 1); + assertTrue(sf.getValue("CSQ").toString().contains("transcript4")); + } +} diff --git a/test/jalview/io/vcf/testVcf.dat b/test/jalview/io/vcf/testVcf.dat new file mode 100644 index 0000000..77e070c --- /dev/null +++ b/test/jalview/io/vcf/testVcf.dat @@ -0,0 +1,13 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=/Homo_sapiens/GRCh38 +#CHROM POS ID REF ALT QUAL FILTER INFO +5 45051610 . C A 81.96 RF;AC0 AC=1;AF=0.1;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=A|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,A|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051618 . 
CGG C 41.94 AC0 AC=1;AF=0.3;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=C|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,C|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,CSQ=CGT|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,CGT|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051622 . C G,T 224.23 RF;AC0 AC=1,2;AF=0.4,0.5;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad +5 45051626 . A AC,G 433.35 RF;AC0 AC=3,4;AF=0.6,0.7;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,AC|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,AC|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index a2f38e2..d2db258 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -814,4 +814,130 @@ public class MapListTest assertEquals(1, merged.size()); assertArrayEquals(new int[] { 9, 0 }, merged.get(0)); } + + /** + * Test the method that compounds ('traverses') two mappings + */ + @Test + public void testTraverse() + { + /* + * simple 1:1 plus 1:1 forwards + */ + MapList ml1 = new MapList(new int[] { 3, 4, 8, 12 }, new int[] { 5, 8, + 11, 13 }, 1, 1); + MapList ml2 = new MapList(new int[] { 1, 50 }, new int[] { 40, 45, 70, + 75, 90, 127 }, 1, 1); + MapList compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + 
assertEquals(compound.getToRatio(), 1); + List fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 2); + assertArrayEquals(new int[] { 3, 4 }, fromRanges.get(0)); + assertArrayEquals(new int[] { 8, 12 }, fromRanges.get(1)); + List toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 2); + // 5-8 maps to 44-45,70-71 + // 11-13 maps to 74-75,90 + assertArrayEquals(new int[] { 44, 45, 70, 71 }, toRanges.get(0)); + assertArrayEquals(new int[] { 74, 75, 90, 90 }, toRanges.get(1)); + + /* + * 1:1 over 1:1 backwards ('reverse strand') + */ + ml1 = new MapList(new int[] { 1, 50 }, new int[] { 70, 119 }, 1, 1); + ml2 = new MapList(new int[] { 1, 500 }, + new int[] { 1000, 901, 600, 201 }, 1, 1); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 1, 50 }, fromRanges.get(0)); + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 931, 901, 600, 582 }, toRanges.get(0)); + + /* + * 1:1 plus 1:3 should result in 1:3 + */ + ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 40 }, 1, 1); + ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 50, 91, 340 }, + 1, 3); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 3); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0)); + // 11-40 maps to 31-50,91-160 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 31, 50, 91, 160 }, toRanges.get(0)); + + /* + * 3:1 plus 1:1 should result in 3:1 + */ + ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 20 }, 3, 1); + ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 15, 91, 175 }, + 1, 1); + compound = 
ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 3); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0)); + // 11-20 maps to 11-15, 91-95 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 11, 15, 91, 95 }, toRanges.get(0)); + + /* + * 1:3 plus 3:1 should result in 1:1 + */ + ml1 = new MapList(new int[] { 21, 40 }, new int[] { 13, 72 }, 1, 3); + ml2 = new MapList(new int[] { 1, 300 }, new int[] { 51, 70, 121, 200 }, + 3, 1); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 21, 40 }, fromRanges.get(0)); + // 13-72 maps 3:1 to 55-70, 121-124 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 55, 70, 121, 124 }, toRanges.get(0)); + + /* + * 3:1 plus 1:3 should result in 1:1 + */ + ml1 = new MapList(new int[] { 31, 90 }, new int[] { 13, 32 }, 3, 1); + ml2 = new MapList(new int[] { 11, 40 }, new int[] { 41, 50, 71, 150 }, + 1, 3); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 31, 90 }, fromRanges.get(0)); + // 13-32 maps to 47-50,71-126 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 47, 50, 71, 126 }, toRanges.get(0)); + + /* + * method returns null if not all regions are mapped through + */ + ml1 = new MapList(new int[] { 1, 50 }, new int[] { 101, 150 }, 1, 1); + ml2 = new MapList(new int[] { 131, 180 }, new int[] { 201, 250 }, 1, 3); + compound = ml1.traverse(ml2); + assertNull(compound); + } } diff --git 
a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 5226819..d4cf98a 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -1149,6 +1149,95 @@ public class MappingUtilsTest assertEquals("[12, 11, 8, 4]", Arrays.toString(ranges)); } + @Test(groups = { "Functional" }) + public void testRangeContains() + { + /* + * both forward ranges + */ + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 1, 10 })); + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 2, 10 })); + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 1, 9 })); + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 4, 5 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 0, 9 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + -10, -9 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 1, 11 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 11, 12 })); + + /* + * forward range, reverse query + */ + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 10, 1 })); + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 9, 1 })); + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 10, 2 })); + assertTrue(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 5, 5 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 11, 1 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, new int[] { + 10, 0 })); + + /* + * reverse range, forward query + */ + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 1, 10 })); + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 1, 9 })); + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 2, 10 })); + 
assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 6, 6 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 6, 11 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 11, 20 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + -3, -2 })); + + /* + * both reverse + */ + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 10, 1 })); + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 9, 1 })); + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 10, 2 })); + assertTrue(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 3, 3 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 11, 1 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 10, 0 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + 12, 11 })); + assertFalse(MappingUtils.rangeContains(new int[] { 10, 1 }, new int[] { + -5, -8 })); + + /* + * bad arguments + */ + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10, 12 }, + new int[] { + 1, 10 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, + new int[] { 1 })); + assertFalse(MappingUtils.rangeContains(new int[] { 1, 10 }, null)); + assertFalse(MappingUtils.rangeContains(null, new int[] { 1, 10 })); + } + @Test(groups = "Functional") public void testRemoveEndPositions() { diff --git a/test/jalview/util/MathUtilsTest.java b/test/jalview/util/MathUtilsTest.java new file mode 100644 index 0000000..fc84741 --- /dev/null +++ b/test/jalview/util/MathUtilsTest.java @@ -0,0 +1,26 @@ +package jalview.util; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + +public class MathUtilsTest +{ + @Test + public void testGcd() + { + assertEquals(MathUtils.gcd(0, 0), 0); + assertEquals(MathUtils.gcd(0, 1), 1); + assertEquals(MathUtils.gcd(1, 0), 1); + 
assertEquals(MathUtils.gcd(1, 1), 1); + assertEquals(MathUtils.gcd(1, -1), 1); + assertEquals(MathUtils.gcd(-1, 1), 1); + assertEquals(MathUtils.gcd(2, 3), 1); + assertEquals(MathUtils.gcd(4, 2), 2); + assertEquals(MathUtils.gcd(2, 4), 2); + assertEquals(MathUtils.gcd(2, -4), 2); + assertEquals(MathUtils.gcd(-2, 4), 2); + assertEquals(MathUtils.gcd(-2, -4), 2); + assertEquals(MathUtils.gcd(2 * 3 * 5 * 7 * 11, 3 * 7 * 13 * 17), 3 * 7); + } +} diff --git a/test/jalview/util/StringUtilsTest.java b/test/jalview/util/StringUtilsTest.java index b6f8a25..084219a 100644 --- a/test/jalview/util/StringUtilsTest.java +++ b/test/jalview/util/StringUtilsTest.java @@ -228,4 +228,26 @@ public class StringUtilsTest assertEquals("", StringUtils.toSentenceCase("")); assertNull(StringUtils.toSentenceCase(null)); } + + @Test(groups = { "Functional" }) + public void testStripHtmlTags() + { + assertNull(StringUtils.stripHtmlTags(null)); + assertEquals("", StringUtils.stripHtmlTags("")); + assertEquals( + "label", + StringUtils + .stripHtmlTags("label")); + + // if no "" tag, < and > get html-encoded (not sure why) + assertEquals("<a href=\"something\">label</href>", + StringUtils.stripHtmlTags("label")); + + // gets removed but not (is this intentional?) + assertEquals("

      hello", + StringUtils.stripHtmlTags("

      hello")); + + assertEquals("kdHydro < 12.53", + StringUtils.stripHtmlTags("kdHydro < 12.53")); + } } diff --git a/test/jalview/util/matcher/ConditionTest.java b/test/jalview/util/matcher/ConditionTest.java new file mode 100644 index 0000000..11a0630 --- /dev/null +++ b/test/jalview/util/matcher/ConditionTest.java @@ -0,0 +1,31 @@ +package jalview.util.matcher; + +import static org.testng.Assert.assertEquals; + +import java.util.Locale; + +import org.testng.annotations.Test; + +public class ConditionTest +{ + @Test + public void testToString() + { + Locale.setDefault(Locale.UK); + assertEquals(Condition.Contains.toString(), "Contains"); + assertEquals(Condition.NotContains.toString(), "Does not contain"); + assertEquals(Condition.Matches.toString(), "Matches"); + assertEquals(Condition.NotMatches.toString(), "Does not match"); + assertEquals(Condition.LT.toString(), "Is less than"); + assertEquals(Condition.LE.toString(), "Is less than or equal to"); + assertEquals(Condition.GT.toString(), "Is greater than"); + assertEquals(Condition.GE.toString(), "Is greater than or equal to"); + assertEquals(Condition.EQ.toString(), "Is equal to"); + assertEquals(Condition.NE.toString(), "Is not equal to"); + + /* + * repeat call to get coverage of cached value + */ + assertEquals(Condition.NE.toString(), "Is not equal to"); + } +} diff --git a/test/jalview/util/matcher/KeyedMatcherSetTest.java b/test/jalview/util/matcher/KeyedMatcherSetTest.java new file mode 100644 index 0000000..76ae8a5 --- /dev/null +++ b/test/jalview/util/matcher/KeyedMatcherSetTest.java @@ -0,0 +1,117 @@ +package jalview.util.matcher; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.util.function.Function; + +import org.testng.annotations.Test; + +public class KeyedMatcherSetTest +{ + @Test + public void testMatches() + { + /* + * a numeric matcher - MatcherTest covers more conditions + */ + 
KeyedMatcherI km = new KeyedMatcher("AF", Condition.GE, -2F); + assertTrue(km.matches(key -> "-2")); + assertTrue(km.matches(key -> "-1")); + assertFalse(km.matches(key -> "-3")); + assertFalse(km.matches(key -> "")); + assertFalse(km.matches(key -> "junk")); + assertFalse(km.matches(key -> null)); + + /* + * a string pattern matcher + */ + km = new KeyedMatcher("AF", Condition.Contains, "Cat"); + assertTrue(km.matches(key -> "AF".equals(key) ? "raining cats and dogs" + : "showers")); + } + + @Test + public void testAnd() + { + // condition1: AF value contains "dog" (matches) + KeyedMatcherI km1 = new KeyedMatcher("AF", Condition.Contains, "dog"); + // condition 2: CSQ value does not contain "how" (does not match) + KeyedMatcherI km2 = new KeyedMatcher("CSQ", Condition.NotContains, + "how"); + + Function vp = key -> "AF".equals(key) ? "raining cats and dogs" + : "showers"; + assertTrue(km1.matches(vp)); + assertFalse(km2.matches(vp)); + + KeyedMatcherSetI kms = new KeyedMatcherSet(); + assertTrue(kms.matches(vp)); // if no conditions, then 'all' pass + kms.and(km1); + assertTrue(kms.matches(vp)); + kms.and(km2); + assertFalse(kms.matches(vp)); + } + + @Test + public void testToString() + { + KeyedMatcherI km1 = new KeyedMatcher("AF", Condition.LT, 1.2f); + assertEquals(km1.toString(), "AF LT 1.2"); + + KeyedMatcher km2 = new KeyedMatcher("CLIN_SIG", Condition.NotContains, "path"); + assertEquals(km2.toString(), "CLIN_SIG NotContains PATH"); + + /* + * AND them + */ + KeyedMatcherSetI kms = new KeyedMatcherSet(); + assertEquals(kms.toString(), ""); + kms.and(km1); + assertEquals(kms.toString(), "(AF LT 1.2)"); + kms.and(km2); + assertEquals(kms.toString(), + "(AF LT 1.2) AND (CLIN_SIG NotContains PATH)"); + + /* + * OR them + */ + kms = new KeyedMatcherSet(); + assertEquals(kms.toString(), ""); + kms.or(km1); + assertEquals(kms.toString(), "(AF LT 1.2)"); + kms.or(km2); + assertEquals(kms.toString(), + "(AF LT 1.2) OR (CLIN_SIG NotContains PATH)"); + } + + /** + * 
@return + */ + protected KeyedMatcher km3() + { + return new KeyedMatcher("CSQ", Condition.Contains, "benign"); + } + + @Test + public void testOr() + { + // condition1: AF value contains "dog" (matches) + KeyedMatcherI km1 = new KeyedMatcher("AF", Condition.Contains, "dog"); + // condition 2: CSQ value does not contain "how" (does not match) + KeyedMatcherI km2 = new KeyedMatcher("CSQ", Condition.NotContains, + "how"); + + Function vp = key -> "AF".equals(key) ? "raining cats and dogs" + : "showers"; + assertTrue(km1.matches(vp)); + assertFalse(km2.matches(vp)); + + KeyedMatcherSetI kms = new KeyedMatcherSet(); + kms.or(km2); + assertFalse(kms.matches(vp)); + kms.or(km1); + assertTrue(kms.matches(vp)); + } +} diff --git a/test/jalview/util/matcher/KeyedMatcherTest.java b/test/jalview/util/matcher/KeyedMatcherTest.java new file mode 100644 index 0000000..ebc09c1 --- /dev/null +++ b/test/jalview/util/matcher/KeyedMatcherTest.java @@ -0,0 +1,55 @@ +package jalview.util.matcher; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; + +public class KeyedMatcherTest +{ + @Test + public void testMatches() + { + /* + * a numeric matcher - MatcherTest covers more conditions + */ + KeyedMatcherI km = new KeyedMatcher("AF", Condition.GE, -2F); + assertTrue(km.matches(key -> "-2")); + assertTrue(km.matches(key -> "-1")); + assertFalse(km.matches(key -> "-3")); + assertFalse(km.matches(key -> "")); + assertFalse(km.matches(key -> "junk")); + assertFalse(km.matches(key -> null)); + + /* + * a string pattern matcher + */ + km = new KeyedMatcher("AF", Condition.Contains, "Cat"); + assertTrue(km.matches(key -> "AF".equals(key) ? 
"raining cats and dogs" + : "showers")); + } + + @Test + public void testToString() + { + KeyedMatcherI km = new KeyedMatcher("AF", Condition.LT, 1.2f); + assertEquals(km.toString(), "AF LT 1.2"); + } + + @Test + public void testGetKey() + { + KeyedMatcherI km = new KeyedMatcher("AF", Condition.GE, -2F); + assertEquals(km.getKey(), "AF"); + } + + @Test + public void testGetMatcher() + { + KeyedMatcherI km = new KeyedMatcher("AF", Condition.GE, -2F); + assertEquals(km.getMatcher().getCondition(), Condition.GE); + assertEquals(km.getMatcher().getFloatValue(), -2F); + assertEquals(km.getMatcher().getPattern(), "-2.0"); + } +} diff --git a/test/jalview/util/matcher/MatcherTest.java b/test/jalview/util/matcher/MatcherTest.java new file mode 100644 index 0000000..d988c3a --- /dev/null +++ b/test/jalview/util/matcher/MatcherTest.java @@ -0,0 +1,250 @@ +package jalview.util.matcher; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.testng.annotations.Test; + +public class MatcherTest +{ + @Test + public void testConstructor() + { + MatcherI m = new Matcher(Condition.Contains, "foo"); + assertEquals(m.getCondition(), Condition.Contains); + assertEquals(m.getPattern(), "FOO"); // all comparisons upper-cased + assertEquals(m.getFloatValue(), 0f); + + m = new Matcher(Condition.GT, -2.1f); + assertEquals(m.getCondition(), Condition.GT); + assertEquals(m.getPattern(), "-2.1"); + assertEquals(m.getFloatValue(), -2.1f); + + m = new Matcher(Condition.NotContains, "-1.2f"); + assertEquals(m.getCondition(), Condition.NotContains); + assertEquals(m.getPattern(), "-1.2F"); + assertEquals(m.getFloatValue(), 0f); + + m = new Matcher(Condition.GE, "-1.2f"); + assertEquals(m.getCondition(), Condition.GE); + assertEquals(m.getPattern(), "-1.2"); + assertEquals(m.getFloatValue(), -1.2f); + + try + { + 
new Matcher(null, 0f); + fail("Expected exception"); + } catch (NullPointerException e) + { + // expected + } + + try + { + new Matcher(Condition.LT, "123,456"); + fail("Expected exception"); + } catch (NumberFormatException e) + { + // expected + } + } + + /** + * Tests for float comparison conditions + */ + @Test + public void testMatches_float() + { + /* + * EQUALS test + */ + MatcherI m = new Matcher(Condition.EQ, 2f); + assertTrue(m.matches("2")); + assertTrue(m.matches("2.0")); + assertFalse(m.matches("2.01")); + + /* + * NOT EQUALS test + */ + m = new Matcher(Condition.NE, 2f); + assertFalse(m.matches("2")); + assertFalse(m.matches("2.0")); + assertTrue(m.matches("2.01")); + + /* + * >= test + */ + m = new Matcher(Condition.GE, 2f); + assertTrue(m.matches("2")); + assertTrue(m.matches("2.1")); + assertFalse(m.matches("1.9")); + + /* + * > test + */ + m = new Matcher(Condition.GT, 2f); + assertFalse(m.matches("2")); + assertTrue(m.matches("2.1")); + assertFalse(m.matches("1.9")); + + /* + * <= test + */ + m = new Matcher(Condition.LE, 2f); + assertTrue(m.matches("2")); + assertFalse(m.matches("2.1")); + assertTrue(m.matches("1.9")); + + /* + * < test + */ + m = new Matcher(Condition.LT, 2f); + assertFalse(m.matches("2")); + assertFalse(m.matches("2.1")); + assertTrue(m.matches("1.9")); + } + + @Test + public void testMatches_floatNullOrInvalid() + { + for (Condition cond : Condition.values()) + { + if (cond.isNumeric()) + { + MatcherI m = new Matcher(cond, 2f); + assertFalse(m.matches(null)); + assertFalse(m.matches("")); + assertFalse(m.matches("two")); + } + } + } + + /** + * Tests for string comparison conditions + */ + @Test + public void testMatches_pattern() + { + /* + * Contains + */ + MatcherI m = new Matcher(Condition.Contains, "benign"); + assertTrue(m.matches("benign")); + assertTrue(m.matches("MOSTLY BENIGN OBSERVED")); // not case-sensitive + assertFalse(m.matches("pathogenic")); + assertFalse(m.matches(null)); + + /* + * does not contain + */ + 
m = new Matcher(Condition.NotContains, "benign"); + assertFalse(m.matches("benign")); + assertFalse(m.matches("MOSTLY BENIGN OBSERVED")); // not case-sensitive + assertTrue(m.matches("pathogenic")); + assertTrue(m.matches(null)); // null value passes this condition + + /* + * matches + */ + m = new Matcher(Condition.Matches, "benign"); + assertTrue(m.matches("benign")); + assertTrue(m.matches(" Benign ")); // trim before testing + assertFalse(m.matches("MOSTLY BENIGN")); + assertFalse(m.matches("pathogenic")); + assertFalse(m.matches(null)); + + /* + * does not match + */ + m = new Matcher(Condition.NotMatches, "benign"); + assertFalse(m.matches("benign")); + assertFalse(m.matches(" Benign ")); // trim before testing + assertTrue(m.matches("MOSTLY BENIGN")); + assertTrue(m.matches("pathogenic")); + assertTrue(m.matches(null)); + + /* + * a float with a string match condition will be treated as string + */ + Matcher m1 = new Matcher(Condition.Contains, "32"); + assertFalse(m1.matches(-203f)); + assertTrue(m1.matches(-4321.0f)); + } + + /** + * If a float is passed with a string condition it gets converted to a string + */ + @Test + public void testMatches_floatWithStringCondition() + { + MatcherI m = new Matcher(Condition.Contains, 1.2e-6f); + assertTrue(m.matches("1.2e-6")); + + m = new Matcher(Condition.Contains, 0.0000001f); + assertTrue(m.matches("1.0e-7")); + assertTrue(m.matches("1.0E-7")); + assertFalse(m.matches("0.0000001f")); + } + + @Test + public void testToString() + { + MatcherI m = new Matcher(Condition.LT, 1.2e-6f); + assertEquals(m.toString(), "LT 1.2E-6"); + + m = new Matcher(Condition.NotMatches, "ABC"); + assertEquals(m.toString(), "NotMatches ABC"); + + m = new Matcher(Condition.Contains, -1.2f); + assertEquals(m.toString(), "Contains -1.2"); + } + + @Test + public void testEquals() + { + /* + * string condition + */ + MatcherI m = new Matcher(Condition.NotMatches, "ABC"); + assertFalse(m.equals(null)); + assertFalse(m.equals("foo")); + 
assertTrue(m.equals(m)); + assertTrue(m.equals(new Matcher(Condition.NotMatches, "ABC"))); + // not case-sensitive: + assertTrue(m.equals(new Matcher(Condition.NotMatches, "abc"))); + assertFalse(m.equals(new Matcher(Condition.Matches, "ABC"))); + assertFalse(m.equals(new Matcher(Condition.NotMatches, "def"))); + + /* + * numeric conditions + */ + m = new Matcher(Condition.LT, -1f); + assertFalse(m.equals(null)); + assertFalse(m.equals("foo")); + assertTrue(m.equals(m)); + assertTrue(m.equals(new Matcher(Condition.LT, -1f))); + assertTrue(m.equals(new Matcher(Condition.LT, "-1f"))); + assertTrue(m.equals(new Matcher(Condition.LT, "-1.00f"))); + assertFalse(m.equals(new Matcher(Condition.LE, -1f))); + assertFalse(m.equals(new Matcher(Condition.GE, -1f))); + assertFalse(m.equals(new Matcher(Condition.NE, -1f))); + assertFalse(m.equals(new Matcher(Condition.LT, 1f))); + assertFalse(m.equals(new Matcher(Condition.LT, -1.1f))); + } + + @Test + public void testHashCode() + { + MatcherI m1 = new Matcher(Condition.NotMatches, "ABC"); + MatcherI m2 = new Matcher(Condition.NotMatches, "ABC"); + MatcherI m3 = new Matcher(Condition.NotMatches, "AB"); + MatcherI m4 = new Matcher(Condition.Matches, "ABC"); + assertEquals(m1.hashCode(), m2.hashCode()); + assertNotEquals(m1.hashCode(), m3.hashCode()); + assertNotEquals(m1.hashCode(), m4.hashCode()); + assertNotEquals(m3.hashCode(), m4.hashCode()); + } +} diff --git a/utils/proguard.jar b/utils/proguard.jar deleted file mode 100755 index dfb7f29..0000000 Binary files a/utils/proguard.jar and /dev/null differ diff --git a/utils/proguard_5.3.3.jar b/utils/proguard_5.3.3.jar new file mode 100755 index 0000000..08f4a4c Binary files /dev/null and b/utils/proguard_5.3.3.jar differ