From 14f4c01bd0ff74a237cec40aad04f3f81efc5afb Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 5 Jun 2018 16:35:29 +0100 Subject: [PATCH] JAL-3020 cache selected attribute terms for filter value drop-down --- .../datamodel/features/FeatureAttributes.java | 204 +++++++++++++++++++- src/jalview/gui/FeatureTypeSettings.java | 115 +++++++---- src/jalview/io/vcf/VCFLoader.java | 50 +---- .../datamodel/features/FeatureAttributesTest.java | 31 +++ 4 files changed, 316 insertions(+), 84 deletions(-) diff --git a/src/jalview/datamodel/features/FeatureAttributes.java b/src/jalview/datamodel/features/FeatureAttributes.java index 10249f3..833c704 100644 --- a/src/jalview/datamodel/features/FeatureAttributes.java +++ b/src/jalview/datamodel/features/FeatureAttributes.java @@ -1,13 +1,20 @@ package jalview.datamodel.features; +import jalview.bin.Cache; + import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; +import java.util.StringTokenizer; import java.util.TreeMap; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; /** * A singleton class to hold the set of attributes known for each feature type @@ -19,6 +26,23 @@ public class FeatureAttributes Character, Number, Mixed } + /* + * property key for lookup of a comma-separated list of regex patterns + * to match those attribute names for which distinct values should be cached + */ + private static final String CACHED_ATTS_KEY = "CACHED_ATTRIBUTES"; + + /* + * default value if property is not specified + */ + private static final String CACHED_ATTS_DEFAULT = "AS_FilterStatus,clinical_significance,consequence_type," + + "CSQ:Consequence,CSQ:CLIN_SIG,CSQ:DOMAIN,CSQ:IMPACT"; + + /* + * delimiters of terms in attribute values + */ + private static final String TERM_DELIMITERS = ",&"; + private static FeatureAttributes instance = new FeatureAttributes(); /* @@ -28,6 +52,20 @@ public class FeatureAttributes private Map> attributes; /* + * attribute names that have been seen and + * match the condition for caching distinct values + */ + private Set cachedAttributes; + + /* + * attribute names that have been seen and do not + * match the condition for caching distinct values + */ + private Set uncachedAttributes; + + private List cacheableNamePatterns; + + /* * a case-insensitive comparator so that attributes are ordered e.g. * AC * af @@ -70,30 +108,39 @@ public class FeatureAttributes List description; /* - * minimum value (of any numeric values recorded) + * minimum value (if only numeric values recorded) */ float min = 0f; /* - * maximum value (of any numeric values recorded) + * maximum value (if only numeric values recorded) */ float max = 0f; /* - * flag is set true if any numeric value is detected for this attribute + * flag is set true if only numeric values are detected for this attribute */ boolean hasValue = false; Datatype type; + /* + * (for selected attributes), a list of distinct terms found in values + */ + Set terms; + /** * Note one instance of this attribute, recording unique, non-null - * descriptions, and the min/max of any numerical values + * descriptions, and the min/max of any numerical values. + *

+ * Distinct value terms may also be recorded, if the feature type is one for + * which this is configured * + * @param attName * @param desc * @param value */ - void addInstance(String desc, String value) + void addInstance(String[] attName, String desc, String value) { addDescription(desc); @@ -101,6 +148,9 @@ public class FeatureAttributes { value = value.trim(); + String name = FeatureMatcher.toAttributeDisplayName(attName); + recordValue(name, value); + /* * Parse numeric value unless we have previously * seen text data for this attribute type @@ -133,7 +183,78 @@ public class FeatureAttributes } /** - * Answers the description of the attribute, if recorded and unique, or null if either no, or more than description is recorded + * If attribute name is configured to cache distinct values, then parse out + * and store these + * + * @param attName + * @param value + */ + private void recordValue(String attName, String value) + { + /* + * quit if we've seen this attribute name before, + * and determined we are not caching its values + */ + if (uncachedAttributes.contains(attName)) + { + return; + } + + /* + * if first time seen, check attribute name filters to + * see if we want to cache its value + */ + if (!cachedAttributes.contains(attName)) + { + if (!matches(attName, cacheableNamePatterns)) + { + uncachedAttributes.add(attName); + return; + } + else + { + cachedAttributes.add(attName); + } + } + + /* + * we want to cache distinct terms for this attribute; + * parse them out using comma or & delimiters + */ + if (terms == null) + { + terms = new HashSet<>(); + } + StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS); + while (st.hasMoreTokens()) + { + terms.add(st.nextToken().trim()); + } + } + + /** + * Answers true if any of the patterns matches the value, else false + * + * @param value + * @param filters + * @return + */ + private boolean matches(String value, List filters) + { + for (Pattern p : filters) + { + if (p.matcher(value).matches()) + { + return true; + } + } + return false; + } + + /** + * Answers the description of the attribute, if recorded and unique, or null + * if either no, or more than description is recorded + * * @return */ public String getDescription() @@ -170,6 +291,17 @@ public class FeatureAttributes } } } + + /** + * Answers the distinct terms recorded for the attribute, or an empty set if + * it is not configured to cache values + * + * @return + */ + public Set getDistinctTerms() + { + return terms == null ? Collections. emptySet() : terms; + } } /** @@ -182,9 +314,47 @@ public class FeatureAttributes return instance; } + /** + * Private constructor to enforce singleton pattern + */ private FeatureAttributes() { attributes = new HashMap<>(); + cachedAttributes = new HashSet<>(); + uncachedAttributes = new HashSet<>(); + cacheableNamePatterns = getFieldMatchers(CACHED_ATTS_KEY, + CACHED_ATTS_DEFAULT); + } + + /** + * Reads the Preference value for the given key, with default specified if no + * preference set. The value is interpreted as a comma-separated list of + * regular expressions, and converted into a list of compiled patterns ready + * for matching. Patterns are set to non-case-sensitive matching. + *

+ * This supports user-defined filters for attributes of interest to capture + * distinct values for as instance are added. + * + * @param key + * @param def + * @return + */ + public static List getFieldMatchers(String key, String def) + { + String pref = Cache.getDefault(key, def); + List patterns = new ArrayList<>(); + String[] tokens = pref.split(","); + for (String token : tokens) + { + try + { + patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE)); + } catch (PatternSyntaxException e) + { + System.err.println("Invalid pattern ignored: " + token); + } + } + return patterns; } /** @@ -207,6 +377,26 @@ public class FeatureAttributes } /** + * Answers the set of distinct terms recorded for the given feature type and + * attribute. Answers an empty set if values are not cached for this + * attribute. + * + * @param featureType + * @param attName + * @return + */ + public Set getDistinctTerms(String featureType, String... attName) + { + if (!attributes.containsKey(featureType) + || !attributes.get(featureType).containsKey(attName)) + { + return Collections. emptySet(); + } + + return attributes.get(featureType).get(attName).getDistinctTerms(); + } + + /** * Answers true if at least one attribute is known for the given feature type, * else false * @@ -271,7 +461,7 @@ public class FeatureAttributes attData = new AttributeData(); atts.put(attName, attData); } - attData.addInstance(description, valueAsString); + attData.addInstance(attName, description, valueAsString); } /** diff --git a/src/jalview/gui/FeatureTypeSettings.java b/src/jalview/gui/FeatureTypeSettings.java index e13f6ee..21397f8 100644 --- a/src/jalview/gui/FeatureTypeSettings.java +++ b/src/jalview/gui/FeatureTypeSettings.java @@ -49,15 +49,20 @@ import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; import java.text.DecimalFormat; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.Vector; import javax.swing.BorderFactory; import javax.swing.BoxLayout; import javax.swing.ButtonGroup; +import javax.swing.DefaultComboBoxModel; import javax.swing.JButton; import javax.swing.JCheckBox; import javax.swing.JColorChooser; import javax.swing.JComboBox; +import javax.swing.JComponent; import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.JRadioButton; @@ -256,7 +261,7 @@ public class FeatureTypeSettings extends JalviewDialog String title = MessageManager .formatMessage("label.display_settings_for", new String[] { theType }); - initDialogFrame(this, true, blocking, title, 600, 360); + initDialogFrame(this, true, blocking, title, 700, 360); waitForInput(); } @@ -1283,9 +1288,10 @@ public class FeatureTypeSettings extends JalviewDialog /** * A helper method that constructs a row (panel) with one filter condition: *

    - *
  • a drop-down list of Label, Score and attribute names to choose from
  • + *
  • a drop-down list of Label, Score and attribute names to choose + * from
  • *
  • a drop-down list of conditions to choose from
  • - *
  • a text field for input of a match pattern
  • + *
  • an editable combo box for input of a match pattern
  • *
  • optionally, a 'remove' button
  • *
* The filter values are set as defaults for the input fields. The 'remove' @@ -1294,10 +1300,10 @@ public class FeatureTypeSettings extends JalviewDialog * Action handlers on these fields provide for *
    *
  • validate pattern field - should be numeric if condition is numeric
  • - *
  • save filters and refresh display on any (valid) change
  • + *
  • save and apply filters, and refresh display on any (valid) change
  • *
  • remove filter and refresh on 'Remove'
  • *
  • update conditions list on change of Label/Score/Attribute
  • - *
  • refresh value field tooltip with min-max range on change of + *
  • update values list, and tooltip with min-max range, on change of * attribute
  • *
* @@ -1326,8 +1332,10 @@ public class FeatureTypeSettings extends JalviewDialog JComboBox condCombo = new JComboBox<>(); - JTextField patternField = new JTextField(8); - patternField.setText(pattern); + Vector attArray = getAttributeValues(attName); + JComboBox patternField = new JComboBox<>(attArray); + patternField.setEditable(true); + patternField.setSelectedItem(pattern); /* * action handlers that validate and (if valid) apply changes @@ -1337,7 +1345,9 @@ public class FeatureTypeSettings extends JalviewDialog @Override public void actionPerformed(ActionEvent e) { - if (validateFilter(patternField, condCombo)) + String value = (String) patternField.getSelectedItem(); + value = value.trim(); + if (validateFilter(patternField, value, condCombo)) { if (updateFilter(attCombo, condCombo, patternField, filterIndex)) { @@ -1371,8 +1381,18 @@ public class FeatureTypeSettings extends JalviewDialog { /* * on change of attribute, refresh the conditions list to - * ensure it is appropriate for the attribute datatype + * ensure it is appropriate for the attribute datatype, and + * refresh any values list in the pattern combo box */ + String attribute = (String) attCombo.getSelectedItem(); + String[] attNam = FeatureMatcher + .fromAttributeDisplayName(attribute); + Vector attValsArray = getAttributeValues(attNam); + DefaultComboBoxModel model = new DefaultComboBoxModel<>( + attValsArray); + String val = (String) patternField.getSelectedItem(); + patternField.setModel(model); + patternField.setSelectedItem(val == null ? "" : val); // ?? populateConditions((String) attCombo.getSelectedItem(), (Condition) condCombo.getSelectedItem(), condCombo, patternField); @@ -1386,7 +1406,7 @@ public class FeatureTypeSettings extends JalviewDialog * drop-down choice of test condition */ populateConditions(filterBy, cond, condCombo, patternField); - condCombo.setPreferredSize(new Dimension(150, 20)); + condCombo.setPreferredSize(new Dimension(160, 20)); condCombo.addItemListener(itemListener); filterRow.add(condCombo); @@ -1442,6 +1462,26 @@ public class FeatureTypeSettings extends JalviewDialog } /** + * Answers a (possibly empty) list of cached terms known (if any) for the + * given attribute name + * + * @param attName + * @return + */ + protected Vector getAttributeValues(String[] attName) + { + if (attName == null) + { + return new Vector<>(); + } + Set attValues = FeatureAttributes.getInstance() + .getDistinctTerms(featureType, attName); + Vector values = new Vector<>(attValues); + Collections.sort(values, String.CASE_INSENSITIVE_ORDER); + return values; + } + + /** * Sets the selected item in the Label/Score/Attribute drop-down to match the * filter * @@ -1482,7 +1522,7 @@ public class FeatureTypeSettings extends JalviewDialog * @param patternField */ private void setNumericHints(String attName, - Condition selectedCondition, JTextField patternField) + Condition selectedCondition, JComboBox patternField) { patternField.setToolTipText(""); @@ -1495,19 +1535,20 @@ public class FeatureTypeSettings extends JalviewDialog String maxFormatted = DECFMT_2_2.format(minMax[1]); String tip = String.format("(%s - %s)", minFormatted, maxFormatted); patternField.setToolTipText(tip); - if (patternField.getText().isEmpty()) + String pattern = (String) patternField.getSelectedItem(); + if (pattern.isEmpty()) { if (selectedCondition == Condition.GE || selectedCondition == Condition.GT) { - patternField.setText(minFormatted); + patternField.setSelectedItem(minFormatted); } else { if (selectedCondition == Condition.LE || selectedCondition == Condition.LT) { - patternField.setText(maxFormatted); + patternField.setSelectedItem(maxFormatted); } } } @@ -1528,7 +1569,7 @@ public class FeatureTypeSettings extends JalviewDialog * @param patternField */ private void populateConditions(String attName, Condition cond, - JComboBox condCombo, JTextField patternField) + JComboBox condCombo, JComboBox patternField) { Datatype type = FeatureAttributes.getInstance().getDatatype(featureType, FeatureMatcher.fromAttributeDisplayName(attName)); @@ -1581,14 +1622,15 @@ public class FeatureTypeSettings extends JalviewDialog { try { - String pattern = patternField.getText().trim(); + String pattern = (String) patternField.getSelectedItem(); + pattern = pattern.trim(); if (pattern.length() > 0) { Float.valueOf(pattern); } } catch (NumberFormatException e) { - patternField.setText(""); + patternField.setSelectedItem(""); } } @@ -1606,13 +1648,13 @@ public class FeatureTypeSettings extends JalviewDialog * not mark the field as invalid. This supports selecting an attribute for a new * condition before a match pattern has been entered. * - * @param value + * @param valueField * @param condCombo */ - protected boolean validateFilter(JTextField value, + protected boolean validateFilter(JComponent valueField, String v1, JComboBox condCombo) { - if (value == null || condCombo == null) + if (valueField == null || condCombo == null) { return true; // fields not populated } @@ -1623,9 +1665,8 @@ public class FeatureTypeSettings extends JalviewDialog return true; } - value.setBackground(Color.white); - value.setToolTipText(""); - String v1 = value.getText().trim(); + valueField.setBackground(Color.white); + valueField.setToolTipText(""); if (v1.length() == 0) { // return false; @@ -1638,8 +1679,8 @@ public class FeatureTypeSettings extends JalviewDialog Float.valueOf(v1); } catch (NumberFormatException e) { - value.setBackground(Color.red); - value.setToolTipText( + valueField.setBackground(Color.red); + valueField.setToolTipText( MessageManager.getString("label.numeric_required")); return false; } @@ -1650,33 +1691,35 @@ public class FeatureTypeSettings extends JalviewDialog /** * Constructs a filter condition from the given input fields, and replaces the - * condition at filterIndex with the new one. Does nothing if the pattern field - * is blank (unless the match condition is one that doesn't require a pattern, - * e.g. 'Is present'). Answers true if the filter was updated, else false. + * condition at filterIndex with the new one. Does nothing if the pattern + * field is blank (unless the match condition is one that doesn't require a + * pattern, e.g. 'Is present'). Answers true if the filter was updated, else + * false. *

* This method may update the tooltip on the filter value field to show the - * value range, if a numeric condition is selected. This ensures the tooltip is - * updated when a numeric valued attribute is chosen on the last 'add a filter' - * row. + * value range, if a numeric condition is selected. This ensures the tooltip + * is updated when a numeric valued attribute is chosen on the last 'add a + * filter' row. * * @param attCombo * @param condCombo - * @param valueField + * @param patternField * @param filterIndex */ protected boolean updateFilter(JComboBox attCombo, - JComboBox condCombo, JTextField valueField, + JComboBox condCombo, JComboBox patternField, int filterIndex) { String attName = (String) attCombo.getSelectedItem(); Condition cond = (Condition) condCombo.getSelectedItem(); - String pattern = valueField.getText().trim(); + String pattern = (String) patternField.getSelectedItem(); + pattern = pattern.trim(); - setNumericHints(attName, cond, valueField); + setNumericHints(attName, cond, patternField); if (pattern.length() == 0 && cond.needsAPattern()) { - valueField.setEnabled(true); // ensure pattern field is enabled! + patternField.setEnabled(true); // ensure pattern field is enabled! return false; } diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 9438ba9..337227e 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -10,6 +10,7 @@ import jalview.datamodel.Mapping; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureAttributes; import jalview.datamodel.features.FeatureSource; import jalview.datamodel.features.FeatureSources; import jalview.ext.ensembl.EnsemblMap; @@ -29,7 +30,6 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; @@ -90,7 +90,7 @@ public class VCFLoader private static final String DEFAULT_VCF_FIELDS = ".*"; - private static final String DEFAULT_VEP_FIELDS = ".*";// "Allele,Consequence,IMPACT,SWISSPROT,SIFT,PolyPhen,CLIN_SIG"; + private static final String DEFAULT_VEP_FIELDS = ".*"; // "Allele,Consequence,IMPACT,SWISSPROT,SIFT,PolyPhen,CLIN_SIG"; /* * keys to fields of VEP CSQ consequence data @@ -377,8 +377,8 @@ public class VCFLoader */ void saveMetadata(String theSourceId) { - List vcfFieldPatterns = getFieldMatchers(VCF_FIELDS_PREF, - DEFAULT_VCF_FIELDS); + List vcfFieldPatterns = FeatureAttributes + .getFieldMatchers(VCF_FIELDS_PREF, DEFAULT_VCF_FIELDS); vcfFieldsOfInterest = new ArrayList<>(); FeatureSource metadata = new FeatureSource(theSourceId); @@ -410,7 +410,7 @@ public class VCFLoader metadata.setAttributeName(attributeId, desc); metadata.setAttributeType(attributeId, attType); - if (isFieldWanted(attributeId, vcfFieldPatterns)) + if (matches(attributeId, vcfFieldPatterns)) { vcfFieldsOfInterest.add(attributeId); } @@ -427,7 +427,7 @@ public class VCFLoader * @param filters * @return */ - private boolean isFieldWanted(String id, List filters) + private boolean matches(String id, List filters) { for (Pattern p : filters) { @@ -450,8 +450,8 @@ public class VCFLoader */ protected void parseCsqHeader() { - List vepFieldFilters = getFieldMatchers(VEP_FIELDS_PREF, - DEFAULT_VEP_FIELDS); + List vepFieldFilters = FeatureAttributes + .getFieldMatchers(VEP_FIELDS_PREF, DEFAULT_VEP_FIELDS); vepFieldsOfInterest = new HashMap<>(); VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD); @@ -491,7 +491,7 @@ public class VCFLoader csqFeatureFieldIndex = index; } - if (isFieldWanted(field, vepFieldFilters)) + if (matches(field, vepFieldFilters)) { vepFieldsOfInterest.put(index, field); } @@ -502,38 +502,6 @@ public class VCFLoader } /** - * Reads the Preference value for the given key, with default specified if no - * preference set. The value is interpreted as a comma-separated list of - * regular expressions, and converted into a list of compiled patterns ready - * for matching. Patterns are set to non-case-sensitive matching. - *

- * This supports user-defined filters for fields of interest to capture while - * processing data. For example, VCF_FIELDS = AF,AC* would mean that VCF INFO - * fields with an ID of AF, or starting with AC, would be matched. - * - * @param key - * @param def - * @return - */ - private List getFieldMatchers(String key, String def) - { - String pref = Cache.getDefault(key, def); - List patterns = new ArrayList<>(); - String[] tokens = pref.split(","); - for (String token : tokens) - { - try - { - patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE)); - } catch (PatternSyntaxException e) - { - System.err.println("Invalid pattern ignored: " + token); - } - } - return patterns; - } - - /** * Transfers VCF features to sequences to which this sequence has a mapping. * If the mapping is 3:1, computes peptide variants from nucleotide variants. * diff --git a/test/jalview/datamodel/features/FeatureAttributesTest.java b/test/jalview/datamodel/features/FeatureAttributesTest.java index 0846ec2..ff4cd53 100644 --- a/test/jalview/datamodel/features/FeatureAttributesTest.java +++ b/test/jalview/datamodel/features/FeatureAttributesTest.java @@ -1,6 +1,7 @@ package jalview.datamodel.features; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; @@ -9,7 +10,9 @@ import jalview.datamodel.features.FeatureAttributes.Datatype; import java.util.Comparator; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.regex.Pattern; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeClass; @@ -130,4 +133,32 @@ public class FeatureAttributesTest assertEquals(fa.getDatatype("Pfam", "domain"), Datatype.Character); assertEquals(fa.getDatatype("Pfam", "phase"), Datatype.Mixed); } + + @Test(groups = "Functional") + public void testGetFieldMatchers() + { + // providing a junk property key to ensure the default is used + List matchers = FeatureAttributes.getFieldMatchers("^%!", + ".*,ABC?,CLIN_SIG,CSQ:P.*"); + assertEquals(matchers.size(), 4); + + // first pattern .* matches anything + assertTrue(matchers.get(0).matcher("xyz").matches()); + + // second ABC? matches AB or ABC, not case-sensitive + assertTrue(matchers.get(1).matcher("ABC").matches()); + assertTrue(matchers.get(1).matcher("abc").matches()); + assertFalse(matchers.get(1).matcher("abc2").matches()); + assertFalse(matchers.get(1).matcher("xab").matches()); + assertFalse(matchers.get(1).matcher("xabc").matches()); + + // third matches CLIN_SIG + assertTrue(matchers.get(2).matcher("CLIN_SIG").matches()); + assertTrue(matchers.get(2).matcher("clin_sig").matches()); + + // fourth matches CSQ:P followed by any characters (or none) + assertTrue(matchers.get(3).matcher("CSQ:P").matches()); + assertTrue(matchers.get(3).matcher("csq:peter").matches()); + assertFalse(matchers.get(3).matcher("CSQ:Blue Peter").matches()); + } } -- 1.7.10.2