package jalview.datamodel.features;
+import jalview.bin.Cache;
+
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
+import java.util.StringTokenizer;
import java.util.TreeMap;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
/**
* A singleton class to hold the set of attributes known for each feature type
Character, Number, Mixed
}
+ /*
+ * property key for lookup of a comma-separated list of regex patterns
+ * to match those attribute names for which distinct values should be cached
+ */
+ private static final String CACHED_ATTS_KEY = "CACHED_ATTRIBUTES";
+
+ /*
+ * default value if property is not specified
+ */
+ private static final String CACHED_ATTS_DEFAULT = "AS_FilterStatus,clinical_significance,consequence_type,"
+ + "CSQ:Consequence,CSQ:CLIN_SIG,CSQ:DOMAIN,CSQ:IMPACT";
+
+ /*
+ * delimiters of terms in attribute values
+ */
+ private static final String TERM_DELIMITERS = ",&";
+
private static FeatureAttributes instance = new FeatureAttributes();
/*
private Map<String, Map<String[], AttributeData>> attributes;
/*
+ * attribute names that have been seen and
+ * match the condition for caching distinct values
+ */
+ private Set<String> cachedAttributes;
+
+ /*
+ * attribute names that have been seen and do not
+ * match the condition for caching distinct values
+ */
+ private Set<String> uncachedAttributes;
+
+ private List<Pattern> cacheableNamePatterns;
+
+ /*
* a case-insensitive comparator so that attributes are ordered e.g.
* AC
* af
List<String> description;
/*
- * minimum value (of any numeric values recorded)
+ * minimum value (if only numeric values recorded)
*/
float min = 0f;
/*
- * maximum value (of any numeric values recorded)
+ * maximum value (if only numeric values recorded)
*/
float max = 0f;
/*
- * flag is set true if any numeric value is detected for this attribute
+ * flag is set true if only numeric values are detected for this attribute
*/
boolean hasValue = false;
Datatype type;
+ /*
+ * (for selected attributes only) the set of distinct terms found in values
+ */
+ Set<String> terms;
+
/**
* Note one instance of this attribute, recording unique, non-null
- * descriptions, and the min/max of any numerical values
+ * descriptions, and the min/max of any numerical values.
+ * <p>
+ * Distinct value terms may also be recorded, if the attribute name is one
+ * for which this is configured
*
+ * @param attName
* @param desc
* @param value
*/
- void addInstance(String desc, String value)
+ void addInstance(String[] attName, String desc, String value)
{
addDescription(desc);
{
value = value.trim();
+ String name = FeatureMatcher.toAttributeDisplayName(attName);
+ recordValue(name, value);
+
/*
* Parse numeric value unless we have previously
* seen text data for this attribute type
}
/**
- * Answers the description of the attribute, if recorded and unique, or null if either no, or more than description is recorded
+ * If attribute name is configured to cache distinct values, then parse out
+ * and store these
+ *
+ * @param attName
+ * @param value
+ */
+ private void recordValue(String attName, String value)
+ {
+   /*
+    * attribute name already seen, and rejected for caching:
+    * nothing to do
+    */
+   if (uncachedAttributes.contains(attName))
+   {
+     return;
+   }
+
+   /*
+    * first sighting of this attribute name: test it against the
+    * configured name patterns, and remember the outcome
+    */
+   if (!cachedAttributes.contains(attName))
+   {
+     if (!matches(attName, cacheableNamePatterns))
+     {
+       uncachedAttributes.add(attName);
+       return;
+     }
+     cachedAttributes.add(attName);
+   }
+
+   /*
+    * split the value on the term delimiters (comma or &)
+    * and store each distinct, non-empty term
+    */
+   if (terms == null)
+   {
+     terms = new HashSet<>();
+   }
+   StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS);
+   while (st.hasMoreTokens())
+   {
+     String term = st.nextToken().trim();
+
+     /*
+      * a token holding only white space (e.g. the middle of "a, ,b")
+      * trims to an empty string, which is not a meaningful term
+      */
+     if (!term.isEmpty())
+     {
+       terms.add(term);
+     }
+   }
+ }
+
+ /**
+ * Answers true if any of the patterns matches the value, else false
+ *
+ * @param value
+ * @param filters
+ * @return
+ */
+ private boolean matches(String value, List<Pattern> filters)
+ {
+   /*
+    * scan the patterns in list order, stopping at the first one
+    * that matches the whole of the value
+    */
+   boolean matched = false;
+   for (int i = 0; i < filters.size() && !matched; i++)
+   {
+     matched = filters.get(i).matcher(value).matches();
+   }
+   return matched;
+ }
+
+ /**
+ * Answers the description of the attribute, if recorded and unique, or null
+ * if either no description, or more than one description, is recorded
+ *
* @return
*/
public String getDescription()
}
}
}
+
+ /**
+ * Answers the distinct terms recorded for the attribute, or an empty set if
+ * it is not configured to cache values
+ *
+ * @return
+ */
+ public Set<String> getDistinctTerms()
+ {
+   if (terms == null)
+   {
+     return Collections.<String> emptySet();
+   }
+
+   /*
+    * hand out a read-only view, so that callers cannot alter
+    * the terms recorded for this attribute
+    */
+   return Collections.unmodifiableSet(terms);
+ }
}
/**
return instance;
}
+ /**
+ * Private constructor to enforce singleton pattern
+ */
private FeatureAttributes()
{
attributes = new HashMap<>();
+ cachedAttributes = new HashSet<>(); // names seen that match a caching pattern
+ uncachedAttributes = new HashSet<>(); // names seen that match no caching pattern
+ cacheableNamePatterns = getFieldMatchers(CACHED_ATTS_KEY, // patterns from preference,
+ CACHED_ATTS_DEFAULT); // or from the built-in default list
+ }
+
+ /**
+ * Reads the Preference value for the given key, with default specified if no
+ * preference set. The value is interpreted as a comma-separated list of
+ * regular expressions, and converted into a list of compiled patterns ready
+ * for matching. Patterns are set to non-case-sensitive matching.
+ * <p>
+ * This supports user-defined filters for attributes of interest to capture
+ * distinct values for as instances are added.
+ *
+ * @param key
+ * @param def
+ * @return
+ */
+ public static List<Pattern> getFieldMatchers(String key, String def)
+ {
+   String pref = Cache.getDefault(key, def);
+   List<Pattern> patterns = new ArrayList<>();
+   if (pref == null)
+   {
+     /*
+      * no preference and no default supplied: nothing to match
+      */
+     return patterns;
+   }
+
+   for (String token : pref.split(","))
+   {
+     /*
+      * tolerate white space around the commas, and skip empty entries
+      * (e.g. from "a,,b") which could only ever match an empty name
+      */
+     String regex = token.trim();
+     if (regex.isEmpty())
+     {
+       continue;
+     }
+     try
+     {
+       patterns.add(Pattern.compile(regex, Pattern.CASE_INSENSITIVE));
+     } catch (PatternSyntaxException e)
+     {
+       /*
+        * a malformed expression is reported and dropped, so that
+        * the remaining patterns can still be used
+        */
+       System.err.println("Invalid pattern ignored: " + token);
+     }
+   }
+   return patterns;
}
/**
}
/**
+ * Answers the set of distinct terms recorded for the given feature type and
+ * attribute. Answers an empty set if values are not cached for this
+ * attribute.
+ *
+ * @param featureType
+ * @param attName
+ * @return
+ */
+ public Set<String> getDistinctTerms(String featureType, String... attName)
+ {
+ if (!attributes.containsKey(featureType)
+ || !attributes.get(featureType).containsKey(attName))
+ {
+ return Collections.<String> emptySet();
+ }
+
+ return attributes.get(featureType).get(attName).getDistinctTerms();
+ }
+
+ /**
* Answers true if at least one attribute is known for the given feature type,
* else false
*
attData = new AttributeData();
atts.put(attName, attData);
}
- attData.addInstance(description, valueAsString);
+ attData.addInstance(attName, description, valueAsString);
}
/**
import java.awt.event.MouseEvent;
import java.text.DecimalFormat;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Set;
+import java.util.Vector;
import javax.swing.BorderFactory;
import javax.swing.BoxLayout;
import javax.swing.ButtonGroup;
+import javax.swing.DefaultComboBoxModel;
import javax.swing.JButton;
import javax.swing.JCheckBox;
import javax.swing.JColorChooser;
import javax.swing.JComboBox;
+import javax.swing.JComponent;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JRadioButton;
String title = MessageManager
.formatMessage("label.display_settings_for", new String[]
{ theType });
- initDialogFrame(this, true, blocking, title, 600, 360);
+ initDialogFrame(this, true, blocking, title, 700, 360);
waitForInput();
}
/**
* A helper method that constructs a row (panel) with one filter condition:
* <ul>
- * <li>a drop-down list of Label, Score and attribute names to choose from</li>
+ * <li>a drop-down list of Label, Score and attribute names to choose
+ * from</li>
* <li>a drop-down list of conditions to choose from</li>
- * <li>a text field for input of a match pattern</li>
+ * <li>an editable combo box for input of a match pattern</li>
* <li>optionally, a 'remove' button</li>
* </ul>
* The filter values are set as defaults for the input fields. The 'remove'
* Action handlers on these fields provide for
* <ul>
* <li>validate pattern field - should be numeric if condition is numeric</li>
- * <li>save filters and refresh display on any (valid) change</li>
+ * <li>save and apply filters, and refresh display on any (valid) change</li>
* <li>remove filter and refresh on 'Remove'</li>
* <li>update conditions list on change of Label/Score/Attribute</li>
- * <li>refresh value field tooltip with min-max range on change of
+ * <li>update values list, and tooltip with min-max range, on change of
* attribute</li>
* </ul>
*
JComboBox<Condition> condCombo = new JComboBox<>();
- JTextField patternField = new JTextField(8);
- patternField.setText(pattern);
+ Vector<String> attArray = getAttributeValues(attName);
+ JComboBox<String> patternField = new JComboBox<>(attArray);
+ patternField.setEditable(true);
+ patternField.setSelectedItem(pattern);
/*
* action handlers that validate and (if valid) apply changes
@Override
public void actionPerformed(ActionEvent e)
{
- if (validateFilter(patternField, condCombo))
+ String value = (String) patternField.getSelectedItem();
+ value = value.trim();
+ if (validateFilter(patternField, value, condCombo))
{
if (updateFilter(attCombo, condCombo, patternField, filterIndex))
{
{
/*
* on change of attribute, refresh the conditions list to
- * ensure it is appropriate for the attribute datatype
+ * ensure it is appropriate for the attribute datatype, and
+ * refresh any values list in the pattern combo box
*/
+ String attribute = (String) attCombo.getSelectedItem();
+ String[] attNam = FeatureMatcher
+ .fromAttributeDisplayName(attribute);
+ Vector<String> attValsArray = getAttributeValues(attNam);
+ DefaultComboBoxModel<String> model = new DefaultComboBoxModel<>(
+ attValsArray);
+ String val = (String) patternField.getSelectedItem();
+ patternField.setModel(model);
+ patternField.setSelectedItem(val == null ? "" : val); // ??
populateConditions((String) attCombo.getSelectedItem(),
(Condition) condCombo.getSelectedItem(), condCombo,
patternField);
* drop-down choice of test condition
*/
populateConditions(filterBy, cond, condCombo, patternField);
- condCombo.setPreferredSize(new Dimension(150, 20));
+ condCombo.setPreferredSize(new Dimension(160, 20));
condCombo.addItemListener(itemListener);
filterRow.add(condCombo);
}
/**
+ * Answers a (possibly empty) list of the distinct terms cached for the given
+ * attribute name, sorted without regard to case
+ *
+ * @param attName
+ * @return
+ */
+ protected Vector<String> getAttributeValues(String[] attName)
+ {
+   Vector<String> values = new Vector<>();
+   if (attName != null)
+   {
+     /*
+      * copy the cached terms for this attribute, then order them
+      * ignoring case for display in the drop-down list
+      */
+     values.addAll(FeatureAttributes.getInstance()
+             .getDistinctTerms(featureType, attName));
+     Collections.sort(values, String.CASE_INSENSITIVE_ORDER);
+   }
+   return values;
+ }
+
+ /**
* Sets the selected item in the Label/Score/Attribute drop-down to match the
* filter
*
* @param patternField
*/
private void setNumericHints(String attName,
- Condition selectedCondition, JTextField patternField)
+ Condition selectedCondition, JComboBox<String> patternField)
{
patternField.setToolTipText("");
String maxFormatted = DECFMT_2_2.format(minMax[1]);
String tip = String.format("(%s - %s)", minFormatted, maxFormatted);
patternField.setToolTipText(tip);
- if (patternField.getText().isEmpty())
+ String pattern = (String) patternField.getSelectedItem();
+ if (pattern.isEmpty())
{
if (selectedCondition == Condition.GE
|| selectedCondition == Condition.GT)
{
- patternField.setText(minFormatted);
+ patternField.setSelectedItem(minFormatted);
}
else
{
if (selectedCondition == Condition.LE
|| selectedCondition == Condition.LT)
{
- patternField.setText(maxFormatted);
+ patternField.setSelectedItem(maxFormatted);
}
}
}
* @param patternField
*/
private void populateConditions(String attName, Condition cond,
- JComboBox<Condition> condCombo, JTextField patternField)
+ JComboBox<Condition> condCombo, JComboBox<String> patternField)
{
Datatype type = FeatureAttributes.getInstance().getDatatype(featureType,
FeatureMatcher.fromAttributeDisplayName(attName));
{
try
{
- String pattern = patternField.getText().trim();
+ String pattern = (String) patternField.getSelectedItem();
+ pattern = pattern.trim();
if (pattern.length() > 0)
{
Float.valueOf(pattern);
}
} catch (NumberFormatException e)
{
- patternField.setText("");
+ patternField.setSelectedItem("");
}
}
* not mark the field as invalid. This supports selecting an attribute for a new
* condition before a match pattern has been entered.
*
- * @param value
+ * @param valueField
* @param condCombo
*/
- protected boolean validateFilter(JTextField value,
+ protected boolean validateFilter(JComponent valueField, String v1,
JComboBox<Condition> condCombo)
{
- if (value == null || condCombo == null)
+ if (valueField == null || condCombo == null)
{
return true; // fields not populated
}
return true;
}
- value.setBackground(Color.white);
- value.setToolTipText("");
- String v1 = value.getText().trim();
+ valueField.setBackground(Color.white);
+ valueField.setToolTipText("");
if (v1.length() == 0)
{
// return false;
Float.valueOf(v1);
} catch (NumberFormatException e)
{
- value.setBackground(Color.red);
- value.setToolTipText(
+ valueField.setBackground(Color.red);
+ valueField.setToolTipText(
MessageManager.getString("label.numeric_required"));
return false;
}
/**
* Constructs a filter condition from the given input fields, and replaces the
- * condition at filterIndex with the new one. Does nothing if the pattern field
- * is blank (unless the match condition is one that doesn't require a pattern,
- * e.g. 'Is present'). Answers true if the filter was updated, else false.
+ * condition at filterIndex with the new one. Does nothing if the pattern
+ * field is blank (unless the match condition is one that doesn't require a
+ * pattern, e.g. 'Is present'). Answers true if the filter was updated, else
+ * false.
* <p>
* This method may update the tooltip on the filter value field to show the
- * value range, if a numeric condition is selected. This ensures the tooltip is
- * updated when a numeric valued attribute is chosen on the last 'add a filter'
- * row.
+ * value range, if a numeric condition is selected. This ensures the tooltip
+ * is updated when a numeric valued attribute is chosen on the last 'add a
+ * filter' row.
*
* @param attCombo
* @param condCombo
- * @param valueField
+ * @param patternField
* @param filterIndex
*/
protected boolean updateFilter(JComboBox<String> attCombo,
- JComboBox<Condition> condCombo, JTextField valueField,
+ JComboBox<Condition> condCombo, JComboBox<String> patternField,
int filterIndex)
{
String attName = (String) attCombo.getSelectedItem();
Condition cond = (Condition) condCombo.getSelectedItem();
- String pattern = valueField.getText().trim();
+ String pattern = (String) patternField.getSelectedItem();
+ pattern = pattern.trim();
- setNumericHints(attName, cond, valueField);
+ setNumericHints(attName, cond, patternField);
if (pattern.length() == 0 && cond.needsAPattern())
{
- valueField.setEnabled(true); // ensure pattern field is enabled!
+ patternField.setEnabled(true); // ensure pattern field is enabled!
return false;
}
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.FeatureAttributeType;
+import jalview.datamodel.features.FeatureAttributes;
import jalview.datamodel.features.FeatureSource;
import jalview.datamodel.features.FeatureSources;
import jalview.ext.ensembl.EnsemblMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
private static final String DEFAULT_VCF_FIELDS = ".*";
- private static final String DEFAULT_VEP_FIELDS = ".*";// "Allele,Consequence,IMPACT,SWISSPROT,SIFT,PolyPhen,CLIN_SIG";
+ private static final String DEFAULT_VEP_FIELDS = ".*"; // "Allele,Consequence,IMPACT,SWISSPROT,SIFT,PolyPhen,CLIN_SIG";
/*
* keys to fields of VEP CSQ consequence data
*/
void saveMetadata(String theSourceId)
{
- List<Pattern> vcfFieldPatterns = getFieldMatchers(VCF_FIELDS_PREF,
- DEFAULT_VCF_FIELDS);
+ List<Pattern> vcfFieldPatterns = FeatureAttributes
+ .getFieldMatchers(VCF_FIELDS_PREF, DEFAULT_VCF_FIELDS);
vcfFieldsOfInterest = new ArrayList<>();
FeatureSource metadata = new FeatureSource(theSourceId);
metadata.setAttributeName(attributeId, desc);
metadata.setAttributeType(attributeId, attType);
- if (isFieldWanted(attributeId, vcfFieldPatterns))
+ if (matches(attributeId, vcfFieldPatterns))
{
vcfFieldsOfInterest.add(attributeId);
}
* @param filters
* @return
*/
- private boolean isFieldWanted(String id, List<Pattern> filters)
+ private boolean matches(String id, List<Pattern> filters)
{
for (Pattern p : filters)
{
*/
protected void parseCsqHeader()
{
- List<Pattern> vepFieldFilters = getFieldMatchers(VEP_FIELDS_PREF,
- DEFAULT_VEP_FIELDS);
+ List<Pattern> vepFieldFilters = FeatureAttributes
+ .getFieldMatchers(VEP_FIELDS_PREF, DEFAULT_VEP_FIELDS);
vepFieldsOfInterest = new HashMap<>();
VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD);
csqFeatureFieldIndex = index;
}
- if (isFieldWanted(field, vepFieldFilters))
+ if (matches(field, vepFieldFilters))
{
vepFieldsOfInterest.put(index, field);
}
}
/**
- * Reads the Preference value for the given key, with default specified if no
- * preference set. The value is interpreted as a comma-separated list of
- * regular expressions, and converted into a list of compiled patterns ready
- * for matching. Patterns are set to non-case-sensitive matching.
- * <p>
- * This supports user-defined filters for fields of interest to capture while
- * processing data. For example, VCF_FIELDS = AF,AC* would mean that VCF INFO
- * fields with an ID of AF, or starting with AC, would be matched.
- *
- * @param key
- * @param def
- * @return
- */
- private List<Pattern> getFieldMatchers(String key, String def)
- {
- String pref = Cache.getDefault(key, def);
- List<Pattern> patterns = new ArrayList<>();
- String[] tokens = pref.split(",");
- for (String token : tokens)
- {
- try
- {
- patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE));
- } catch (PatternSyntaxException e)
- {
- System.err.println("Invalid pattern ignored: " + token);
- }
- }
- return patterns;
- }
-
- /**
* Transfers VCF features to sequences to which this sequence has a mapping.
* If the mapping is 3:1, computes peptide variants from nucleotide variants.
*
package jalview.datamodel.features;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
assertEquals(fa.getDatatype("Pfam", "domain"), Datatype.Character);
assertEquals(fa.getDatatype("Pfam", "phase"), Datatype.Mixed);
}
+
+ @Test(groups = "Functional")
+ public void testGetFieldMatchers()
+ {
+   // providing a junk property key to ensure the default is used
+   List<Pattern> matchers = FeatureAttributes.getFieldMatchers("^%!",
+           ".*,ABC?,CLIN_SIG,CSQ:P.*");
+   assertEquals(matchers.size(), 4);
+
+   // first pattern .* matches anything
+   assertTrue(matchers.get(0).matcher("xyz").matches());
+
+   // second ABC? matches AB or ABC, not case-sensitive
+   assertTrue(matchers.get(1).matcher("AB").matches());
+   assertTrue(matchers.get(1).matcher("ab").matches());
+   assertTrue(matchers.get(1).matcher("ABC").matches());
+   assertTrue(matchers.get(1).matcher("abc").matches());
+   assertFalse(matchers.get(1).matcher("abc2").matches());
+   assertFalse(matchers.get(1).matcher("xab").matches());
+   assertFalse(matchers.get(1).matcher("xabc").matches());
+
+   // third matches CLIN_SIG
+   assertTrue(matchers.get(2).matcher("CLIN_SIG").matches());
+   assertTrue(matchers.get(2).matcher("clin_sig").matches());
+
+   // fourth matches CSQ:P followed by any characters (or none)
+   assertTrue(matchers.get(3).matcher("CSQ:P").matches());
+   assertTrue(matchers.get(3).matcher("csq:peter").matches());
+   assertFalse(matchers.get(3).matcher("CSQ:Blue Peter").matches());
+ }
}