1 package jalview.datamodel.features;
3 import jalview.bin.Cache;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.List;
12 import java.util.Map.Entry;
14 import java.util.StringTokenizer;
15 import java.util.TreeMap;
16 import java.util.regex.Pattern;
17 import java.util.regex.PatternSyntaxException;
20 * A singleton class to hold the set of attributes known for each feature type
22 public class FeatureAttributes
26 Character, Number, Mixed
30 * property key for lookup of a comma-separated list of regex patterns
31 * to match those attribute names for which distinct values should be cached
33 private static final String CACHED_ATTS_KEY = "CACHED_ATTRIBUTES";
36 * default value if property is not specified
37 * (selected VCF/VEP terms which have 'categorical' value ranges)
39 private static final String CACHED_ATTS_DEFAULT = "AS_FilterStatus,clinical_significance,consequence_type,"
40 + "CSQ:Consequence,CSQ:CLIN_SIG,CSQ:DOMAIN,CSQ:IMPACT";
43 * delimiters of terms in attribute values
// passed to StringTokenizer, so each character (',' and '&') is a delimiter in its own right
45 private static final String TERM_DELIMITERS = ",&";
48 * defensive limit to number of attribute values cached per attribute
// recordValue() stops reading further terms once its term count reaches this limit
50 private static final int MAX_ATT_VALS = 30;
// the single shared instance, constructed when this class is initialised
52 private static FeatureAttributes instance = new FeatureAttributes();
55 * map, by feature type, of a map, by attribute name, of
56 * attribute description and min-max range (if known)
// the inner maps are created as TreeMaps ordered by 'comparator' (see addAttribute, addDescription)
58 private Map<String, Map<String[], AttributeData>> attributes;
61 * attribute names that have been seen and
62 * match the condition for caching distinct values
64 private Set<String> cachedAttributes;
67 * attribute names that have been seen and do not
68 * match the condition for caching distinct values
70 private Set<String> uncachedAttributes;
// compiled name patterns, set in the constructor from getFieldMatchers();
// recordValue() tests an attribute's display name against these
72 private List<Pattern> cacheableNamePatterns;
75 * a case-insensitive comparator so that attributes are ordered e.g.
// Orders attribute names (held as String arrays) by comparing their elements
// position by position, ignoring case.
// NOTE(review): several lines of this comparator are not visible in this view
// (the declaration and increment of i, and the conditions guarding the
// returns below) - confirm the details against the full file.
81 private Comparator<String[]> comparator = new Comparator<String[]>()
84 public int compare(String[] o1, String[] o2)
// keep going while either array still has an element at index i
87 while (i < o1.length || i < o2.length)
// answers 0 if o1 also has no element at index i, otherwise 1
91 return o1.length <= i ? 0 : 1;
// case-insensitive comparison of the two elements at index i
97 int comp = String.CASE_INSENSITIVE_ORDER.compare(o1[i], o2[i]);
104 return 0; // same length and all matched
108 private class AttributeData
111 * description(s) for this attribute, if known
112 * (different feature source might have differing descriptions)
114 List<String> description;
117 * minimum value (if only numeric values recorded)
122 * maximum value (if only numeric values recorded)
127 * flag is set true if only numeric values are detected for this attribute
129 boolean hasValue = false;
134 * (for selected attributes), a list of distinct terms found in values
139 * Note one instance of this attribute, recording unique, non-null
140 * descriptions, and the min/max of any numerical values.
142 * Distinct value terms may also be recorded, if the feature type is one for
143 * which this is configured
// Records one occurrence of this attribute: notes the description, passes the
// trimmed value to recordValue() under the attribute's display name, then
// tries to read the value as a float in order to maintain min, max and type.
// NOTE(review): some lines of this method are not visible in this view (the
// 'try', the tails of both 'type = ...' expressions and the rest of the
// catch block) - confirm the details against the full file.
149 void addInstance(String[] attName, String desc, String value)
151 addDescription(desc);
155 value = value.trim();
// single display string for the (possibly multi-part) attribute name
157 String name = FeatureMatcher.toAttributeDisplayName(attName);
158 recordValue(name, value);
161 * Parse numeric value unless we have previously
162 * seen text data for this attribute type
164 if (type == null || type == Datatype.Number)
// Float.valueOf throws NumberFormatException for non-numeric text
168 float f = Float.valueOf(value);
// while hasValue is false the value itself becomes min and max;
// otherwise it can only widen the existing range
169 min = hasValue ? Float.min(min, f) : f;
170 max = hasValue ? Float.max(max, f) : f;
172 type = (type == null || type == Datatype.Number)
175 } catch (NumberFormatException e)
178 * non-numeric data: treat attribute as Character (or Mixed)
180 type = (type == null || type == Datatype.Character)
192 * If attribute name is configured to cache distinct values, then parse out
// Caches the distinct terms found in the value, provided the attribute name
// matches one of cacheableNamePatterns. The decision for each name is
// remembered in cachedAttributes / uncachedAttributes so that the patterns
// are only tested the first time a name is seen.
// NOTE(review): the statements that leave the method early, the creation
// guard for 'terms', and the body of the final 'if' are not visible in this
// view - confirm against the full file.
198 private void recordValue(String attName, String value)
201 * quit if we've seen this attribute name before,
202 * and determined we are not caching its values
204 if (uncachedAttributes.contains(attName))
210 * if first time seen, check attribute name filters to
211 * see if we want to cache its value
213 if (!cachedAttributes.contains(attName))
215 if (!matches(attName, cacheableNamePatterns))
217 uncachedAttributes.add(attName);
222 cachedAttributes.add(attName);
227 * we want to cache distinct terms for this attribute;
228 * parse them out using comma or & delimiters
232 terms = new HashSet<>();
// start counting from the terms already held, so the limit applies
// across all values recorded for this attribute
234 int count = terms.size();
235 StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS);
// stop at MAX_ATT_VALS even if the value has more tokens
236 while (st.hasMoreTokens() && count < MAX_ATT_VALS)
238 String term = st.nextToken().trim();
239 if (!terms.contains(term))
248 * Answers true if any of the patterns matches the value, else false
254 private boolean matches(String value, List<Pattern> filters)
256 for (Pattern p : filters)
258 if (p.matcher(value).matches())
267 * Answers the description of the attribute, if recorded and unique, or null
268 * if either no description, or more than one description, is recorded
272 public String getDescription()
274 if (description != null && description.size() == 1)
276 return description.get(0);
// NOTE(review): the body of this accessor is not visible in this view;
// presumably it answers the 'type' value maintained by addInstance() - confirm.
281 public Datatype getType()
287 * Adds the given description to the list of known descriptions (without
// Adds desc to this attribute's list of descriptions unless it is already there.
// NOTE(review): the lines between the signature and the first 'if' are not
// visible in this view; presumably they include a null check on desc - confirm.
292 public void addDescription(String desc)
// the list is only created when first needed
296 if (description == null)
298 description = new ArrayList<>();
// List.contains uses equals(), so an identical description is not added twice
300 if (!description.contains(desc))
302 description.add(desc);
308 * Answers the distinct terms recorded for the attribute, or an empty set if
309 * it is not configured to cache values
313 public Set<String> getDistinctTerms()
315 return terms == null ? Collections.<String> emptySet() : terms;
320 * Answers the singleton instance of this class
// NOTE(review): the body of this method is not visible in this view;
// presumably it answers the static 'instance' field - confirm.
324 public static FeatureAttributes getInstance()
330 * Private constructor to enforce singleton pattern
/**
 * Private constructor (the class is a singleton). Compiles the patterns that
 * select which attribute names have their distinct values cached, and creates
 * the empty lookup structures.
 */
private FeatureAttributes()
{
  this.cacheableNamePatterns = getFieldMatchers(CACHED_ATTS_KEY,
          CACHED_ATTS_DEFAULT);
  this.uncachedAttributes = new HashSet<>();
  this.cachedAttributes = new HashSet<>();
  this.attributes = new HashMap<>();
}
342 * Reads the Preference value for the given key, with default specified if no
343 * preference set. The value is interpreted as a comma-separated list of
344 * regular expressions, and converted into a list of compiled patterns ready
345 * for matching. Patterns are set to non-case-sensitive matching.
347 * This supports user-defined filters for attributes of interest to capture
348 * distinct values for as instances are added.
354 public static List<Pattern> getFieldMatchers(String key, String def)
359 // temporary for applet: handle class loading errors...
360 pref = Cache.getDefault(key, def);
361 } catch (Throwable t)
364 List<Pattern> patterns = new ArrayList<>();
365 String[] tokens = pref.split(",");
366 for (String token : tokens)
370 patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE));
371 } catch (PatternSyntaxException e)
373 System.err.println("Invalid pattern ignored: " + token);
380 * Answers the attribute names known for the given feature type, in
381 * alphabetical order (not case sensitive), or an empty set if no attributes
382 * are known. An attribute name is typically 'simple' e.g. "AC", but may be
383 * 'compound' e.g. {"CSQ", "Allele"} where a feature has map-valued attributes
388 public List<String[]> getAttributes(String featureType)
390 if (!attributes.containsKey(featureType))
392 return Collections.<String[]> emptyList();
395 return new ArrayList<>(attributes.get(featureType).keySet());
399 * Answers the set of distinct terms recorded for the given feature type and
400 * attribute. Answers an empty set if values are not cached for this
407 public Set<String> getDistinctTerms(String featureType, String... attName)
409 if (!attributes.containsKey(featureType)
410 || !attributes.get(featureType).containsKey(attName))
412 return Collections.<String> emptySet();
415 return attributes.get(featureType).get(attName).getDistinctTerms();
419 * Answers true if at least one attribute is known for the given feature type,
425 public boolean hasAttributes(String featureType)
427 if (attributes.containsKey(featureType))
429 if (!attributes.get(featureType).isEmpty())
438 * Records the given attribute name and description for the given feature
439 * type, and updates the min-max for any numeric value
446 public void addAttribute(String featureType, String description,
447 Object value, String... attName)
449 if (featureType == null || attName == null)
455 * if attribute value is a map, drill down one more level to
456 * record its sub-fields
458 if (value instanceof Map<?, ?>)
460 for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet())
462 String[] attNames = new String[attName.length + 1];
463 System.arraycopy(attName, 0, attNames, 0, attName.length);
464 attNames[attName.length] = entry.getKey().toString();
465 addAttribute(featureType, description, entry.getValue(), attNames);
470 String valueAsString = value.toString();
471 Map<String[], AttributeData> atts = attributes.get(featureType);
474 atts = new TreeMap<>(comparator);
475 attributes.put(featureType, atts);
477 AttributeData attData = atts.get(attName);
480 attData = new AttributeData();
481 atts.put(attName, attData);
483 attData.addInstance(attName, description, valueAsString);
487 * Answers the description of the given attribute for the given feature type,
488 * if known and unique, else null
494 public String getDescription(String featureType, String... attName)
497 Map<String[], AttributeData> atts = attributes.get(featureType);
500 AttributeData attData = atts.get(attName);
503 desc = attData.getDescription();
510 * Answers the [min, max] value range of the given attribute for the given
511 * feature type, if known, else null. Attributes with a mixture of text and
512 * numeric values are considered text (do not return a min-max range).
518 public float[] getMinMax(String featureType, String... attName)
520 Map<String[], AttributeData> atts = attributes.get(featureType);
523 AttributeData attData = atts.get(attName);
524 if (attData != null && attData.hasValue)
526 return new float[] { attData.min, attData.max };
533 * Records the given attribute description for the given feature type
// Notes a description for an attribute of a feature type, creating the
// feature type's attribute map and the attribute's record as needed.
// NOTE(review): the end of the signature (the attName parameter) and the
// conditions guarding the creation statements below are not visible in this
// view - confirm against the full file.
539 public void addDescription(String featureType, String description,
542 if (featureType == null || attName == null)
547 Map<String[], AttributeData> atts = attributes.get(featureType);
// attribute names for a feature type are kept ordered by 'comparator'
550 atts = new TreeMap<>(comparator);
551 attributes.put(featureType, atts);
553 AttributeData attData = atts.get(attName);
556 attData = new AttributeData();
557 atts.put(attName, attData);
// the attribute's record decides whether the description is new
559 attData.addDescription(description);
563 * Answers the datatype of the attribute, which is one of Character, Number or
564 * Mixed (or null if not known), as discovered from values recorded.
570 public Datatype getDatatype(String featureType, String... attName)
572 Map<String[], AttributeData> atts = attributes.get(featureType);
575 AttributeData attData = atts.get(attName);
578 return attData.getType();