1 package jalview.datamodel.features;
3 import jalview.bin.Cache;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.List;
12 import java.util.Map.Entry;
14 import java.util.StringTokenizer;
15 import java.util.TreeMap;
16 import java.util.regex.Pattern;
17 import java.util.regex.PatternSyntaxException;
20 * A singleton class to hold the set of attributes known for each feature type
22 public class FeatureAttributes
/**
 * The kinds of value that may be discovered for an attribute from the values
 * recorded for it: Character, Number, or Mixed.
 */
public enum Datatype
{
  Character, Number, Mixed
}
30 * property key for lookup of a comma-separated list of regex patterns
31 * to match those attribute names for which distinct values should be cached
33 private static final String CACHED_ATTS_KEY = "CACHED_ATTRIBUTES";
36 * default value if property is not specified
37 * (selected VCF/VEP terms which have 'categorical' value ranges)
39 private static final String CACHED_ATTS_DEFAULT = "AS_FilterStatus,clinical_significance,consequence_type,"
40 + "CSQ:Consequence,CSQ:CLIN_SIG,CSQ:DOMAIN,CSQ:IMPACT";
43 * delimiters of terms in attribute values
45 private static final String TERM_DELIMITERS = ",&";
48 * defensive limit to number of attribute values cached per attribute
50 private static final int MAX_ATT_VALS = 30;
52 private static FeatureAttributes instance = new FeatureAttributes();
55 * map, by feature type, of a map, by attribute name, of
56 * attribute description and min-max range (if known)
58 private Map<String, Map<String[], AttributeData>> attributes;
61 * attribute names that have been seen and
62 * match the condition for caching distinct values
64 private Set<String> cachedAttributes;
67 * attribute names that have been seen and do not
68 * match the condition for caching distinct values
70 private Set<String> uncachedAttributes;
72 private List<Pattern> cacheableNamePatterns;
75 * a case-insensitive comparator so that attributes are ordered e.g.
81 private Comparator<String[]> comparator = new Comparator<String[]>()
84 public int compare(String[] o1, String[] o2)
87 while (i < o1.length || i < o2.length)
91 return o1.length <= i ? 0 : 1;
97 int comp = String.CASE_INSENSITIVE_ORDER.compare(o1[i], o2[i]);
104 return 0; // same length and all matched
108 private class AttributeData
111 * description(s) for this attribute, if known
112 * (different feature source might have differing descriptions)
114 List<String> description;
117 * minimum value (if only numeric values recorded)
122 * maximum value (if only numeric values recorded)
127 * flag is set true if only numeric values are detected for this attribute
129 boolean hasValue = false;
134 * (for selected attributes), a list of distinct terms found in values
139 * Note one instance of this attribute, recording unique, non-null
140 * descriptions, and the min/max of any numerical values.
142 * Distinct value terms may also be recorded, if the feature type is one for
143 * which this is configured
149 void addInstance(String[] attName, String desc, String value)
151 addDescription(desc);
155 value = value.trim();
157 String name = FeatureMatcher.toAttributeDisplayName(attName);
158 recordValue(name, value);
161 * Parse numeric value unless we have previously
162 * seen text data for this attribute type
164 if (type == null || type == Datatype.Number)
168 float f = Float.valueOf(value);
169 min = hasValue ? Float.min(min, f) : f;
170 max = hasValue ? Float.max(max, f) : f;
172 type = (type == null || type == Datatype.Number)
175 } catch (NumberFormatException e)
178 * non-numeric data: treat attribute as Character (or Mixed)
180 type = (type == null || type == Datatype.Character)
192 * If attribute name is configured to cache distinct values, then parse out
198 private void recordValue(String attName, String value)
201 * quit if we've seen this attribute name before,
202 * and determined we are not caching its values
204 if (uncachedAttributes.contains(attName))
210 * if first time seen, check attribute name filters to
211 * see if we want to cache its value
213 if (!cachedAttributes.contains(attName))
215 if (!matches(attName, cacheableNamePatterns))
217 uncachedAttributes.add(attName);
222 cachedAttributes.add(attName);
227 * we want to cache distinct terms for this attribute;
228 * parse them out using comma or & delimiters
232 terms = new HashSet<>();
234 int count = terms.size();
235 StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS);
236 while (st.hasMoreTokens() && count < MAX_ATT_VALS)
238 terms.add(st.nextToken().trim());
244 * Answers true if any of the patterns matches the value, else false
250 private boolean matches(String value, List<Pattern> filters)
252 for (Pattern p : filters)
254 if (p.matcher(value).matches())
263 * Answers the description of the attribute, if recorded and unique, or null
264 * if either no, or more than description is recorded
268 public String getDescription()
270 if (description != null && description.size() == 1)
272 return description.get(0);
277 public Datatype getType()
283 * Adds the given description to the list of known descriptions (without
288 public void addDescription(String desc)
292 if (description == null)
294 description = new ArrayList<>();
296 if (!description.contains(desc))
298 description.add(desc);
304 * Answers the distinct terms recorded for the attribute, or an empty set if
305 * it is not configured to cache values
309 public Set<String> getDistinctTerms()
311 return terms == null ? Collections.<String> emptySet() : terms;
316 * Answers the singleton instance of this class
320 public static FeatureAttributes getInstance()
/**
 * Private constructor to enforce singleton pattern. Creates the (empty)
 * lookup collections, and compiles the attribute name patterns that select
 * which attributes have their distinct values cached.
 */
private FeatureAttributes()
{
  cacheableNamePatterns = getFieldMatchers(CACHED_ATTS_KEY,
          CACHED_ATTS_DEFAULT);
  cachedAttributes = new HashSet<>();
  uncachedAttributes = new HashSet<>();
  attributes = new HashMap<>();
}
338 * Reads the Preference value for the given key, with default specified if no
339 * preference set. The value is interpreted as a comma-separated list of
340 * regular expressions, and converted into a list of compiled patterns ready
341 * for matching. Patterns are set to non-case-sensitive matching.
343 * This supports user-defined filters for attributes of interest to capture
344 * distinct values for as instance are added.
350 public static List<Pattern> getFieldMatchers(String key, String def)
355 // temporary for applet: handle class loading errors...
356 pref = Cache.getDefault(key, def);
357 } catch (Throwable t)
360 List<Pattern> patterns = new ArrayList<>();
361 String[] tokens = pref.split(",");
362 for (String token : tokens)
366 patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE));
367 } catch (PatternSyntaxException e)
369 System.err.println("Invalid pattern ignored: " + token);
376 * Answers the attribute names known for the given feature type, in
377 * alphabetical order (not case sensitive), or an empty set if no attributes
378 * are known. An attribute name is typically 'simple' e.g. "AC", but may be
379 * 'compound' e.g. {"CSQ", "Allele"} where a feature has map-valued attributes
384 public List<String[]> getAttributes(String featureType)
386 if (!attributes.containsKey(featureType))
388 return Collections.<String[]> emptyList();
391 return new ArrayList<>(attributes.get(featureType).keySet());
395 * Answers the set of distinct terms recorded for the given feature type and
396 * attribute. Answers an empty set if values are not cached for this
403 public Set<String> getDistinctTerms(String featureType, String... attName)
405 if (!attributes.containsKey(featureType)
406 || !attributes.get(featureType).containsKey(attName))
408 return Collections.<String> emptySet();
411 return attributes.get(featureType).get(attName).getDistinctTerms();
415 * Answers true if at least one attribute is known for the given feature type,
421 public boolean hasAttributes(String featureType)
423 if (attributes.containsKey(featureType))
425 if (!attributes.get(featureType).isEmpty())
434 * Records the given attribute name and description for the given feature
435 * type, and updates the min-max for any numeric value
442 public void addAttribute(String featureType, String description,
443 Object value, String... attName)
445 if (featureType == null || attName == null)
451 * if attribute value is a map, drill down one more level to
452 * record its sub-fields
454 if (value instanceof Map<?, ?>)
456 for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet())
458 String[] attNames = new String[attName.length + 1];
459 System.arraycopy(attName, 0, attNames, 0, attName.length);
460 attNames[attName.length] = entry.getKey().toString();
461 addAttribute(featureType, description, entry.getValue(), attNames);
466 String valueAsString = value.toString();
467 Map<String[], AttributeData> atts = attributes.get(featureType);
470 atts = new TreeMap<>(comparator);
471 attributes.put(featureType, atts);
473 AttributeData attData = atts.get(attName);
476 attData = new AttributeData();
477 atts.put(attName, attData);
479 attData.addInstance(attName, description, valueAsString);
483 * Answers the description of the given attribute for the given feature type,
484 * if known and unique, else null
490 public String getDescription(String featureType, String... attName)
493 Map<String[], AttributeData> atts = attributes.get(featureType);
496 AttributeData attData = atts.get(attName);
499 desc = attData.getDescription();
506 * Answers the [min, max] value range of the given attribute for the given
507 * feature type, if known, else null. Attributes with a mixture of text and
508 * numeric values are considered text (do not return a min-max range).
514 public float[] getMinMax(String featureType, String... attName)
516 Map<String[], AttributeData> atts = attributes.get(featureType);
519 AttributeData attData = atts.get(attName);
520 if (attData != null && attData.hasValue)
522 return new float[] { attData.min, attData.max };
529 * Records the given attribute description for the given feature type
535 public void addDescription(String featureType, String description,
538 if (featureType == null || attName == null)
543 Map<String[], AttributeData> atts = attributes.get(featureType);
546 atts = new TreeMap<>(comparator);
547 attributes.put(featureType, atts);
549 AttributeData attData = atts.get(attName);
552 attData = new AttributeData();
553 atts.put(attName, attData);
555 attData.addDescription(description);
559 * Answers the datatype of the feature, which is one of Character, Number or
560 * Mixed (or null if not known), as discovered from values recorded.
566 public Datatype getDatatype(String featureType, String... attName)
568 Map<String[], AttributeData> atts = attributes.get(featureType);
571 AttributeData attData = atts.get(attName);
574 return attData.getType();