package jalview.datamodel.features;

import jalview.bin.Cache;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

20 * A singleton class to hold the set of attributes known for each feature type
22 public class FeatureAttributes
26 Character, Number, Mixed
30 * property key for lookup of a comma-separated list of regex patterns
31 * to match those attribute names for which distinct values should be cached
33 private static final String CACHED_ATTS_KEY = "CACHED_ATTRIBUTES";
36 * default value if property is not specified
37 * (selected VCF/VEP terms which have 'categorical' value ranges)
39 private static final String CACHED_ATTS_DEFAULT = "AS_FilterStatus,clinical_significance,consequence_type,"
40 + "CSQ:Consequence,CSQ:CLIN_SIG,CSQ:DOMAIN,CSQ:IMPACT";
43 * delimiters of terms in attribute values
45 private static final String TERM_DELIMITERS = ",&";
48 * defensive limit to number of attribute values cached per attribute
50 private static final int MAX_ATT_VALS = 30;
52 private static FeatureAttributes instance = new FeatureAttributes();
55 * map, by feature type, of a map, by attribute name, of
56 * attribute description and min-max range (if known)
58 private Map<String, Map<String[], AttributeData>> attributes;
61 * attribute names that have been seen and
62 * match the condition for caching distinct values
64 private Set<String> cachedAttributes;
67 * attribute names that have been seen and do not
68 * match the condition for caching distinct values
70 private Set<String> uncachedAttributes;
72 private List<Pattern> cacheableNamePatterns;
75 * a case-insensitive comparator so that attributes are ordered e.g.
81 private Comparator<String[]> comparator = new Comparator<String[]>()
84 public int compare(String[] o1, String[] o2)
87 while (i < o1.length || i < o2.length)
91 return o1.length <= i ? 0 : 1;
97 int comp = String.CASE_INSENSITIVE_ORDER.compare(o1[i], o2[i]);
104 return 0; // same length and all matched
108 private class AttributeData
111 * description(s) for this attribute, if known
112 * (different feature source might have differing descriptions)
114 List<String> description;
117 * minimum value (if only numeric values recorded)
122 * maximum value (if only numeric values recorded)
127 * flag is set true if only numeric values are detected for this attribute
129 boolean hasValue = false;
134 * (for selected attributes), a list of distinct terms found in values
139 * Note one instance of this attribute, recording unique, non-null
140 * descriptions, and the min/max of any numerical values.
142 * Distinct value terms may also be recorded, if the feature type is one for
143 * which this is configured
149 void addInstance(String[] attName, String desc, String value)
151 addDescription(desc);
155 value = value.trim();
157 String name = FeatureMatcher.toAttributeDisplayName(attName);
158 recordValue(name, value);
161 * Parse numeric value unless we have previously
162 * seen text data for this attribute type
164 if (type == null || type == Datatype.Number)
168 float f = Float.valueOf(value);
169 min = hasValue ? Float.min(min, f) : f;
170 max = hasValue ? Float.max(max, f) : f;
172 type = (type == null || type == Datatype.Number)
175 } catch (NumberFormatException e)
178 * non-numeric data: treat attribute as Character (or Mixed)
180 type = (type == null || type == Datatype.Character)
192 * If attribute name is configured to cache distinct values, then parse out
198 private void recordValue(String attName, String value)
201 * quit if we've seen this attribute name before,
202 * and determined we are not caching its values
204 if (uncachedAttributes.contains(attName))
210 * if first time seen, check attribute name filters to
211 * see if we want to cache its value
213 if (!cachedAttributes.contains(attName))
215 if (!matches(attName, cacheableNamePatterns))
217 uncachedAttributes.add(attName);
222 cachedAttributes.add(attName);
227 * we want to cache distinct terms for this attribute;
228 * parse them out using comma or & delimiters
232 terms = new HashSet<>();
234 int count = terms.size();
235 if (count >= MAX_ATT_VALS)
239 StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS);
240 while (st.hasMoreTokens() && count < MAX_ATT_VALS)
242 String term = st.nextToken().trim();
243 if (!terms.contains(term))
252 * Answers true if any of the patterns matches the value, else false
258 private boolean matches(String value, List<Pattern> filters)
260 for (Pattern p : filters)
262 if (p.matcher(value).matches())
271 * Answers the description of the attribute, if recorded and unique, or null
272 * if either no, or more than description is recorded
276 public String getDescription()
278 if (description != null && description.size() == 1)
280 return description.get(0);
285 public Datatype getType()
291 * Adds the given description to the list of known descriptions (without
296 public void addDescription(String desc)
300 if (description == null)
302 description = new ArrayList<>();
304 if (!description.contains(desc))
306 description.add(desc);
312 * Answers the distinct terms recorded for the attribute, or an empty set if
313 * it is not configured to cache values
317 public Set<String> getDistinctTerms()
319 return terms == null ? Collections.<String> emptySet() : terms;
324 * Answers the singleton instance of this class
328 public static FeatureAttributes getInstance()
334 * Private constructor to enforce singleton pattern
336 private FeatureAttributes()
338 attributes = new HashMap<>();
339 cachedAttributes = new HashSet<>();
340 uncachedAttributes = new HashSet<>();
341 cacheableNamePatterns = getFieldMatchers(CACHED_ATTS_KEY,
342 CACHED_ATTS_DEFAULT);
346 * Reads the Preference value for the given key, with default specified if no
347 * preference set. The value is interpreted as a comma-separated list of
348 * regular expressions, and converted into a list of compiled patterns ready
349 * for matching. Patterns are set to non-case-sensitive matching.
351 * This supports user-defined filters for attributes of interest to capture
352 * distinct values for as instance are added.
358 public static List<Pattern> getFieldMatchers(String key, String def)
363 // temporary for applet: handle class loading errors...
364 pref = Cache.getDefault(key, def);
365 } catch (Throwable t)
368 List<Pattern> patterns = new ArrayList<>();
369 String[] tokens = pref.split(",");
370 for (String token : tokens)
374 patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE));
375 } catch (PatternSyntaxException e)
377 System.err.println("Invalid pattern ignored: " + token);
384 * Answers the attribute names known for the given feature type, in
385 * alphabetical order (not case sensitive), or an empty set if no attributes
386 * are known. An attribute name is typically 'simple' e.g. "AC", but may be
387 * 'compound' e.g. {"CSQ", "Allele"} where a feature has map-valued attributes
392 public List<String[]> getAttributes(String featureType)
394 if (!attributes.containsKey(featureType))
396 return Collections.<String[]> emptyList();
399 return new ArrayList<>(attributes.get(featureType).keySet());
403 * Answers the set of distinct terms recorded for the given feature type and
404 * attribute. Answers an empty set if values are not cached for this
411 public Set<String> getDistinctTerms(String featureType, String... attName)
413 if (!attributes.containsKey(featureType)
414 || !attributes.get(featureType).containsKey(attName))
416 return Collections.<String> emptySet();
419 return attributes.get(featureType).get(attName).getDistinctTerms();
423 * Answers true if at least one attribute is known for the given feature type,
429 public boolean hasAttributes(String featureType)
431 if (attributes.containsKey(featureType))
433 if (!attributes.get(featureType).isEmpty())
442 * Records the given attribute name and description for the given feature
443 * type, and updates the min-max for any numeric value
450 public void addAttribute(String featureType, String description,
451 Object value, String... attName)
453 if (featureType == null || attName == null)
459 * if attribute value is a map, drill down one more level to
460 * record its sub-fields
462 if (value instanceof Map<?, ?>)
464 for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet())
466 String[] attNames = new String[attName.length + 1];
467 System.arraycopy(attName, 0, attNames, 0, attName.length);
468 attNames[attName.length] = entry.getKey().toString();
469 addAttribute(featureType, description, entry.getValue(), attNames);
474 String valueAsString = value.toString();
475 Map<String[], AttributeData> atts = attributes.get(featureType);
478 atts = new TreeMap<>(comparator);
479 attributes.put(featureType, atts);
481 AttributeData attData = atts.get(attName);
484 attData = new AttributeData();
485 atts.put(attName, attData);
487 attData.addInstance(attName, description, valueAsString);
491 * Answers the description of the given attribute for the given feature type,
492 * if known and unique, else null
498 public String getDescription(String featureType, String... attName)
501 Map<String[], AttributeData> atts = attributes.get(featureType);
504 AttributeData attData = atts.get(attName);
507 desc = attData.getDescription();
514 * Answers the [min, max] value range of the given attribute for the given
515 * feature type, if known, else null. Attributes with a mixture of text and
516 * numeric values are considered text (do not return a min-max range).
522 public float[] getMinMax(String featureType, String... attName)
524 Map<String[], AttributeData> atts = attributes.get(featureType);
527 AttributeData attData = atts.get(attName);
528 if (attData != null && attData.hasValue)
530 return new float[] { attData.min, attData.max };
537 * Records the given attribute description for the given feature type
543 public void addDescription(String featureType, String description,
546 if (featureType == null || attName == null)
551 Map<String[], AttributeData> atts = attributes.get(featureType);
554 atts = new TreeMap<>(comparator);
555 attributes.put(featureType, atts);
557 AttributeData attData = atts.get(attName);
560 attData = new AttributeData();
561 atts.put(attName, attData);
563 attData.addDescription(description);
567 * Answers the datatype of the feature, which is one of Character, Number or
568 * Mixed (or null if not known), as discovered from values recorded.
574 public Datatype getDatatype(String featureType, String... attName)
576 Map<String[], AttributeData> atts = attributes.get(featureType);
579 AttributeData attData = atts.get(attName);
582 return attData.getType();