1 package jalview.datamodel.features;
import jalview.bin.Cache;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
20 * A singleton class to hold the set of attributes known for each feature type
22 public class FeatureAttributes
/**
 * The kind of values recorded for an attribute: text only (Character),
 * numeric only (Number), or a mixture of the two (Mixed)
 */
public enum Datatype
{
  Character, Number, Mixed
}
30 * property key for lookup of a comma-separated list of regex patterns
31 * to match those attribute names for which distinct values should be cached
33 private static final String CACHED_ATTS_KEY = "CACHED_ATTRIBUTES";
36 * default value if property is not specified
38 private static final String CACHED_ATTS_DEFAULT = "AS_FilterStatus,clinical_significance,consequence_type,"
39 + "CSQ:Consequence,CSQ:CLIN_SIG,CSQ:DOMAIN,CSQ:IMPACT";
42 * delimiters of terms in attribute values
44 private static final String TERM_DELIMITERS = ",&";
46 private static FeatureAttributes instance = new FeatureAttributes();
49 * map, by feature type, of a map, by attribute name, of
50 * attribute description and min-max range (if known)
52 private Map<String, Map<String[], AttributeData>> attributes;
55 * attribute names that have been seen and
56 * match the condition for caching distinct values
58 private Set<String> cachedAttributes;
61 * attribute names that have been seen and do not
62 * match the condition for caching distinct values
64 private Set<String> uncachedAttributes;
66 private List<Pattern> cacheableNamePatterns;
69 * a case-insensitive comparator so that attributes are ordered e.g.
75 private Comparator<String[]> comparator = new Comparator<String[]>()
78 public int compare(String[] o1, String[] o2)
81 while (i < o1.length || i < o2.length)
85 return o1.length <= i ? 0 : 1;
91 int comp = String.CASE_INSENSITIVE_ORDER.compare(o1[i], o2[i]);
98 return 0; // same length and all matched
102 private class AttributeData
105 * description(s) for this attribute, if known
106 * (different feature source might have differing descriptions)
108 List<String> description;
111 * minimum value (if only numeric values recorded)
116 * maximum value (if only numeric values recorded)
121 * flag is set true if only numeric values are detected for this attribute
123 boolean hasValue = false;
128 * (for selected attributes), a list of distinct terms found in values
133 * Note one instance of this attribute, recording unique, non-null
134 * descriptions, and the min/max of any numerical values.
136 * Distinct value terms may also be recorded, if the feature type is one for
137 * which this is configured
143 void addInstance(String[] attName, String desc, String value)
145 addDescription(desc);
149 value = value.trim();
151 String name = FeatureMatcher.toAttributeDisplayName(attName);
152 recordValue(name, value);
155 * Parse numeric value unless we have previously
156 * seen text data for this attribute type
158 if (type == null || type == Datatype.Number)
162 float f = Float.valueOf(value);
163 min = hasValue ? Float.min(min, f) : f;
164 max = hasValue ? Float.max(max, f) : f;
166 type = (type == null || type == Datatype.Number)
169 } catch (NumberFormatException e)
172 * non-numeric data: treat attribute as Character (or Mixed)
174 type = (type == null || type == Datatype.Character)
186 * If attribute name is configured to cache distinct values, then parse out
192 private void recordValue(String attName, String value)
195 * quit if we've seen this attribute name before,
196 * and determined we are not caching its values
198 if (uncachedAttributes.contains(attName))
204 * if first time seen, check attribute name filters to
205 * see if we want to cache its value
207 if (!cachedAttributes.contains(attName))
209 if (!matches(attName, cacheableNamePatterns))
211 uncachedAttributes.add(attName);
216 cachedAttributes.add(attName);
221 * we want to cache distinct terms for this attribute;
222 * parse them out using comma or & delimiters
226 terms = new HashSet<>();
228 StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS);
229 while (st.hasMoreTokens())
231 terms.add(st.nextToken().trim());
236 * Answers true if any of the patterns matches the value, else false
242 private boolean matches(String value, List<Pattern> filters)
244 for (Pattern p : filters)
246 if (p.matcher(value).matches())
255 * Answers the description of the attribute, if recorded and unique, or null
256 * if either no, or more than description is recorded
260 public String getDescription()
262 if (description != null && description.size() == 1)
264 return description.get(0);
269 public Datatype getType()
275 * Adds the given description to the list of known descriptions (without
280 public void addDescription(String desc)
284 if (description == null)
286 description = new ArrayList<>();
288 if (!description.contains(desc))
290 description.add(desc);
296 * Answers the distinct terms recorded for the attribute, or an empty set if
297 * it is not configured to cache values
301 public Set<String> getDistinctTerms()
303 return terms == null ? Collections.<String> emptySet() : terms;
308 * Answers the singleton instance of this class
312 public static FeatureAttributes getInstance()
/**
 * Private constructor to enforce singleton pattern. Creates the (empty)
 * attribute store and name caches, and compiles the name patterns that
 * select attributes whose distinct values are to be cached.
 */
private FeatureAttributes()
{
  this.attributes = new HashMap<>();
  this.cachedAttributes = new HashSet<>();
  this.uncachedAttributes = new HashSet<>();
  this.cacheableNamePatterns = getFieldMatchers(CACHED_ATTS_KEY,
          CACHED_ATTS_DEFAULT);
}
330 * Reads the Preference value for the given key, with default specified if no
331 * preference set. The value is interpreted as a comma-separated list of
332 * regular expressions, and converted into a list of compiled patterns ready
333 * for matching. Patterns are set to non-case-sensitive matching.
335 * This supports user-defined filters for attributes of interest to capture
336 * distinct values for as instance are added.
342 public static List<Pattern> getFieldMatchers(String key, String def)
344 String pref = Cache.getDefault(key, def);
345 List<Pattern> patterns = new ArrayList<>();
346 String[] tokens = pref.split(",");
347 for (String token : tokens)
351 patterns.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE));
352 } catch (PatternSyntaxException e)
354 System.err.println("Invalid pattern ignored: " + token);
361 * Answers the attribute names known for the given feature type, in
362 * alphabetical order (not case sensitive), or an empty set if no attributes
363 * are known. An attribute name is typically 'simple' e.g. "AC", but may be
364 * 'compound' e.g. {"CSQ", "Allele"} where a feature has map-valued attributes
369 public List<String[]> getAttributes(String featureType)
371 if (!attributes.containsKey(featureType))
373 return Collections.<String[]> emptyList();
376 return new ArrayList<>(attributes.get(featureType).keySet());
380 * Answers the set of distinct terms recorded for the given feature type and
381 * attribute. Answers an empty set if values are not cached for this
388 public Set<String> getDistinctTerms(String featureType, String... attName)
390 if (!attributes.containsKey(featureType)
391 || !attributes.get(featureType).containsKey(attName))
393 return Collections.<String> emptySet();
396 return attributes.get(featureType).get(attName).getDistinctTerms();
400 * Answers true if at least one attribute is known for the given feature type,
406 public boolean hasAttributes(String featureType)
408 if (attributes.containsKey(featureType))
410 if (!attributes.get(featureType).isEmpty())
419 * Records the given attribute name and description for the given feature
420 * type, and updates the min-max for any numeric value
427 public void addAttribute(String featureType, String description,
428 Object value, String... attName)
430 if (featureType == null || attName == null)
436 * if attribute value is a map, drill down one more level to
437 * record its sub-fields
439 if (value instanceof Map<?, ?>)
441 for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet())
443 String[] attNames = new String[attName.length + 1];
444 System.arraycopy(attName, 0, attNames, 0, attName.length);
445 attNames[attName.length] = entry.getKey().toString();
446 addAttribute(featureType, description, entry.getValue(), attNames);
451 String valueAsString = value.toString();
452 Map<String[], AttributeData> atts = attributes.get(featureType);
455 atts = new TreeMap<>(comparator);
456 attributes.put(featureType, atts);
458 AttributeData attData = atts.get(attName);
461 attData = new AttributeData();
462 atts.put(attName, attData);
464 attData.addInstance(attName, description, valueAsString);
468 * Answers the description of the given attribute for the given feature type,
469 * if known and unique, else null
475 public String getDescription(String featureType, String... attName)
478 Map<String[], AttributeData> atts = attributes.get(featureType);
481 AttributeData attData = atts.get(attName);
484 desc = attData.getDescription();
491 * Answers the [min, max] value range of the given attribute for the given
492 * feature type, if known, else null. Attributes with a mixture of text and
493 * numeric values are considered text (do not return a min-max range).
499 public float[] getMinMax(String featureType, String... attName)
501 Map<String[], AttributeData> atts = attributes.get(featureType);
504 AttributeData attData = atts.get(attName);
505 if (attData != null && attData.hasValue)
507 return new float[] { attData.min, attData.max };
514 * Records the given attribute description for the given feature type
520 public void addDescription(String featureType, String description,
523 if (featureType == null || attName == null)
528 Map<String[], AttributeData> atts = attributes.get(featureType);
531 atts = new TreeMap<>(comparator);
532 attributes.put(featureType, atts);
534 AttributeData attData = atts.get(attName);
537 attData = new AttributeData();
538 atts.put(attName, attData);
540 attData.addDescription(description);
544 * Answers the datatype of the feature, which is one of Character, Number or
545 * Mixed (or null if not known), as discovered from values recorded.
551 public Datatype getDatatype(String featureType, String... attName)
553 Map<String[], AttributeData> atts = attributes.get(featureType);
556 AttributeData attData = atts.get(attName);
559 return attData.getType();