JAL-3020 check for distinct attributes value; unit test
author gmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 6 Jun 2018 10:42:19 +0000 (11:42 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 6 Jun 2018 10:42:19 +0000 (11:42 +0100)
src/jalview/datamodel/features/FeatureAttributes.java
test/jalview/datamodel/features/FeatureAttributesTest.java

index 64a13d7..d126402 100644 (file)
@@ -235,8 +235,12 @@ public class FeatureAttributes
       StringTokenizer st = new StringTokenizer(value, TERM_DELIMITERS);
       while (st.hasMoreTokens() && count < MAX_ATT_VALS)
       {
-        terms.add(st.nextToken().trim());
-        count++;
+        String term = st.nextToken().trim();
+        if (!terms.contains(term))
+        {
+          terms.add(term);
+          count++;
+        }
       }
     }
 
index ff4cd53..e068514 100644 (file)
@@ -5,6 +5,7 @@ import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
 
+import jalview.bin.Cache;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.features.FeatureAttributes.Datatype;
 
@@ -12,6 +13,7 @@ import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.regex.Pattern;
 
 import org.testng.annotations.AfterMethod;
@@ -29,6 +31,9 @@ public class FeatureAttributesTest
   @BeforeClass(alwaysRun = true)
   public void setUp()
   {
+    Cache.loadProperties("test/jalview/io/testProps.jvprops");
+    Cache.applicationProperties.setProperty("CACHED_ATTRIBUTES",
+            "CLIN_SIG,CSQ:Consequence,CSQ:Protein_position");
     FeatureAttributes fa = FeatureAttributes.getInstance();
     ((Map<?, ?>) PA.getValue(fa, "attributes")).clear();
   }
@@ -119,7 +124,7 @@ public class FeatureAttributesTest
   }
 
   @Test(groups = "Functional")
-  public void testDatatype()
+  public void testGetDatatype()
   {
     FeatureAttributes fa = FeatureAttributes.getInstance();
     assertNull(fa.getDatatype("Pfam", "kd"));
@@ -161,4 +166,69 @@ public class FeatureAttributesTest
     assertTrue(matchers.get(3).matcher("csq:peter").matches());
     assertFalse(matchers.get(3).matcher("CSQ:Blue Peter").matches());
   }
+
+  @Test(groups = "Functional")
+  public void testGetDistinctTerms()
+  {
+    // sanity check that config is set for this test
+    assertEquals(Cache.getProperty("CACHED_ATTRIBUTES"),
+            "CLIN_SIG,CSQ:Consequence,CSQ:Protein_position");
+
+    FeatureAttributes fa = FeatureAttributes.getInstance();
+    Set<String> terms = fa.getDistinctTerms("sequence_variant", "CLIN_SIG");
+    assertTrue(terms.isEmpty());
+
+    /*
+     * add one value
+     */
+    fa.addAttribute("sequence_variant", "", "benign", "CLIN_SIG");
+    terms = fa.getDistinctTerms("sequence_variant", "CLIN_SIG");
+    assertEquals(terms.size(), 1);
+    assertTrue(terms.contains("benign"));
+
+    /*
+     * add a delimited value: two new terms plus a duplicate of 'benign'
+     */
+    fa.addAttribute("sequence_variant", "",
+            "likely benign&pathogenic&benign", "CLIN_SIG");
+    terms = fa.getDistinctTerms("sequence_variant", "CLIN_SIG");
+    assertEquals(terms.size(), 3);
+    assertTrue(terms.contains("benign"));
+    assertTrue(terms.contains("likely benign"));
+    assertTrue(terms.contains("pathogenic"));
+
+    /*
+     * add attribute/sub-attribute value
+     */
+    fa.addAttribute("missense_variant", "",
+            "splice_region_variant&NMD_transcript_variant", "CSQ",
+            "Consequence");
+    terms = fa.getDistinctTerms("missense_variant", "CSQ", "Consequence");
+    assertEquals(terms.size(), 2);
+    assertTrue(terms.contains("splice_region_variant"));
+    assertTrue(terms.contains("NMD_transcript_variant"));
+
+    /*
+     * cache size is limited per attribute; simulate mistakenly
+     * storing all protein positions to verify this...
+     */
+    int maxCache = (int) PA.getValue(FeatureAttributes.class,
+            "MAX_ATT_VALS");
+    for (int i = 0; i < maxCache + 5; i++)
+    {
+      fa.addAttribute("sequence_variant", "", String.valueOf(i), "CSQ",
+              "Protein_position");
+    }
+    terms = fa.getDistinctTerms("sequence_variant", "CSQ",
+            "Protein_position");
+    assertEquals(terms.size(), maxCache);
+
+    /*
+     * non-empty values are not cached for attributes not configured to do so
+     */
+    fa.addAttribute("sequence_variant", "", "benign", "clinical_significance");
+    terms = fa.getDistinctTerms("sequence_variant",
+            "clinical_significance");
+    assertTrue(terms.isEmpty());
+  }
 }