JAL-1965 word based matching and explicit coping with match collisions - tests are...
[jalview.git] / test / jalview / analysis / SequenceIdMatcherTest.java
1 package jalview.analysis;
2
3 import jalview.datamodel.Sequence;
4 import jalview.datamodel.SequenceI;
5
6 import java.util.Arrays;
7
8 import org.testng.Assert;
9 import org.testng.annotations.Test;
10
11 public class SequenceIdMatcherTest
12 {
13   private static SequenceI[] someseqs = {
14       new Sequence("A|ComplexId", "dummy"),
15       new Sequence("A|ComplexId|confused", "dummy"),
16       new Sequence("A|ComplexId|bits_of", "dummy"),
17       new Sequence("ComplexId", "dummy"),
18       new Sequence("A|ComplexIdNot", "dummy"),
19       new Sequence("A ComplexId Id", "dummy"),
20       new Sequence("complexid", "dummy") };
21
22   private static SequenceIdMatcher getMatcher()
23   {
24     return new SequenceIdMatcher(Arrays.asList(someseqs));
25   }
26
27   private static SequenceIdMatcher getWordMatcher()
28   {
29     return new SequenceIdMatcher(true, Arrays.asList(someseqs));
30   }
31
32   @Test(groups = { "Functional" })
33   public void findSelfAndOthers()
34   {
35     for (SequenceI sq : SequenceIdMatcherTest.someseqs)
36     {
37       SequenceI[] idmatches = getMatcher().findAllIdMatches(sq.getName());
38       Assert.assertTrue(
39               idmatches.length >= 1,
40               "Couldn't recover at least one sequence for string '"
41                       + sq.getName() + "'");
42       SequenceI[] seqmatches = getMatcher().findIdMatch(
43               new SequenceI[] { sq });
44       Assert.assertEquals(1, seqmatches.length,
45               "Expected to recover one sequence for sequence object called '"
46                       + sq.getName() + "'");
47       Assert.assertEquals(sq, seqmatches[0],
48               "Expected to recover the sequence queried with findIdMatch(SequenceI[])");
49       // TODO: complexid and ComplexId are identical with case-insensitive
50       // matching. This assert fails because of this.
51       // Assert.assertTrue(seqmatches.length == idmatches.length,
52       // "Different matches found for '" + sq.getName() + "'");
53       for (SequenceI sid : seqmatches)
54       {
55         boolean found = false;
56         for (SequenceI sobj : idmatches)
57         {
58           if (sid == sobj)
59           {
60             found = true;
61           }
62         }
63         Assert.assertTrue(
64                 found,
65                 "Different sequences recovered for Id "
66                         + "and SequenceI (Couldn't find match for '"
67                         + sid.getName() + "')");
68
69       }
70     }
71   }
72
73   @Test(groups = { "Functional" })
74   public void testExactMatch()
75   {
76     SequenceI[] matches = getMatcher().findAllIdMatches("A|ComplexId");
77     Assert.assertTrue(matches.length == 1,
78             "Exact match failed for 'A|ComplexId'");
79     matches = getMatcher().findAllIdMatches("A|ComplexId|confused");
80     Assert.assertTrue(matches.length == 1,
81             "Exact match failed for 'A|ComplexId|confused'");
82     matches = getMatcher().findAllIdMatches("A|ComplexId|bits_of");
83     Assert.assertTrue(matches.length == 1,
84             "Exact match failed for 'A|ComplexId|bits_of'");
85     matches = getMatcher().findAllIdMatches("A ComplexId Id");
86     Assert.assertTrue(matches.length == 1,
87             "Exact match failed for 'A Complex Id'");
88
89   }
90
91   @Test(groups = { "Functional" })
92   public void testCaseInsensitiveMatch()
93   {
94     Assert.assertNotNull(getMatcher().findIdMatch("a|complexid"),
95             "Couldn't retrieve a single case insensitive match.");
96   }
97
98   @Test(groups = { "Functional" })
99   public void testFlankingMatch()
100   {
101     SequenceI[] match = getMatcher().findAllIdMatches("complexId");
102     // should find two matches - one case exact, the other case inexact.
103     Assert.assertNotNull(match, "Exact matches not found.");
104     Assert.assertEquals(match.length, 2,
105             "Expected two exact matches to be found.");
106     SequenceI[] fmatch = getWordMatcher()
107             .findAllIdMatches("complexId");
108     // should find 6 distinct sequences
109     Assert.assertNotNull(fmatch, "Flanking matches not found.");
110     Assert.assertEquals(fmatch.length, 6,
111             "Couldn't find all entries with IDs containing 'complexId' word match");
112
113   }
114
115 }