From 04134a52ba4355211d96a1a883663508008a584b Mon Sep 17 00:00:00 2001 From: Mateusz Warowny Date: Wed, 13 Sep 2023 16:08:54 +0200 Subject: [PATCH] JAL-4241 add SeqsetUtils.filterSequence function --- src/jalview/analysis/SeqsetUtils.java | 29 +++++++++++++++++++++++++++ test/jalview/analysis/SeqsetUtilsTest.java | 30 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/jalview/analysis/SeqsetUtils.java b/src/jalview/analysis/SeqsetUtils.java index adb70e3..5420aff 100755 --- a/src/jalview/analysis/SeqsetUtils.java +++ b/src/jalview/analysis/SeqsetUtils.java @@ -30,6 +30,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import java.util.ArrayList; +import java.util.BitSet; import java.util.Enumeration; import java.util.HashMap; import java.util.Hashtable; @@ -40,6 +41,8 @@ import java.util.Optional; import java.util.Vector; import static java.lang.String.format; +import java.nio.CharBuffer; + public class SeqsetUtils { public static class SequenceInfo { @@ -90,6 +93,32 @@ public class SeqsetUtils } /** + * Filter the sequence through the mask leaving only characters at positions + * where the mask value was true. The length of the resulting array is + * the cardinality of the mask from 0 to sequence length. + * + * @param sequence + * input sequence + * @param mask + * mask used to filter the sequence characters + * @return input array filtered through the mask + */ + public static char[] filterSequence(char[] sequence, BitSet mask) + { + mask = mask.get(0, sequence.length); + char[] result = new char[mask.cardinality()]; + for (int i = mask.nextSetBit(0), j = 0; i >= 0;) + { + result[j++] = sequence[i]; + if (i == Integer.MAX_VALUE) + // prevents integer overflow of (i + 1) + break; + i = mask.nextSetBit(i + 1); + } + return result; + } + + /** * Recover essential properties of a sequence from a hashtable TODO: replace * these methods with something more elegant. * diff --git a/test/jalview/analysis/SeqsetUtilsTest.java b/test/jalview/analysis/SeqsetUtilsTest.java index 71e5bfd..fbd5812 100644 --- a/test/jalview/analysis/SeqsetUtilsTest.java +++ b/test/jalview/analysis/SeqsetUtilsTest.java @@ -27,11 +27,16 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.gui.JvOptionPane; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import java.util.BitSet; import java.util.Hashtable; import java.util.Map; import org.testng.Assert; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; /** @@ -82,4 +87,29 @@ public class SeqsetUtilsTest Assert.assertSame(sqset[0].getSequenceFeatures().get(1), sqset2[0].getSequenceFeatures().get(1)); } + + @DataProvider + public Object[][] sequenceAndMask() + { + return new Object[][] { + { "AAAABBBBCCCCDDDD", 0xFFFFL, "AAAABBBBCCCCDDDD" }, + { "AAAABBBBCCCCDDDD", 0x000FL, "AAAA" }, + { "---A---B---C---D", 0x8888L, "ABCD" }, + { "---A---B---C---D", 0x9999L, "-A-B-C-D" }, + { "ABCDABCDABCDABCD", 0xC5A3L, "ABBDACCD" }, + { "", 0xFFFFL, "" }, + { "AAAABBBBCCCCDDDD", 0x0000L, "" }, + { "AAABBBCCC", 0xFFFF, "AAABBBCCC" }, + { "AAAABBBB", 0xD000L, "" }, + { "AAAABBBB", 0xAA0AL, "AA" }, + }; + } + + @Test(groups = {"Functional"}, dataProvider = "sequenceAndMask") + public void testFilterSequence(String sequence, long mask, String expected) + { + BitSet bitMask = BitSet.valueOf(new long[] {mask}); + var result = SeqsetUtils.filterSequence(sequence.toCharArray(), bitMask); + assertThat(result, equalTo(expected.toCharArray())); + } } -- 1.7.10.2