summaryrefslogtreecommitdiff
path: root/media/highway/src/hwy/tests/mask_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'media/highway/src/hwy/tests/mask_test.cc')
-rw-r--r--media/highway/src/hwy/tests/mask_test.cc494
1 files changed, 494 insertions, 0 deletions
diff --git a/media/highway/src/hwy/tests/mask_test.cc b/media/highway/src/hwy/tests/mask_test.cc
new file mode 100644
index 0000000000..d397c72536
--- /dev/null
+++ b/media/highway/src/hwy/tests/mask_test.cc
@@ -0,0 +1,494 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h> // memset
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/mask_test.cc"
+#include "hwy/foreach_target.h"
+
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// All types.
+struct TestFromVec {
+  // Loads a buffer whose bytes are all 0x00, then all 0xFF, and checks that
+  // MaskFromVec of the loaded vector equals MaskFalse(d) and MaskTrue(d)
+  // respectively.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t num_lanes = Lanes(d);
+    const size_t num_bytes = num_lanes * sizeof(T);
+    auto buffer = AllocateAligned<T>(num_lanes);
+    T* const ptr = buffer.get();
+
+    // Every byte zero: the resulting mask must match MaskFalse.
+    memset(ptr, 0, num_bytes);
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), MaskFromVec(Load(d, ptr)));
+
+    // Every byte 0xFF: the resulting mask must match MaskTrue.
+    memset(ptr, 0xFF, num_bytes);
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), MaskFromVec(Load(d, ptr)));
+  }
+};
+
+HWY_NOINLINE void TestAllFromVec() {
+  // Builds the ForPartialVectors<TestFromVec> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestFromVec> test;
+  ForAllTypes(test);
+}
+
+struct TestFirstN {
+  // Compares FirstN(d, len) against a reference mask built lane by lane, for
+  // a range of small lengths and then for one very large length.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t num_lanes = Lanes(d);
+    auto flags = AllocateAligned<T>(num_lanes);
+    const auto ones = Set(d, T{1});
+
+    // Largest length under test: the maximum of the signed integer type whose
+    // size is the smaller of sizeof(size_t) and sizeof(T).
+    using TN = SignedFromSize<HWY_MIN(sizeof(size_t), sizeof(T))>;
+    const size_t max_len = static_cast<size_t>(LimitsMax<TN>());
+
+    const size_t max_lanes = HWY_MIN(2 * num_lanes, AdjustedReps(512));
+    const size_t last_len = HWY_MIN(max_lanes, max_len);
+    for (size_t len = 0; len <= last_len; ++len) {
+      // Scalar loop rather than Iota+Lt, which would wrap around for 8-bit T.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        flags[lane] = (lane < len) ? T{1} : T{0};
+      }
+      HWY_ASSERT_MASK_EQ(d, Eq(Load(d, flags.get()), ones), FirstN(d, len));
+    }
+
+    // A huge length must also yield all-true, unless the vector actually has
+    // more than max_len lanes.
+    for (size_t lane = 0; lane < num_lanes; ++lane) {
+      flags[lane] = (lane < max_len) ? T{1} : T{0};
+    }
+    HWY_ASSERT_MASK_EQ(d, Eq(Load(d, flags.get()), ones), FirstN(d, max_len));
+  }
+};
+
+HWY_NOINLINE void TestAllFirstN() {
+  // Builds the ForPartialVectors<TestFirstN> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestFirstN> test;
+  ForAllTypes(test);
+}
+
+struct TestIfThenElse {
+  // Checks IfThenElse, IfThenElseZero and IfThenZeroElse against a scalar
+  // reference, using random inputs and a random per-lane mask.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(d);
+    auto if_true = AllocateAligned<T>(num_lanes);
+    auto if_false = AllocateAligned<T>(num_lanes);
+    auto flags = AllocateAligned<TI>(num_lanes);
+    auto expected = AllocateAligned<T>(num_lanes);
+
+    const size_t num_reps = AdjustedReps(200);
+    for (size_t rep = 0; rep < num_reps; ++rep) {
+      // Three random draws per lane: both inputs, then the flag (bit 4), so
+      // that every lane has a chance of being selected.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        if_true[lane] = static_cast<T>(Random32(&rng));
+        if_false[lane] = static_cast<T>(Random32(&rng));
+        const bool selected = (Random32(&rng) & 16) != 0;
+        flags[lane] = selected ? TI(1) : TI(0);
+      }
+
+      const auto v_true = Load(d, if_true.get());
+      const auto v_false = Load(d, if_false.get());
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto mask = RebindMask(d, flags_positive);
+
+      // IfThenElse: pick from the first input where the flag is set.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        if (flags[lane]) {
+          expected[lane] = if_true[lane];
+        } else {
+          expected[lane] = if_false[lane];
+        }
+      }
+      HWY_ASSERT_VEC_EQ(d, expected.get(), IfThenElse(mask, v_true, v_false));
+
+      // IfThenElseZero: unselected lanes become zero.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        if (flags[lane]) {
+          expected[lane] = if_true[lane];
+        } else {
+          expected[lane] = T(0);
+        }
+      }
+      HWY_ASSERT_VEC_EQ(d, expected.get(), IfThenElseZero(mask, v_true));
+
+      // IfThenZeroElse: selected lanes become zero.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        if (flags[lane]) {
+          expected[lane] = T(0);
+        } else {
+          expected[lane] = if_false[lane];
+        }
+      }
+      HWY_ASSERT_VEC_EQ(d, expected.get(), IfThenZeroElse(mask, v_false));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllIfThenElse() {
+  // Builds the ForPartialVectors<TestIfThenElse> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestIfThenElse> test;
+  ForAllTypes(test);
+}
+
+struct TestMaskVec {
+  // Checks that converting a mask to a vector and back is lossless:
+  // MaskFromVec(VecFromMask(d, m)) must equal m for random m.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(d);
+    auto flags = AllocateAligned<TI>(num_lanes);
+
+    const size_t num_reps = AdjustedReps(200);
+    for (size_t rep = 0; rep < num_reps; ++rep) {
+      // Bit 10 of a fresh random number decides each lane, so every lane has
+      // a chance of being true.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        const bool is_true = (Random32(&rng) & 1024) != 0;
+        flags[lane] = is_true ? TI(1) : TI(0);
+      }
+
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto mask = RebindMask(d, flags_positive);
+      const auto round_trip = MaskFromVec(VecFromMask(d, mask));
+      HWY_ASSERT_MASK_EQ(d, mask, round_trip);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMaskVec() {
+  // Runs TestMaskVec explicitly for the two 16-bit integer types, then lets
+  // ForUIF3264 dispatch it.
+  const ForPartialVectors<TestMaskVec> mask_vec_test;
+
+  mask_vec_test(uint16_t{});
+  mask_vec_test(int16_t{});
+  // TODO(janwas): float16_t - cannot compare yet
+
+  ForUIF3264(mask_vec_test);
+}
+
+struct TestMaskedLoad {
+  // Checks MaskedLoad against IfThenElseZero applied to a full Load of the
+  // same memory, for random masks.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(d);
+    auto flags = AllocateAligned<TI>(num_lanes);
+
+    // Source memory holds Iota(d, 1).
+    auto source = AllocateAligned<T>(num_lanes);
+    Store(Iota(d, T{1}), d, source.get());
+
+    const size_t num_reps = AdjustedReps(200);
+    for (size_t rep = 0; rep < num_reps; ++rep) {
+      // Bit 10 of a fresh random number decides each lane, so every lane has
+      // a chance of being true.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        const bool is_true = (Random32(&rng) & 1024) != 0;
+        flags[lane] = is_true ? TI(1) : TI(0);
+      }
+
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto mask = RebindMask(d, flags_positive);
+
+      const auto reference = IfThenElseZero(mask, Load(d, source.get()));
+      const auto loaded = MaskedLoad(mask, d, source.get());
+      HWY_ASSERT_VEC_EQ(d, reference, loaded);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMaskedLoad() {
+  // Builds the ForPartialVectors<TestMaskedLoad> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestMaskedLoad> test;
+  ForAllTypes(test);
+}
+
+struct TestBlendedStore {
+  // Checks that BlendedStore writes Iota(d, 1) only to lanes whose mask is
+  // true and leaves the previous memory contents in all other lanes.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(d);
+    auto flags = AllocateAligned<TI>(num_lanes);
+
+    const Vec<D> to_store = Iota(d, T{1});
+    auto memory = AllocateAligned<T>(num_lanes);
+    auto expected = AllocateAligned<T>(num_lanes);
+
+    const size_t num_reps = AdjustedReps(200);
+    for (size_t rep = 0; rep < num_reps; ++rep) {
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        // Bit 10 of a fresh random number decides each lane, so every lane
+        // has a chance of being true.
+        const bool is_true = (Random32(&rng) & 1024) != 0;
+        flags[lane] = is_true ? TI(1) : TI(0);
+
+        // Reset the destination to a pattern intended to differ from the
+        // stored vector.
+        const T previous = static_cast<T>(127 - (lane & 127));
+        memory[lane] = previous;
+
+        // Selected lanes receive lane + 1; the rest keep the old contents.
+        if (flags[lane]) {
+          expected[lane] = static_cast<T>(lane + 1);
+        } else {
+          expected[lane] = previous;
+        }
+      }
+
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto mask = RebindMask(d, flags_positive);
+      BlendedStore(to_store, mask, d, memory.get());
+      HWY_ASSERT_VEC_EQ(d, expected.get(), Load(d, memory.get()));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllBlendedStore() {
+  // Builds the ForPartialVectors<TestBlendedStore> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestBlendedStore> test;
+  ForAllTypes(test);
+}
+
+struct TestAllTrueFalse {
+  // Checks AllTrue/AllFalse on the mask Eq(v, zero): first with v entirely
+  // zero, then with exactly one lane made nonzero (1, then -1), then with that
+  // lane reset to zero, repeated for every lane index.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    auto v = zero;
+
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    // Zero the buffer with memset, as the other tests in this file do. This
+    // avoids std::fill, whose header <algorithm> this file does not include.
+    // All-zero bytes are the value zero for integer and IEEE floating-point
+    // lane types.
+    memset(lanes.get(), 0, N * sizeof(T));
+
+    // Scratch memory for the GCC workaround below.
+    auto mask_lanes = AllocateAligned<T>(N);
+
+    // All lanes are zero, so every comparison result is true.
+    HWY_ASSERT(AllTrue(d, Eq(v, zero)));
+    HWY_ASSERT(!AllFalse(d, Eq(v, zero)));
+
+    // Single lane implies AllFalse = !AllTrue. Otherwise, there are multiple
+    // lanes and one is nonzero.
+    const bool expected_all_false = (N != 1);
+
+    // Set each lane to nonzero and back to zero
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = T(1);
+      v = Load(d, lanes.get());
+
+      // GCC 10.2.1 workaround: AllTrue(Eq(v, zero)) is true but should not be.
+      // Assigning to an lvalue is insufficient but storing to memory prevents
+      // the bug; so does Print of VecFromMask(d, Eq(v, zero)).
+      Store(VecFromMask(d, Eq(v, zero)), d, mask_lanes.get());
+      HWY_ASSERT(!AllTrue(d, MaskFromVec(Load(d, mask_lanes.get()))));
+
+      // With N == 1 the only lane is nonzero, so AllFalse must be true; with
+      // more lanes the remaining zero lanes keep AllFalse false.
+      HWY_ASSERT(expected_all_false != AllFalse(d, Eq(v, zero)));
+
+      lanes[i] = T(-1);
+      v = Load(d, lanes.get());
+      HWY_ASSERT(!AllTrue(d, Eq(v, zero)));
+      HWY_ASSERT(expected_all_false != AllFalse(d, Eq(v, zero)));
+
+      // Reset to all zero
+      lanes[i] = T(0);
+      v = Load(d, lanes.get());
+      HWY_ASSERT(AllTrue(d, Eq(v, zero)));
+      HWY_ASSERT(!AllFalse(d, Eq(v, zero)));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllAllTrueFalse() {
+  // Builds the ForPartialVectors<TestAllTrueFalse> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestAllTrueFalse> test;
+  ForAllTypes(test);
+}
+
+class TestStoreMaskBits {
+ public:
+  // Stores random masks with StoreMaskBits and verifies: the reported byte
+  // count, that LoadMaskBits reproduces the mask, that each stored bit matches
+  // its lane, and that unused bits of the last byte are zero.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*t*/, D /*d*/) {
+    RandomState rng;
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(di);
+    auto flags = AllocateAligned<TI>(num_lanes);
+
+    const ScalableTag<uint8_t, -3> d_bits;  // used only to Print the bytes
+    const size_t expected_num_bytes = (num_lanes + 7) / 8;
+    auto expected = AllocateAligned<uint8_t>(expected_num_bytes);
+    // At least 8 bytes, as required by the StoreMaskBits/LoadMaskBits calls
+    // below.
+    auto actual = AllocateAligned<uint8_t>(HWY_MAX(8, expected_num_bytes));
+
+    const size_t num_reps = AdjustedReps(200);
+    for (size_t rep = 0; rep < num_reps; ++rep) {
+      // Random mask pattern: bit 10 of each random number decides the lane.
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        const int flag = (rng() & 1024) ? 1 : 0;
+        flags[lane] = static_cast<TI>(flag);
+      }
+      const auto bools = Load(di, flags.get());
+      const auto mask = Gt(bools, Zero(di));
+
+      // The destination has at least 8 bytes (see allocation above).
+      const size_t bytes_written = StoreMaskBits(di, mask, actual.get());
+      if (bytes_written != expected_num_bytes) {
+        fprintf(stderr, "%s expected %" PRIu64 " bytes, actual %" PRIu64 "\n",
+                TypeName(T(), num_lanes).c_str(),
+                static_cast<uint64_t>(expected_num_bytes),
+                static_cast<uint64_t>(bytes_written));
+
+        HWY_ASSERT(false);
+      }
+
+      // Reading the bits back must reproduce the mask (source also has at
+      // least 8 bytes).
+      const auto reloaded = LoadMaskBits(di, actual.get());
+      HWY_ASSERT_MASK_EQ(di, mask, reloaded);
+
+      // Scalar reference packing: bit (lane % 8) of byte (lane / 8).
+      memset(expected.get(), 0, expected_num_bytes);
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        const size_t byte_idx = lane / 8;
+        expected[byte_idx] = static_cast<uint8_t>(
+            expected[byte_idx] | (flags[lane] << (lane % 8)));
+      }
+
+      // Stored bits must match original mask
+      for (size_t lane = 0; lane < num_lanes; ++lane) {
+        const TI is_set = (actual[lane / 8] & (1 << (lane % 8))) ? 1 : 0;
+        if (is_set == flags[lane]) continue;
+
+        fprintf(stderr, "%s lane %" PRIu64 ": expected %d, actual %d\n",
+                TypeName(T(), num_lanes).c_str(), static_cast<uint64_t>(lane),
+                int(flags[lane]), int(is_set));
+        Print(di, "bools", bools, 0, num_lanes);
+        Print(d_bits, "expected bytes", Load(d_bits, expected.get()), 0,
+              expected_num_bytes);
+        Print(d_bits, "actual bytes", Load(d_bits, actual.get()), 0,
+              expected_num_bytes);
+
+        HWY_ASSERT(false);
+      }
+
+      // Any partial bits in the last byte must be zero
+      const size_t total_bits = 8 * bytes_written;
+      for (size_t bit_idx = num_lanes; bit_idx < total_bits; ++bit_idx) {
+        const int bit = (actual[bit_idx / 8] & (1 << (bit_idx % 8)));
+        if (bit == 0) continue;
+
+        fprintf(stderr, "%s: bit #%" PRIu64 " should be zero\n",
+                TypeName(T(), num_lanes).c_str(),
+                static_cast<uint64_t>(bit_idx));
+        Print(di, "bools", bools, 0, num_lanes);
+        Print(d_bits, "expected bytes", Load(d_bits, expected.get()), 0,
+              expected_num_bytes);
+        Print(d_bits, "actual bytes", Load(d_bits, actual.get()), 0,
+              expected_num_bytes);
+
+        HWY_ASSERT(false);
+      }
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllStoreMaskBits() {
+  // Builds the ForPartialVectors<TestStoreMaskBits> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestStoreMaskBits> test;
+  ForAllTypes(test);
+}
+
+struct TestCountTrue {
+  // Checks CountTrue for every true/false combination of the first
+  // min(N, 10) lanes; any remaining lanes stay false.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(di);
+    auto flags = AllocateAligned<TI>(num_lanes);
+    memset(flags.get(), 0, num_lanes * sizeof(TI));
+
+    // Only a subset of lanes is enumerated; bit i of `code` is the state of
+    // lane i.
+    const size_t max_lanes = HWY_MIN(num_lanes, size_t(10));
+    const unsigned long long num_codes = 1ull << max_lanes;
+
+    for (size_t code = 0; code < num_codes; ++code) {
+      // The expected count is the number of lanes set to true for this code.
+      size_t num_true = 0;
+      for (size_t lane = 0; lane < max_lanes; ++lane) {
+        if ((code & (1ull << lane)) != 0) {
+          flags[lane] = TI(1);
+          ++num_true;
+        } else {
+          flags[lane] = TI(0);
+        }
+      }
+
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto mask = RebindMask(d, flags_positive);
+      const size_t counted = CountTrue(d, mask);
+      HWY_ASSERT_EQ(num_true, counted);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllCountTrue() {
+  // Builds the ForPartialVectors<TestCountTrue> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestCountTrue> test;
+  ForAllTypes(test);
+}
+
+struct TestFindFirstTrue {
+  // Checks FindFirstTrue: -1 for an all-false mask, 0 for an all-true mask,
+  // and the index of the lowest set bit of `code` for every nonzero
+  // combination of the enumerated lanes.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(di);
+    auto flags = AllocateAligned<TI>(num_lanes);
+    memset(flags.get(), 0, num_lanes * sizeof(TI));
+
+    // Only a subset of lanes is enumerated; bit i of `code` is the state of
+    // lane i.
+    const size_t max_lanes = AdjustedLog2Reps(HWY_MIN(num_lanes, size_t(9)));
+    const unsigned long long num_codes = 1ull << max_lanes;
+
+    HWY_ASSERT_EQ(intptr_t(-1), FindFirstTrue(d, MaskFalse(d)));
+    HWY_ASSERT_EQ(intptr_t(0), FindFirstTrue(d, MaskTrue(d)));
+
+    // Starts at 1: code 0 is the all-false case already checked above.
+    for (size_t code = 1; code < num_codes; ++code) {
+      for (size_t lane = 0; lane < max_lanes; ++lane) {
+        const bool is_true = (code & (1ull << lane)) != 0;
+        flags[lane] = is_true ? TI(1) : TI(0);
+      }
+
+      const size_t lowest_set_bit =
+          Num0BitsBelowLS1Bit_Nonzero32(uint32_t(code));
+      const intptr_t expected = static_cast<intptr_t>(lowest_set_bit);
+
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto mask = RebindMask(d, flags_positive);
+      const intptr_t found = FindFirstTrue(d, mask);
+      HWY_ASSERT_EQ(expected, found);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllFindFirstTrue() {
+  // Builds the ForPartialVectors<TestFindFirstTrue> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestFindFirstTrue> test;
+  ForAllTypes(test);
+}
+
+struct TestLogicalMask {
+  // Checks identities of the mask operators Not, And, Or, Xor and AndNot
+  // against the all-false and all-true masks, for every true/false
+  // combination of the enumerated lanes.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto none = MaskFalse(d);
+    const auto all = MaskTrue(d);
+
+    using TI = MakeSigned<T>;  // signed lanes so the mask can come from "> 0"
+    const Rebind<TI, D> di;
+    const size_t num_lanes = Lanes(di);
+    auto flags = AllocateAligned<TI>(num_lanes);
+    memset(flags.get(), 0, num_lanes * sizeof(TI));
+
+    // Not swaps the two constant masks.
+    HWY_ASSERT_MASK_EQ(d, none, Not(all));
+    HWY_ASSERT_MASK_EQ(d, all, Not(none));
+
+    // Only a subset of lanes is enumerated; bit i of `code` is the state of
+    // lane i.
+    const size_t max_lanes = AdjustedLog2Reps(HWY_MIN(num_lanes, size_t(6)));
+    const unsigned long long num_codes = 1ull << max_lanes;
+    for (size_t code = 0; code < num_codes; ++code) {
+      for (size_t lane = 0; lane < max_lanes; ++lane) {
+        const bool is_true = (code & (1ull << lane)) != 0;
+        flags[lane] = is_true ? TI(1) : TI(0);
+      }
+
+      const auto flags_positive = Gt(Load(di, flags.get()), Zero(di));
+      const auto m = RebindMask(d, flags_positive);
+
+      // Combinations that must produce the all-false mask.
+      HWY_ASSERT_MASK_EQ(d, none, Xor(m, m));
+      HWY_ASSERT_MASK_EQ(d, none, AndNot(m, m));
+      HWY_ASSERT_MASK_EQ(d, none, AndNot(all, m));
+
+      // Combinations that must leave m unchanged.
+      HWY_ASSERT_MASK_EQ(d, m, Or(m, m));
+      HWY_ASSERT_MASK_EQ(d, m, Or(none, m));
+      HWY_ASSERT_MASK_EQ(d, m, Or(m, none));
+      HWY_ASSERT_MASK_EQ(d, m, Xor(none, m));
+      HWY_ASSERT_MASK_EQ(d, m, Xor(m, none));
+      HWY_ASSERT_MASK_EQ(d, m, And(m, m));
+      HWY_ASSERT_MASK_EQ(d, m, And(all, m));
+      HWY_ASSERT_MASK_EQ(d, m, And(m, all));
+      HWY_ASSERT_MASK_EQ(d, m, AndNot(none, m));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllLogicalMask() {
+  // Builds the ForPartialVectors<TestLogicalMask> functor and hands it to
+  // ForAllTypes.
+  const ForPartialVectors<TestLogicalMask> test;
+  ForAllTypes(test);
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {  // Test registration for the HwyMaskTest suite; compiled only under the enclosing #if HWY_ONCE.
+HWY_BEFORE_TEST(HwyMaskTest);
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllFromVec);  // MaskFromVec
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllFirstN);  // FirstN
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllIfThenElse);  // IfThenElse, IfThenElseZero, IfThenZeroElse
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllMaskVec);  // VecFromMask / MaskFromVec round trip
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllMaskedLoad);  // MaskedLoad
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllBlendedStore);  // BlendedStore
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllAllTrueFalse);  // AllTrue, AllFalse
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllStoreMaskBits);  // StoreMaskBits, LoadMaskBits
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllCountTrue);  // CountTrue
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllFindFirstTrue);  // FindFirstTrue
+HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllLogicalMask);  // Not, And, Or, Xor, AndNot on masks
+} // namespace hwy
+
+#endif