Diffstat (limited to 'media/highway/src/hwy/tests/compress_test.cc')
-rw-r--r--  media/highway/src/hwy/tests/compress_test.cc | 420
1 file changed, 420 insertions, 0 deletions
diff --git a/media/highway/src/hwy/tests/compress_test.cc b/media/highway/src/hwy/tests/compress_test.cc
new file mode 100644
index 0000000000..861c9c299c
--- /dev/null
+++ b/media/highway/src/hwy/tests/compress_test.cc
@@ -0,0 +1,420 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <inttypes.h>  // PRIu64
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memset
+
+#include <array>  // IWYU pragma: keep
+
+#include "hwy/base.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/compress_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// For regenerating tables used in the implementation
+#define HWY_PRINT_TABLES 0
+
+class TestCompress {
+  template <class D, class DI, typename T = TFromD<D>,
+            typename TI = TFromD<DI>>
+  void CheckStored(D d, DI di, size_t expected_pos, size_t actual_pos,
+                   size_t num_to_check, const AlignedFreeUniquePtr<T[]>& in,
+                   const AlignedFreeUniquePtr<TI[]>& mask_lanes,
+                   const AlignedFreeUniquePtr<T[]>& expected,
+                   const T* actual_u, int line) {
+    if (expected_pos != actual_pos) {
+      hwy::Abort(
+          __FILE__, line,
+          "Size mismatch for %s: expected %" PRIu64 ", actual %" PRIu64 "\n",
+          TypeName(T(), Lanes(d)).c_str(), static_cast<uint64_t>(expected_pos),
+          static_cast<uint64_t>(actual_pos));
+    }
+    // Modified from AssertVecEqual - we may not be checking all lanes.
+    for (size_t i = 0; i < num_to_check; ++i) {
+      if (!IsEqual(expected[i], actual_u[i])) {
+        const size_t N = Lanes(d);
+        fprintf(stderr,
+                "Mismatch at i=%" PRIu64 " of %" PRIu64 ", line %d:\n\n",
+                static_cast<uint64_t>(i), static_cast<uint64_t>(num_to_check),
+                line);
+        Print(di, "mask", Load(di, mask_lanes.get()), 0, N);
+        Print(d, "in", Load(d, in.get()), 0, N);
+        Print(d, "expect", Load(d, expected.get()), 0, N);
+        Print(d, "actual", Load(d, actual_u), 0, N);
+        HWY_ASSERT(false);
+      }
+    }
+  }
+
+ public:
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    using TI = MakeSigned<T>;  // For mask > 0 comparison
+    const Rebind<TI, D> di;
+    const size_t N = Lanes(d);
+
+    const T zero{0};
+
+    for (int frac : {0, 2, 3}) {
+      // For CompressStore
+      const size_t misalign = static_cast<size_t>(frac) * N / 4;
+
+      auto in_lanes = AllocateAligned<T>(N);
+      auto mask_lanes = AllocateAligned<TI>(N);
+      auto expected = AllocateAligned<T>(N);
+      auto actual_a = AllocateAligned<T>(misalign + N);
+      T* actual_u = actual_a.get() + misalign;
+
+      const size_t bits_size = RoundUpTo((N + 7) / 8, 8);
+      auto bits = AllocateAligned<uint8_t>(bits_size);
+      memset(bits.get(), 0, bits_size);  // for MSAN
+
+      // Each lane should have a chance of having mask=true.
+      for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
+        size_t expected_pos = 0;
+        for (size_t i = 0; i < N; ++i) {
+          const uint64_t bits = Random32(&rng);
+          in_lanes[i] = T();  // cannot initialize float16_t directly.
+          CopyBytes<sizeof(T)>(&bits, &in_lanes[i]);
+          mask_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0);
+          if (mask_lanes[i] > 0) {
+            expected[expected_pos++] = in_lanes[i];
+          }
+        }
+        size_t num_to_check;
+        if (CompressIsPartition<T>::value) {
+          // For non-native Compress, also check that mask=false lanes were
+          // moved to the back of the vector (highest indices).
+          size_t extra = expected_pos;
+          for (size_t i = 0; i < N; ++i) {
+            if (mask_lanes[i] == 0) {
+              expected[extra++] = in_lanes[i];
+            }
+          }
+          HWY_ASSERT(extra == N);
+          num_to_check = N;
+        } else {
+          // For native Compress, only the mask=true lanes are defined.
+          num_to_check = expected_pos;
+        }
+
+        const auto in = Load(d, in_lanes.get());
+        const auto mask =
+            RebindMask(d, Gt(Load(di, mask_lanes.get()), Zero(di)));
+        StoreMaskBits(d, mask, bits.get());
+
+        // Compress
+        memset(actual_u, 0, N * sizeof(T));
+        StoreU(Compress(in, mask), d, actual_u);
+        CheckStored(d, di, expected_pos, expected_pos, num_to_check, in_lanes,
+                    mask_lanes, expected, actual_u, __LINE__);
+
+        // CompressStore
+        memset(actual_u, 0, N * sizeof(T));
+        const size_t size1 = CompressStore(in, mask, d, actual_u);
+        // expected_pos instead of num_to_check because this op is not
+        // affected by CompressIsPartition.
+        CheckStored(d, di, expected_pos, size1, expected_pos, in_lanes,
+                    mask_lanes, expected, actual_u, __LINE__);
+
+        // CompressBlendedStore
+        memset(actual_u, 0, N * sizeof(T));
+        const size_t size2 = CompressBlendedStore(in, mask, d, actual_u);
+        // expected_pos instead of num_to_check because this op only writes
+        // the mask=true lanes.
+        CheckStored(d, di, expected_pos, size2, expected_pos, in_lanes,
+                    mask_lanes, expected, actual_u, __LINE__);
+        // Subsequent lanes are untouched.
+        for (size_t i = size2; i < N; ++i) {
+          HWY_ASSERT_EQ(zero, actual_u[i]);
+        }
+
+        // CompressBits
+        memset(actual_u, 0, N * sizeof(T));
+        StoreU(CompressBits(in, bits.get()), d, actual_u);
+        CheckStored(d, di, expected_pos, expected_pos, num_to_check, in_lanes,
+                    mask_lanes, expected, actual_u, __LINE__);
+
+        // CompressBitsStore
+        memset(actual_u, 0, N * sizeof(T));
+        const size_t size3 = CompressBitsStore(in, bits.get(), d, actual_u);
+        // expected_pos instead of num_to_check because this op is not
+        // affected by CompressIsPartition.
+        CheckStored(d, di, expected_pos, size3, expected_pos, in_lanes,
+                    mask_lanes, expected, actual_u, __LINE__);
+      }  // rep
+    }    // frac
+  }      // operator()
+};
+
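The expected array built above encodes the semantics under test. As a
standalone scalar model (a sketch for orientation, not part of the commit;
the name ScalarCompress is hypothetical): every Compress* op packs the
mask=true lanes to the front in their original order, and when
CompressIsPartition holds, the mask=false lanes follow, also in order.

    #include <stddef.h>

    // Returns the number of selected elements, as CompressStore does.
    // out[] receives the mask=true lanes first, then (partition case)
    // the mask=false lanes.
    template <typename T>
    size_t ScalarCompress(const T* in, const bool* mask, size_t n, T* out) {
      size_t pos = 0;
      for (size_t i = 0; i < n; ++i) {
        if (mask[i]) out[pos++] = in[i];  // selected lanes, original order
      }
      const size_t num_true = pos;
      for (size_t i = 0; i < n; ++i) {
        if (!mask[i]) out[pos++] = in[i];  // rejected lanes, original order
      }
      return num_true;
    }
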
+#if HWY_PRINT_TABLES
+namespace detail {  // for code folding
+void PrintCompress16x8Tables() {
+  printf("======================================= 16x8\n");
+  constexpr size_t N = 8;  // 128-bit SIMD
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint8_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    // Doubled (for converting lane to byte indices)
+    for (size_t i = 0; i < N; ++i) {
+      printf("%d,", 2 * indices[i]);
+    }
+    printf(code & 1 ? "//\n" : "/**/");
+  }
+  printf("\n");
+}
+
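To make the table layout above concrete: for mask code 0b00000101, lanes 0
and 2 are selected, so the partitioned lane order is 0,2,1,3,4,5,6,7 and the
doubled (byte-offset) entries are 0,4,2,6,8,10,12,14. A standalone check of
that single entry, mirroring the generator loop (an editorial sketch, not
part of the commit):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    int main() {
      const uint64_t code = 0x05;  // 0b00000101: lanes 0 and 2 are mask=true
      uint8_t indices[8];
      size_t pos = 0;
      for (size_t i = 0; i < 8; ++i) {  // mask=true lanes first
        if (code & (1ull << i)) indices[pos++] = static_cast<uint8_t>(i);
      }
      for (size_t i = 0; i < 8; ++i) {  // then mask=false lanes
        if (!(code & (1ull << i))) indices[pos++] = static_cast<uint8_t>(i);
      }
      const uint8_t expected[8] = {0, 4, 2, 6, 8, 10, 12, 14};
      for (size_t i = 0; i < 8; ++i) {
        assert(2 * indices[i] == expected[i]);  // doubled byte offsets
      }
      return 0;
    }
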
+// Similar to the above, but uses native 16-bit shuffle instead of bytes.
+void PrintCompress16x16HalfTables() {
+  printf("======================================= 16x16Half\n");
+  constexpr size_t N = 8;
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint8_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    for (size_t i = 0; i < N; ++i) {
+      printf("%d,", indices[i]);
+    }
+    printf(code & 1 ? "//\n" : "/**/");
+  }
+  printf("\n");
+}
+
+// Compressed to nibbles
+void PrintCompress32x8Tables() {
+  printf("======================================= 32x8\n");
+  constexpr size_t N = 8;  // AVX2
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint32_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    // Convert to nibbles
+    uint64_t packed = 0;
+    for (size_t i = 0; i < N; ++i) {
+      HWY_ASSERT(indices[i] < N);
+      packed += indices[i] << (i * 4);
+    }
+
+    HWY_ASSERT(packed < (1ull << (N * 4)));
+    printf("0x%08x,", static_cast<uint32_t>(packed));
+  }
+  printf("\n");
+}
+
+// Compressed to nibbles (for AVX3 64x4)
+void PrintCompress64x4NibbleTables() {
+  printf("======================================= 64x4Nibble\n");
+  constexpr size_t N = 4;
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint32_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    // Convert to nibbles
+    uint64_t packed = 0;
+    for (size_t i = 0; i < N; ++i) {
+      HWY_ASSERT(indices[i] < N);
+      packed += indices[i] << (i * 4);
+    }
+
+    HWY_ASSERT(packed < (1ull << (N * 4)));
+    printf("0x%08x,", static_cast<uint32_t>(packed));
+  }
+  printf("\n");
+}
+
+// Pairs of 32-bit lane indices
+void PrintCompress64x4Tables() {
+  printf("======================================= 64x4\n");
+  constexpr size_t N = 4;  // AVX2
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint32_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    for (size_t i = 0; i < N; ++i) {
+      printf("%d,%d,", 2 * indices[i], 2 * indices[i] + 1);
+    }
+  }
+  printf("\n");
+}
+
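The nibble packing used by the 32x8 and 64x4Nibble generators stores the
source lane for output slot i in bits [4*i, 4*i+4). For example, with N = 4
and mask code 0b0101 (lanes 0 and 2 selected), the partition order 0,2,1,3
packs to 0x3120. A standalone check of that arithmetic (an editorial sketch,
not part of the commit):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    int main() {
      const uint32_t indices[4] = {0, 2, 1, 3};  // partition for code 0b0101
      uint64_t packed = 0;
      for (size_t i = 0; i < 4; ++i) {
        packed += indices[i] << (i * 4);  // nibble i = source lane of slot i
      }
      assert(packed == 0x3120);  // printed as 0x00003120 by the generator
      return 0;
    }
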
+// 4-tuple of byte indices
+void PrintCompress32x4Tables() {
+  printf("======================================= 32x4\n");
+  using T = uint32_t;
+  constexpr size_t N = 4;  // SSE4
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint32_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    for (size_t i = 0; i < N; ++i) {
+      for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) {
+        printf("%" PRIu64 ",",
+               static_cast<uint64_t>(sizeof(T) * indices[i] + idx_byte));
+      }
+    }
+  }
+  printf("\n");
+}
+
+// 8-tuple of byte indices
+void PrintCompress64x2Tables() {
+  printf("======================================= 64x2\n");
+  using T = uint64_t;
+  constexpr size_t N = 2;  // SSE4
+  for (uint64_t code = 0; code < (1ull << N); ++code) {
+    std::array<uint32_t, N> indices{0};
+    size_t pos = 0;
+    // All lanes where mask = true
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = i;
+      }
+    }
+    // All lanes where mask = false
+    for (size_t i = 0; i < N; ++i) {
+      if (!(code & (1ull << i))) {
+        indices[pos++] = i;
+      }
+    }
+    HWY_ASSERT(pos == N);
+
+    for (size_t i = 0; i < N; ++i) {
+      for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) {
+        printf("%" PRIu64 ",",
+               static_cast<uint64_t>(sizeof(T) * indices[i] + idx_byte));
+      }
+    }
+  }
+  printf("\n");
+}
+}  // namespace detail
+#endif  // HWY_PRINT_TABLES
+
+HWY_NOINLINE void TestAllCompress() {
+#if HWY_PRINT_TABLES
+  detail::PrintCompress32x8Tables();
+  detail::PrintCompress64x4NibbleTables();
+  detail::PrintCompress64x4Tables();
+  detail::PrintCompress32x4Tables();
+  detail::PrintCompress64x2Tables();
+  detail::PrintCompress16x8Tables();
+  detail::PrintCompress16x16HalfTables();
+#endif
+
+  ForUIF163264(ForPartialVectors<TestCompress>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(HwyCompressTest);
+HWY_EXPORT_AND_TEST_P(HwyCompressTest, TestAllCompress);
+}  // namespace hwy
+
+#endif
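For context, the ops exercised by this test are typically used for stream
filtering. A sketch under assumptions (static dispatch to the baseline
target; the helper name CopyKeepPositive is hypothetical, not part of
Highway or of this test):

    #include <stddef.h>

    #include "hwy/highway.h"

    namespace hn = hwy::HWY_NAMESPACE;

    // Copies the elements of in[] that are > 0 to the front of out[] and
    // returns how many were kept. out must have room for count elements.
    size_t CopyKeepPositive(const float* in, size_t count, float* out) {
      const hn::ScalableTag<float> d;
      const size_t N = hn::Lanes(d);
      size_t num_kept = 0;
      size_t i = 0;
      for (; i + N <= count; i += N) {
        const auto v = hn::LoadU(d, in + i);
        const auto mask = hn::Gt(v, hn::Zero(d));
        // Writes only the mask=true lanes, per the contract verified above.
        num_kept += hn::CompressBlendedStore(v, mask, d, out + num_kept);
      }
      for (; i < count; ++i) {  // scalar tail
        if (in[i] > 0.0f) out[num_kept++] = in[i];
      }
      return num_kept;
    }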