// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
| |
#include "src/xnnpack/indirection.h"

#include <cstddef>
#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

#include <gtest/gtest.h>
#include "include/xnnpack.h"
#include "src/xnnpack/buffer.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/operator-utils.h"
#include "src/xnnpack/operator.h"
| |
| namespace xnnpack { |
| namespace { |
| |
// Sentinel expected index: the indirection entry should point at the zero
// buffer instead of at an input element.
constexpr uintptr_t kZero = SIZE_MAX;
| |
| class IndirectionTester { |
| public: |
| IndirectionTester& input_height(size_t input_height) { |
| input_height_ = input_height; |
| return *this; |
| } |
| |
| IndirectionTester& input_width(size_t input_width) { |
| input_width_ = input_width; |
| return *this; |
| } |
| |
| IndirectionTester& kernel_height(size_t kernel_height) { |
| kernel_height_ = kernel_height; |
| return *this; |
| } |
| |
| IndirectionTester& kernel_width(size_t kernel_width) { |
| kernel_width_ = kernel_width; |
| return *this; |
| } |
| |
| IndirectionTester& padding_height(size_t padding_height) { |
| padding_height_ = padding_height; |
| return *this; |
| } |
| |
| IndirectionTester& padding_width(size_t padding_width) { |
| padding_width_ = padding_width; |
| return *this; |
| } |
| |
| IndirectionTester& subsampling(size_t subsampling) { |
| subsampling_ = subsampling; |
| return *this; |
| } |
| |
| IndirectionTester& dilation(size_t dilation) { |
| dilation_ = dilation; |
| return *this; |
| } |
| |
| IndirectionTester& channels(size_t channels) { |
| channels_ = channels; |
| return *this; |
| } |
| |
| IndirectionTester& primary_tile(size_t primary_tile) { |
| primary_tile_ = primary_tile; |
| return *this; |
| } |
| |
| IndirectionTester& channel_tile(size_t channel_tile) { |
| channel_tile_ = channel_tile; |
| return *this; |
| } |
| |
| IndirectionTester& expected_indices(std::vector<size_t> expected_indices) { |
| expected_indices_ = expected_indices; |
| return *this; |
| } |
| |
| void Test() { |
| IndirectionInit(); |
| EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size()); |
| for (size_t i = 0; i < expected_indices_.size(); i++) { |
| EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]]) |
| << "i: " << i << ", input_index:" << expected_indices_[i]; |
| } |
| } |
| |
| void TestCompressed() { |
| IndirectionCompressedInit(); |
| EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size()); |
| for (size_t i = 0; i < expected_indices_.size(); i++) { |
| if (expected_indices_[i] == kZero) { |
| EXPECT_EQ(indirection_buffer_[i], zero_buffer_.data()) |
| << "i: " << i << ", zero_buffer: " << zero_buffer_.data(); |
| } else { |
| EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]]) |
| << "i: " << i << ", input_index:" << expected_indices_[i]; |
| } |
| } |
| } |
| |
| private: |
| void IndirectionInit() { |
| const size_t kernel_size = kernel_height_ * kernel_width_; |
| const size_t output_height = xnn_compute_convolution_output_dimension( |
| input_height_ + padding_height_, kernel_height_, dilation_, |
| subsampling_); |
| const size_t output_width = xnn_compute_convolution_output_dimension( |
| input_width_ + padding_width_, kernel_width_, dilation_, subsampling_); |
| const size_t step_width = |
| dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_; |
| const size_t step_height = |
| kernel_size + (output_width - 1) * step_width * kernel_height_; |
| |
| input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_); |
| std::iota(input_.begin(), input_.end(), 0.0f); |
| zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f); |
| |
| const size_t num_indirection_elements = |
| (primary_tile_ - kernel_size) + output_height * step_height; |
| indirection_buffer_ = |
| xnnpack::Buffer<const float*>(num_indirection_elements); |
| xnn_operator op = {}; |
| xnn_convolution_operator conv_data; |
| op.convolution_op = &conv_data; |
| op.convolution_op->indirection_buffer = |
| reinterpret_cast<const void**>(indirection_buffer_.data()); |
| op.convolution_op->input = input_.data(); |
| op.input_pixel_stride = channels_; |
| op.zero_buffer = zero_buffer_.data(); |
| op.convolution_op->input_height = input_height_; |
| op.convolution_op->input_width = input_width_; |
| op.convolution_op->output_height = output_height; |
| op.convolution_op->output_width = output_width; |
| op.convolution_op->kernel_height = kernel_height_; |
| op.convolution_op->kernel_width = kernel_width_; |
| op.convolution_op->stride_height = subsampling_; |
| op.convolution_op->stride_width = subsampling_; |
| op.convolution_op->dilation_height = dilation_; |
| op.convolution_op->dilation_width = dilation_; |
| op.convolution_op->padding_top = padding_height_ / 2; |
| op.convolution_op->padding_left = padding_width_ / 2; |
| xnn_indirection_init_dwconv2d( |
| /*output_y_start=*/0, /*output_y_end=*/output_height, |
| op.convolution_op->indirection_buffer, op.convolution_op->input, |
| op.input_pixel_stride << /*log2_input_element_size=*/2, op.zero_buffer, |
| op.convolution_op->input_height, op.convolution_op->input_width, op.convolution_op->output_height, op.convolution_op->output_width, |
| op.convolution_op->kernel_height, op.convolution_op->kernel_width, op.convolution_op->stride_height, op.convolution_op->stride_width, |
| op.convolution_op->dilation_height, op.convolution_op->dilation_width, op.convolution_op->padding_top, op.convolution_op->padding_left, |
| step_height, step_width, primary_tile_); |
| } |
| |
| void IndirectionCompressedInit() { |
| const size_t kernel_size = kernel_height_ * kernel_width_; |
| const size_t output_height = xnn_compute_convolution_output_dimension( |
| input_height_ + padding_height_, kernel_height_, dilation_, |
| subsampling_); |
| const size_t output_width = xnn_compute_convolution_output_dimension( |
| input_width_ + padding_width_, kernel_width_, dilation_, subsampling_); |
| const size_t step_width = |
| dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_; |
| const size_t step_height = |
| kernel_size + (output_width - 1) * step_width * kernel_height_; |
| |
| input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_); |
| std::iota(input_.begin(), input_.end(), 0.0f); |
| zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f); |
| |
| const size_t indirect_top_height = |
| divide_round_up(padding_height_ / 2, subsampling_); |
| const size_t indirect_bot_height = |
| divide_round_up(padding_height_ / 2, subsampling_); |
| const size_t indirection_buffer_output_height = |
| (indirect_top_height + indirect_bot_height + 1); |
| |
| const size_t num_indirection_elements = |
| (primary_tile_ - kernel_size) + |
| indirection_buffer_output_height * step_height; |
| indirection_buffer_ = |
| xnnpack::Buffer<const float*>(num_indirection_elements); |
| xnn_operator op = {}; |
| xnn_convolution_operator conv_data; |
| op.convolution_op = &conv_data; |
| op.convolution_op->indirection_buffer = |
| reinterpret_cast<const void**>(indirection_buffer_.data()); |
| op.convolution_op->input = input_.data(); |
| op.input_pixel_stride = channels_; |
| op.zero_buffer = zero_buffer_.data(); |
| op.convolution_op->input_height = input_height_; |
| op.convolution_op->input_width = input_width_; |
| op.convolution_op->output_height = output_height; |
| op.convolution_op->output_width = output_width; |
| op.convolution_op->kernel_height = kernel_height_; |
| op.convolution_op->kernel_width = kernel_width_; |
| op.convolution_op->stride_height = subsampling_; |
| op.convolution_op->stride_width = subsampling_; |
| op.convolution_op->dilation_height = dilation_; |
| op.convolution_op->dilation_width = dilation_; |
| op.convolution_op->padding_top = padding_height_ / 2; |
| op.convolution_op->padding_left = padding_width_ / 2; |
| xnn_indirection_init_dwconv2d_compressed( |
| /*output_y_start=*/0, /*output_y_end=*/output_height, |
| op.convolution_op->indirection_buffer, op.convolution_op->input, |
| op.input_pixel_stride << /*log2_input_element_size=*/2, op.zero_buffer, |
| op.convolution_op->input_height, op.convolution_op->input_width, op.convolution_op->output_height, op.convolution_op->output_width, |
| op.convolution_op->kernel_height, op.convolution_op->kernel_width, op.convolution_op->stride_height, op.convolution_op->stride_width, |
| op.convolution_op->dilation_height, op.convolution_op->dilation_width, op.convolution_op->padding_top, op.convolution_op->padding_left, |
| step_height, step_width, indirect_top_height, indirect_bot_height, |
| primary_tile_); |
| } |
| |
| // Set by tests using setter functions. |
| size_t input_height_; |
| size_t input_width_; |
| size_t kernel_height_; |
| size_t kernel_width_; |
| size_t padding_height_ = 0; |
| size_t padding_width_ = 0; |
| size_t subsampling_ = 1; |
| size_t dilation_ = 1; |
| size_t channels_ = 1; |
| size_t primary_tile_; |
| size_t channel_tile_ = 1; |
| std::vector<size_t> expected_indices_; |
| |
| // Initialized by IndirectionInit. |
| xnnpack::Buffer<const float*> indirection_buffer_; |
| xnnpack::Buffer<float> input_; |
| xnnpack::Buffer<float> zero_buffer_; |
| }; |
| |
| TEST(INDIRECTION, input3x3_kernel1x1) { |
| IndirectionTester() |
| .input_height(3) |
| .input_width(3) |
| .kernel_height(1) |
| .kernel_width(1) |
| .primary_tile(1) |
| .expected_indices({0, 1, 2, 3, 4, 5, 6, 7, 8}) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION, input3x3_kernel2x2) { |
| IndirectionTester() |
| .input_height(3) |
| .input_width(3) |
| .kernel_height(2) |
| .kernel_width(2) |
| .primary_tile(4) |
| // input: kernel: |
| // 0 1 2 a b |
| // 3 4 5 c d |
| // 6 7 8 |
| .expected_indices({ |
| // For each output row, column major, and compress pointers within a |
| // single output row. |
| 0, |
| 3, |
| 1, |
| 4, |
| 2, |
| 5, |
| 3, |
| 6, |
| 4, |
| 7, |
| 5, |
| 8, |
| }) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION, input3x3_kernel1x1_subsampling2) { |
| IndirectionTester() |
| .input_height(3) |
| .input_width(3) |
| .kernel_height(1) |
| .kernel_width(1) |
| .subsampling(2) |
| .primary_tile(1) |
| // input: kernel: |
| // 0 1 2 a |
| // 3 4 5 |
| // 6 7 8 |
| .expected_indices({ |
| 0, |
| 2, |
| 6, |
| 8, |
| }) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION, input4x4_kernel2x2_subsampling2) { |
| IndirectionTester() |
| .input_height(4) |
| .input_width(4) |
| .kernel_height(2) |
| .kernel_width(2) |
| .subsampling(2) |
| .primary_tile(4) |
| // input: kernel: |
| // 0 1 2 3 a b |
| // 4 5 6 7 c d |
| // 8 9 10 11 |
| // 12 13 14 15 |
| .expected_indices({ |
| 0, |
| 4, |
| 1, |
| 5, |
| 2, |
| 6, |
| 3, |
| 7, |
| 8, |
| 12, |
| 9, |
| 13, |
| 10, |
| 14, |
| 11, |
| 15, |
| }) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION, input4x4_kernel2x1_primarytile4) { |
| IndirectionTester() |
| .input_height(4) |
| .input_width(4) |
| .kernel_height(2) |
| .kernel_width(1) |
| .primary_tile(4) |
| // input: kernel: |
| // 0 1 2 3 a |
| // 4 5 6 7 b |
| // 8 9 10 11 |
| // 12 13 14 15 |
| .expected_indices({ |
| 0, |
| 4, |
| 1, |
| 5, |
| 2, |
| 6, |
| 3, |
| 7, |
| 4, |
| 8, |
| 5, |
| 9, |
| 6, |
| 10, |
| 7, |
| 11, |
| 8, |
| 12, |
| 9, |
| 13, |
| 10, |
| 14, |
| 11, |
| 15, |
| // 4 - (2 x 2) extra elements. |
| 15, |
| 15, |
| }) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION, input4x4_kernel1x2_primarytile4_subsampling2) { |
| IndirectionTester() |
| .input_height(4) |
| .input_width(4) |
| .kernel_height(1) |
| .kernel_width(2) |
| .primary_tile(4) |
| .subsampling(2) |
| // input: kernel: |
| // 0 1 2 3 a b |
| // 4 5 6 7 |
| // 8 9 10 11 |
| // 12 13 14 15 |
| .expected_indices({ |
| 0, |
| 1, |
| 2, |
| 3, |
| 8, |
| 9, |
| 10, |
| 11, |
| // primary_tile - kernel_size (4 - 2) extra elements, set to last |
| // input pixel. |
| 11, |
| 11, |
| }) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION, input4x4_kernel2x1_primarytile4_subsampling2) { |
| IndirectionTester() |
| .input_height(4) |
| .input_width(4) |
| .kernel_height(2) |
| .kernel_width(1) |
| .primary_tile(4) |
| .subsampling(2) |
| // input: kernel: output: |
| // 0 1 2 3 a A B |
| // 4 5 6 7 b C D |
| // 8 9 10 11 |
| // 12 13 14 15 |
| .expected_indices({0, 4, 2, 6, 8, 12, 10, 14, |
| // primary_tile - kernel_size (4 - 2) extra elements, |
| // set to last input pixel. |
| 14, 14}) |
| .Test(); |
| } |
| |
| TEST(INDIRECTION_COMPRESSED, input3x3_kernel1x1) { |
| IndirectionTester() |
| .input_height(3) |
| .input_width(3) |
| .kernel_height(1) |
| .kernel_width(1) |
| .primary_tile(1) |
| .expected_indices({0, 1, 2}) |
| .TestCompressed(); |
| } |
| |
| TEST(INDIRECTION_COMPRESSED, input3x3_kernel2x2_padding2x2) { |
| IndirectionTester() |
| .input_height(3) |
| .input_width(3) |
| .kernel_height(2) |
| .kernel_width(2) |
| .padding_height(2) |
| .padding_width(2) |
| .primary_tile(4) |
| // input: kernel: |
| // 0 0 0 0 0 a b |
| // 0 0 1 2 0 c d |
| // 0 3 4 5 0 |
| // 0 6 7 8 0 |
| // 0 0 0 0 0 |
| .expected_indices({ |
| // Top section. |
| kZero, |
| kZero, |
| kZero, |
| 0, |
| kZero, |
| 1, |
| kZero, |
| 2, |
| kZero, |
| kZero, |
| // Compressed rows |
| kZero, |
| kZero, |
| 0, |
| 3, |
| 1, |
| 4, |
| 2, |
| 5, |
| kZero, |
| kZero, |
| // Compressed rows. |
| kZero, |
| kZero, |
| 6, |
| kZero, |
| 7, |
| kZero, |
| 8, |
| kZero, |
| kZero, |
| kZero, |
| }) |
| .TestCompressed(); |
| } |
| |
| TEST(INDIRECTION_COMPRESSED, input2x2_kernel2x2_padding2x2_subsampling2) { |
| IndirectionTester() |
| .input_height(2) |
| .input_width(2) |
| .kernel_height(2) |
| .kernel_width(2) |
| .padding_height(2) |
| .padding_width(2) |
| .primary_tile(4) |
| .subsampling(2) |
| // input: kernel: |
| // 0 0 0 0 a b |
| // 0 0 1 0 c d |
| // 0 2 3 0 |
| // 0 0 0 0 |
| .expected_indices({ |
| // Top section. |
| kZero, |
| kZero, |
| kZero, |
| 0, |
| kZero, |
| 1, |
| kZero, |
| kZero, |
| // We don't actually need a compressed row, but it is easier to assume |
| // that there is always 1 compressed row, |
| // and this is the same as the bottom section. |
| kZero, |
| kZero, |
| 2, |
| kZero, |
| 3, |
| kZero, |
| kZero, |
| kZero, |
| // Bottom section is just all 0. |
| kZero, |
| kZero, |
| 2, |
| kZero, |
| 3, |
| kZero, |
| kZero, |
| kZero, |
| }) |
| .TestCompressed(); |
| } |
| } // namespace |
| } // namespace xnnpack |