blob: 1d29385c5bb3b681529b27b2febe0210c5a89c39 [file] [log] [blame] [edit]
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#include "src/xnnpack/indirection.h"
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>
#include <gtest/gtest.h>
#include "include/xnnpack.h"
#include "src/xnnpack/buffer.h"
#include "src/xnnpack/math.h"
#include "src/xnnpack/operator-utils.h"
#include "src/xnnpack/operator.h"
namespace xnnpack {
namespace {
// Constant index to refer to zero buffer.
static constexpr uintptr_t kZero = SIZE_MAX;
class IndirectionTester {
public:
IndirectionTester& input_height(size_t input_height) {
input_height_ = input_height;
return *this;
}
IndirectionTester& input_width(size_t input_width) {
input_width_ = input_width;
return *this;
}
IndirectionTester& kernel_height(size_t kernel_height) {
kernel_height_ = kernel_height;
return *this;
}
IndirectionTester& kernel_width(size_t kernel_width) {
kernel_width_ = kernel_width;
return *this;
}
IndirectionTester& padding_height(size_t padding_height) {
padding_height_ = padding_height;
return *this;
}
IndirectionTester& padding_width(size_t padding_width) {
padding_width_ = padding_width;
return *this;
}
IndirectionTester& subsampling(size_t subsampling) {
subsampling_ = subsampling;
return *this;
}
IndirectionTester& dilation(size_t dilation) {
dilation_ = dilation;
return *this;
}
IndirectionTester& channels(size_t channels) {
channels_ = channels;
return *this;
}
IndirectionTester& primary_tile(size_t primary_tile) {
primary_tile_ = primary_tile;
return *this;
}
IndirectionTester& channel_tile(size_t channel_tile) {
channel_tile_ = channel_tile;
return *this;
}
IndirectionTester& expected_indices(std::vector<size_t> expected_indices) {
expected_indices_ = expected_indices;
return *this;
}
void Test() {
IndirectionInit();
EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size());
for (size_t i = 0; i < expected_indices_.size(); i++) {
EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]])
<< "i: " << i << ", input_index:" << expected_indices_[i];
}
}
void TestCompressed() {
IndirectionCompressedInit();
EXPECT_EQ(indirection_buffer_.size(), expected_indices_.size());
for (size_t i = 0; i < expected_indices_.size(); i++) {
if (expected_indices_[i] == kZero) {
EXPECT_EQ(indirection_buffer_[i], zero_buffer_.data())
<< "i: " << i << ", zero_buffer: " << zero_buffer_.data();
} else {
EXPECT_EQ(indirection_buffer_[i], &input_[expected_indices_[i]])
<< "i: " << i << ", input_index:" << expected_indices_[i];
}
}
}
private:
void IndirectionInit() {
const size_t kernel_size = kernel_height_ * kernel_width_;
const size_t output_height = xnn_compute_convolution_output_dimension(
input_height_ + padding_height_, kernel_height_, dilation_,
subsampling_);
const size_t output_width = xnn_compute_convolution_output_dimension(
input_width_ + padding_width_, kernel_width_, dilation_, subsampling_);
const size_t step_width =
dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_;
const size_t step_height =
kernel_size + (output_width - 1) * step_width * kernel_height_;
input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_);
std::iota(input_.begin(), input_.end(), 0.0f);
zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f);
const size_t num_indirection_elements =
(primary_tile_ - kernel_size) + output_height * step_height;
indirection_buffer_ =
xnnpack::Buffer<const float*>(num_indirection_elements);
xnn_operator op = {};
xnn_convolution_operator conv_data;
op.convolution_op = &conv_data;
op.convolution_op->indirection_buffer =
reinterpret_cast<const void**>(indirection_buffer_.data());
op.convolution_op->input = input_.data();
op.input_pixel_stride = channels_;
op.zero_buffer = zero_buffer_.data();
op.convolution_op->input_height = input_height_;
op.convolution_op->input_width = input_width_;
op.convolution_op->output_height = output_height;
op.convolution_op->output_width = output_width;
op.convolution_op->kernel_height = kernel_height_;
op.convolution_op->kernel_width = kernel_width_;
op.convolution_op->stride_height = subsampling_;
op.convolution_op->stride_width = subsampling_;
op.convolution_op->dilation_height = dilation_;
op.convolution_op->dilation_width = dilation_;
op.convolution_op->padding_top = padding_height_ / 2;
op.convolution_op->padding_left = padding_width_ / 2;
xnn_indirection_init_dwconv2d(
/*output_y_start=*/0, /*output_y_end=*/output_height,
op.convolution_op->indirection_buffer, op.convolution_op->input,
op.input_pixel_stride << /*log2_input_element_size=*/2, op.zero_buffer,
op.convolution_op->input_height, op.convolution_op->input_width, op.convolution_op->output_height, op.convolution_op->output_width,
op.convolution_op->kernel_height, op.convolution_op->kernel_width, op.convolution_op->stride_height, op.convolution_op->stride_width,
op.convolution_op->dilation_height, op.convolution_op->dilation_width, op.convolution_op->padding_top, op.convolution_op->padding_left,
step_height, step_width, primary_tile_);
}
void IndirectionCompressedInit() {
const size_t kernel_size = kernel_height_ * kernel_width_;
const size_t output_height = xnn_compute_convolution_output_dimension(
input_height_ + padding_height_, kernel_height_, dilation_,
subsampling_);
const size_t output_width = xnn_compute_convolution_output_dimension(
input_width_ + padding_width_, kernel_width_, dilation_, subsampling_);
const size_t step_width =
dilation_ == 1 ? min(subsampling_, kernel_width_) : kernel_width_;
const size_t step_height =
kernel_size + (output_width - 1) * step_width * kernel_height_;
input_ = xnnpack::Buffer<float>(channels_ * input_height_ * input_width_);
std::iota(input_.begin(), input_.end(), 0.0f);
zero_buffer_ = xnnpack::Buffer<float>(channels_, 0.0f);
const size_t indirect_top_height =
divide_round_up(padding_height_ / 2, subsampling_);
const size_t indirect_bot_height =
divide_round_up(padding_height_ / 2, subsampling_);
const size_t indirection_buffer_output_height =
(indirect_top_height + indirect_bot_height + 1);
const size_t num_indirection_elements =
(primary_tile_ - kernel_size) +
indirection_buffer_output_height * step_height;
indirection_buffer_ =
xnnpack::Buffer<const float*>(num_indirection_elements);
xnn_operator op = {};
xnn_convolution_operator conv_data;
op.convolution_op = &conv_data;
op.convolution_op->indirection_buffer =
reinterpret_cast<const void**>(indirection_buffer_.data());
op.convolution_op->input = input_.data();
op.input_pixel_stride = channels_;
op.zero_buffer = zero_buffer_.data();
op.convolution_op->input_height = input_height_;
op.convolution_op->input_width = input_width_;
op.convolution_op->output_height = output_height;
op.convolution_op->output_width = output_width;
op.convolution_op->kernel_height = kernel_height_;
op.convolution_op->kernel_width = kernel_width_;
op.convolution_op->stride_height = subsampling_;
op.convolution_op->stride_width = subsampling_;
op.convolution_op->dilation_height = dilation_;
op.convolution_op->dilation_width = dilation_;
op.convolution_op->padding_top = padding_height_ / 2;
op.convolution_op->padding_left = padding_width_ / 2;
xnn_indirection_init_dwconv2d_compressed(
/*output_y_start=*/0, /*output_y_end=*/output_height,
op.convolution_op->indirection_buffer, op.convolution_op->input,
op.input_pixel_stride << /*log2_input_element_size=*/2, op.zero_buffer,
op.convolution_op->input_height, op.convolution_op->input_width, op.convolution_op->output_height, op.convolution_op->output_width,
op.convolution_op->kernel_height, op.convolution_op->kernel_width, op.convolution_op->stride_height, op.convolution_op->stride_width,
op.convolution_op->dilation_height, op.convolution_op->dilation_width, op.convolution_op->padding_top, op.convolution_op->padding_left,
step_height, step_width, indirect_top_height, indirect_bot_height,
primary_tile_);
}
// Set by tests using setter functions.
size_t input_height_;
size_t input_width_;
size_t kernel_height_;
size_t kernel_width_;
size_t padding_height_ = 0;
size_t padding_width_ = 0;
size_t subsampling_ = 1;
size_t dilation_ = 1;
size_t channels_ = 1;
size_t primary_tile_;
size_t channel_tile_ = 1;
std::vector<size_t> expected_indices_;
// Initialized by IndirectionInit.
xnnpack::Buffer<const float*> indirection_buffer_;
xnnpack::Buffer<float> input_;
xnnpack::Buffer<float> zero_buffer_;
};
TEST(INDIRECTION, input3x3_kernel1x1) {
IndirectionTester()
.input_height(3)
.input_width(3)
.kernel_height(1)
.kernel_width(1)
.primary_tile(1)
.expected_indices({0, 1, 2, 3, 4, 5, 6, 7, 8})
.Test();
}
TEST(INDIRECTION, input3x3_kernel2x2) {
IndirectionTester()
.input_height(3)
.input_width(3)
.kernel_height(2)
.kernel_width(2)
.primary_tile(4)
// input: kernel:
// 0 1 2 a b
// 3 4 5 c d
// 6 7 8
.expected_indices({
// For each output row, column major, and compress pointers within a
// single output row.
0,
3,
1,
4,
2,
5,
3,
6,
4,
7,
5,
8,
})
.Test();
}
TEST(INDIRECTION, input3x3_kernel1x1_subsampling2) {
IndirectionTester()
.input_height(3)
.input_width(3)
.kernel_height(1)
.kernel_width(1)
.subsampling(2)
.primary_tile(1)
// input: kernel:
// 0 1 2 a
// 3 4 5
// 6 7 8
.expected_indices({
0,
2,
6,
8,
})
.Test();
}
TEST(INDIRECTION, input4x4_kernel2x2_subsampling2) {
IndirectionTester()
.input_height(4)
.input_width(4)
.kernel_height(2)
.kernel_width(2)
.subsampling(2)
.primary_tile(4)
// input: kernel:
// 0 1 2 3 a b
// 4 5 6 7 c d
// 8 9 10 11
// 12 13 14 15
.expected_indices({
0,
4,
1,
5,
2,
6,
3,
7,
8,
12,
9,
13,
10,
14,
11,
15,
})
.Test();
}
TEST(INDIRECTION, input4x4_kernel2x1_primarytile4) {
IndirectionTester()
.input_height(4)
.input_width(4)
.kernel_height(2)
.kernel_width(1)
.primary_tile(4)
// input: kernel:
// 0 1 2 3 a
// 4 5 6 7 b
// 8 9 10 11
// 12 13 14 15
.expected_indices({
0,
4,
1,
5,
2,
6,
3,
7,
4,
8,
5,
9,
6,
10,
7,
11,
8,
12,
9,
13,
10,
14,
11,
15,
// 4 - (2 x 2) extra elements.
15,
15,
})
.Test();
}
TEST(INDIRECTION, input4x4_kernel1x2_primarytile4_subsampling2) {
IndirectionTester()
.input_height(4)
.input_width(4)
.kernel_height(1)
.kernel_width(2)
.primary_tile(4)
.subsampling(2)
// input: kernel:
// 0 1 2 3 a b
// 4 5 6 7
// 8 9 10 11
// 12 13 14 15
.expected_indices({
0,
1,
2,
3,
8,
9,
10,
11,
// primary_tile - kernel_size (4 - 2) extra elements, set to last
// input pixel.
11,
11,
})
.Test();
}
TEST(INDIRECTION, input4x4_kernel2x1_primarytile4_subsampling2) {
IndirectionTester()
.input_height(4)
.input_width(4)
.kernel_height(2)
.kernel_width(1)
.primary_tile(4)
.subsampling(2)
// input: kernel: output:
// 0 1 2 3 a A B
// 4 5 6 7 b C D
// 8 9 10 11
// 12 13 14 15
.expected_indices({0, 4, 2, 6, 8, 12, 10, 14,
// primary_tile - kernel_size (4 - 2) extra elements,
// set to last input pixel.
14, 14})
.Test();
}
TEST(INDIRECTION_COMPRESSED, input3x3_kernel1x1) {
IndirectionTester()
.input_height(3)
.input_width(3)
.kernel_height(1)
.kernel_width(1)
.primary_tile(1)
.expected_indices({0, 1, 2})
.TestCompressed();
}
TEST(INDIRECTION_COMPRESSED, input3x3_kernel2x2_padding2x2) {
IndirectionTester()
.input_height(3)
.input_width(3)
.kernel_height(2)
.kernel_width(2)
.padding_height(2)
.padding_width(2)
.primary_tile(4)
// input: kernel:
// 0 0 0 0 0 a b
// 0 0 1 2 0 c d
// 0 3 4 5 0
// 0 6 7 8 0
// 0 0 0 0 0
.expected_indices({
// Top section.
kZero,
kZero,
kZero,
0,
kZero,
1,
kZero,
2,
kZero,
kZero,
// Compressed rows
kZero,
kZero,
0,
3,
1,
4,
2,
5,
kZero,
kZero,
// Compressed rows.
kZero,
kZero,
6,
kZero,
7,
kZero,
8,
kZero,
kZero,
kZero,
})
.TestCompressed();
}
TEST(INDIRECTION_COMPRESSED, input2x2_kernel2x2_padding2x2_subsampling2) {
IndirectionTester()
.input_height(2)
.input_width(2)
.kernel_height(2)
.kernel_width(2)
.padding_height(2)
.padding_width(2)
.primary_tile(4)
.subsampling(2)
// input: kernel:
// 0 0 0 0 a b
// 0 0 1 0 c d
// 0 2 3 0
// 0 0 0 0
.expected_indices({
// Top section.
kZero,
kZero,
kZero,
0,
kZero,
1,
kZero,
kZero,
// We don't actually need a compressed row, but it is easier to assume
// that there is always 1 compressed row,
// and this is the same as the bottom section.
kZero,
kZero,
2,
kZero,
3,
kZero,
kZero,
kZero,
// Bottom section is just all 0.
kZero,
kZero,
2,
kZero,
3,
kZero,
kZero,
kZero,
})
.TestCompressed();
}
} // namespace
} // namespace xnnpack