blob: 596120ef27519632f9a12d27939726007f41188e [file] [log] [blame]
// C Extension module to test pycore_lock.h API
#include "parts.h"
#include "pycore_lock.h"
#include "pycore_pythread.h" // PyThread_get_thread_ident_ex()
#include "clinic/test_lock.c.h"
#ifdef MS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h> // usleep()
#endif
/*[clinic input]
module _testinternalcapi
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/
static void
pysleep(int ms)
{
#ifdef MS_WINDOWS
Sleep(ms);
#else
usleep(ms * 1000);
#endif
}
static PyObject *
test_lock_basic(PyObject *self, PyObject *obj)
{
PyMutex m = (PyMutex){0};
// uncontended lock and unlock
PyMutex_Lock(&m);
assert(m._bits == 1);
PyMutex_Unlock(&m);
assert(m._bits == 0);
Py_RETURN_NONE;
}
struct test_lock2_data {
PyMutex m;
PyEvent done;
int started;
};
static void
lock_thread(void *arg)
{
struct test_lock2_data *test_data = arg;
PyMutex *m = &test_data->m;
_Py_atomic_store_int(&test_data->started, 1);
PyMutex_Lock(m);
// gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set
// (m->_bits == 3) due to bucket collisions in the parking lot hash table
// between this mutex and the `test_data.done` event.
assert(m->_bits == 1 || m->_bits == 3);
PyMutex_Unlock(m);
assert(m->_bits == 0);
_PyEvent_Notify(&test_data->done);
}
static PyObject *
test_lock_two_threads(PyObject *self, PyObject *obj)
{
// lock attempt by two threads
struct test_lock2_data test_data;
memset(&test_data, 0, sizeof(test_data));
PyMutex_Lock(&test_data.m);
assert(test_data.m._bits == 1);
PyThread_start_new_thread(lock_thread, &test_data);
// wait up to two seconds for the lock_thread to attempt to lock "m"
int iters = 0;
uint8_t v;
do {
pysleep(10); // allow some time for the other thread to try to lock
v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits);
assert(v == 1 || v == 3);
iters++;
} while (v != 3 && iters < 200);
// both the "locked" and the "has parked" bits should be set
v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits);
assert(v == 3);
PyMutex_Unlock(&test_data.m);
PyEvent_Wait(&test_data.done);
assert(test_data.m._bits == 0);
Py_RETURN_NONE;
}
#define COUNTER_THREADS 5
#define COUNTER_ITERS 10000
struct test_data_counter {
PyMutex m;
Py_ssize_t counter;
};
struct thread_data_counter {
struct test_data_counter *test_data;
PyEvent done_event;
};
static void
counter_thread(void *arg)
{
struct thread_data_counter *thread_data = arg;
struct test_data_counter *test_data = thread_data->test_data;
for (Py_ssize_t i = 0; i < COUNTER_ITERS; i++) {
PyMutex_Lock(&test_data->m);
test_data->counter++;
PyMutex_Unlock(&test_data->m);
}
_PyEvent_Notify(&thread_data->done_event);
}
static PyObject *
test_lock_counter(PyObject *self, PyObject *obj)
{
// Test with rapidly locking and unlocking mutex
struct test_data_counter test_data;
memset(&test_data, 0, sizeof(test_data));
struct thread_data_counter thread_data[COUNTER_THREADS];
memset(&thread_data, 0, sizeof(thread_data));
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
thread_data[i].test_data = &test_data;
PyThread_start_new_thread(counter_thread, &thread_data[i]);
}
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
PyEvent_Wait(&thread_data[i].done_event);
}
assert(test_data.counter == COUNTER_THREADS * COUNTER_ITERS);
Py_RETURN_NONE;
}
#define SLOW_COUNTER_ITERS 100
static void
slow_counter_thread(void *arg)
{
struct thread_data_counter *thread_data = arg;
struct test_data_counter *test_data = thread_data->test_data;
for (Py_ssize_t i = 0; i < SLOW_COUNTER_ITERS; i++) {
PyMutex_Lock(&test_data->m);
if (i % 7 == 0) {
pysleep(2);
}
test_data->counter++;
PyMutex_Unlock(&test_data->m);
}
_PyEvent_Notify(&thread_data->done_event);
}
static PyObject *
test_lock_counter_slow(PyObject *self, PyObject *obj)
{
// Test lock/unlock with occasional "long" critical section, which will
// trigger handoff of the lock.
struct test_data_counter test_data;
memset(&test_data, 0, sizeof(test_data));
struct thread_data_counter thread_data[COUNTER_THREADS];
memset(&thread_data, 0, sizeof(thread_data));
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
thread_data[i].test_data = &test_data;
PyThread_start_new_thread(slow_counter_thread, &thread_data[i]);
}
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
PyEvent_Wait(&thread_data[i].done_event);
}
assert(test_data.counter == COUNTER_THREADS * SLOW_COUNTER_ITERS);
Py_RETURN_NONE;
}
struct bench_lock {
char padding[200];
PyMutex m;
double value;
};
struct bench_config {
int stop;
int work_inside;
int work_outside;
int num_acquisitions;
int random_locks;
Py_ssize_t target_iters;
Py_ssize_t num_locks;
struct bench_lock *locks;
};
struct bench_thread_data {
struct bench_config *config;
struct bench_lock *lock;
uint64_t rng_state;
Py_ssize_t iters;
PyEvent done;
};
static uint64_t
splitmix64(uint64_t *state)
{
uint64_t z = (*state += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
return z ^ (z >> 31);
}
static void
thread_benchmark_locks(void *arg)
{
struct bench_thread_data *td = arg;
struct bench_config *config = td->config;
int work_inside = config->work_inside;
int work_outside = config->work_outside;
int num_acquisitions = config->num_acquisitions;
Py_ssize_t target_iters = config->target_iters;
uint64_t rng_state = td->rng_state;
double local_value = 0.0;
double my_value = 1.0;
Py_ssize_t iters = 0;
for (;;) {
if (target_iters > 0) {
if (iters >= target_iters) {
break;
}
}
else if (_Py_atomic_load_int_relaxed(&config->stop)) {
break;
}
struct bench_lock *lock = td->lock;
if (config->random_locks) {
uint32_t r = (uint32_t)splitmix64(&rng_state);
// Fast modulo reduction to pick a random lock, adapted from:
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
Py_ssize_t idx = ((uint64_t)r * (uint32_t)config->num_locks) >> 32;
lock = &config->locks[idx];
}
for (int acq = 0; acq < num_acquisitions; acq++) {
PyMutex_Lock(&lock->m);
for (int i = 0; i < work_inside; i++) {
lock->value += my_value;
my_value = lock->value;
}
PyMutex_Unlock(&lock->m);
}
for (int i = 0; i < work_outside; i++) {
local_value += my_value;
my_value = local_value;
}
iters += num_acquisitions;
}
td->iters = iters;
_PyEvent_Notify(&td->done);
}
/*[clinic input]
_testinternalcapi.benchmark_locks
num_threads: Py_ssize_t
work_inside: int = 1
work_outside: int = 0
time_ms: int = 1000
num_acquisitions: int = 1
total_iters: Py_ssize_t = 0
num_locks: Py_ssize_t = 1
random_locks: bool = False
/
[clinic start generated code]*/
static PyObject *
_testinternalcapi_benchmark_locks_impl(PyObject *module,
Py_ssize_t num_threads,
int work_inside, int work_outside,
int time_ms, int num_acquisitions,
Py_ssize_t total_iters,
Py_ssize_t num_locks,
int random_locks)
/*[clinic end generated code: output=6258dc9de8cb9af1 input=d622cf4e1c4d008b]*/
{
// Run from Tools/lockbench/lockbench.py
// Based on the WebKit lock benchmarks:
// https://github.com/WebKit/WebKit/blob/main/Source/WTF/benchmarks/LockSpeedTest.cpp
// See also https://webkit.org/blog/6161/locking-in-webkit/
PyObject *thread_iters = NULL;
PyObject *res = NULL;
struct bench_thread_data *thread_data = NULL;
struct bench_config config = {
.work_inside = work_inside,
.work_outside = work_outside,
.num_acquisitions = num_acquisitions,
.target_iters = total_iters,
.num_locks = num_locks,
.random_locks = random_locks,
};
config.locks = PyMem_Calloc(num_locks, sizeof(*config.locks));
if (config.locks == NULL) {
PyErr_NoMemory();
goto exit;
}
thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data));
if (thread_data == NULL) {
PyErr_NoMemory();
goto exit;
}
thread_iters = PyList_New(num_threads);
if (thread_iters == NULL) {
goto exit;
}
PyTime_t start, end;
if (PyTime_PerfCounter(&start) < 0) {
goto exit;
}
for (Py_ssize_t i = 0; i < num_threads; i++) {
thread_data[i].config = &config;
thread_data[i].lock = &config.locks[i % num_locks];
thread_data[i].rng_state = (uint64_t)i + 1;
PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]);
}
if (total_iters == 0) {
pysleep(time_ms);
_Py_atomic_store_int(&config.stop, 1);
}
for (Py_ssize_t i = 0; i < num_threads; i++) {
PyEvent_Wait(&thread_data[i].done);
}
if (PyTime_PerfCounter(&end) < 0) {
goto exit;
}
Py_ssize_t sum_iters = 0;
for (Py_ssize_t i = 0; i < num_threads; i++) {
PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters);
if (iter == NULL) {
goto exit;
}
PyList_SET_ITEM(thread_iters, i, iter);
sum_iters += thread_data[i].iters;
}
assert(end != start);
PyTime_t elapsed_ns = end - start;
double rate = sum_iters * 1e9 / elapsed_ns;
res = Py_BuildValue("(dOL)", rate, thread_iters,
(long long)elapsed_ns);
exit:
PyMem_Free(config.locks);
PyMem_Free(thread_data);
Py_XDECREF(thread_iters);
return res;
}
static PyObject *
test_lock_benchmark(PyObject *module, PyObject *obj)
{
// Just make sure the benchmark runs without crashing
PyObject *res = _testinternalcapi_benchmark_locks_impl(
module, 1, 1, 0, 100, 1, 0, 1, 0);
if (res == NULL) {
return NULL;
}
Py_DECREF(res);
Py_RETURN_NONE;
}
static int
init_maybe_fail(void *arg)
{
int *counter = (int *)arg;
(*counter)++;
if (*counter < 5) {
// failure
return -1;
}
assert(*counter == 5);
return 0;
}
static PyObject *
test_lock_once(PyObject *self, PyObject *obj)
{
_PyOnceFlag once = {0};
int counter = 0;
for (int i = 0; i < 10; i++) {
int res = _PyOnceFlag_CallOnce(&once, init_maybe_fail, &counter);
if (i < 4) {
assert(res == -1);
}
else {
assert(res == 0);
assert(counter == 5);
}
}
Py_RETURN_NONE;
}
struct test_rwlock_data {
Py_ssize_t nthreads;
_PyRWMutex rw;
PyEvent step1;
PyEvent step2;
PyEvent step3;
PyEvent done;
};
static void
rdlock_thread(void *arg)
{
struct test_rwlock_data *test_data = arg;
// Acquire the lock in read mode
_PyRWMutex_RLock(&test_data->rw);
PyEvent_Wait(&test_data->step1);
_PyRWMutex_RUnlock(&test_data->rw);
_PyRWMutex_RLock(&test_data->rw);
PyEvent_Wait(&test_data->step3);
_PyRWMutex_RUnlock(&test_data->rw);
if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) {
_PyEvent_Notify(&test_data->done);
}
}
static void
wrlock_thread(void *arg)
{
struct test_rwlock_data *test_data = arg;
// First acquire the lock in write mode
_PyRWMutex_Lock(&test_data->rw);
PyEvent_Wait(&test_data->step2);
_PyRWMutex_Unlock(&test_data->rw);
if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) {
_PyEvent_Notify(&test_data->done);
}
}
static void
wait_until(uintptr_t *ptr, uintptr_t value)
{
// wait up to two seconds for *ptr == value
int iters = 0;
uintptr_t bits;
do {
pysleep(10);
bits = _Py_atomic_load_uintptr(ptr);
iters++;
} while (bits != value && iters < 200);
}
static PyObject *
test_lock_rwlock(PyObject *self, PyObject *obj)
{
struct test_rwlock_data test_data = {.nthreads = 3};
_PyRWMutex_Lock(&test_data.rw);
assert(test_data.rw.bits == 1);
_PyRWMutex_Unlock(&test_data.rw);
assert(test_data.rw.bits == 0);
// Start two readers
PyThread_start_new_thread(rdlock_thread, &test_data);
PyThread_start_new_thread(rdlock_thread, &test_data);
// wait up to two seconds for the threads to attempt to read-lock "rw"
wait_until(&test_data.rw.bits, 8);
assert(test_data.rw.bits == 8);
// start writer (while readers hold lock)
PyThread_start_new_thread(wrlock_thread, &test_data);
wait_until(&test_data.rw.bits, 10);
assert(test_data.rw.bits == 10);
// readers release lock, writer should acquire it
_PyEvent_Notify(&test_data.step1);
wait_until(&test_data.rw.bits, 3);
assert(test_data.rw.bits == 3);
// writer releases lock, readers acquire it
_PyEvent_Notify(&test_data.step2);
wait_until(&test_data.rw.bits, 8);
assert(test_data.rw.bits == 8);
// readers release lock again
_PyEvent_Notify(&test_data.step3);
wait_until(&test_data.rw.bits, 0);
assert(test_data.rw.bits == 0);
PyEvent_Wait(&test_data.done);
Py_RETURN_NONE;
}
static PyObject *
test_lock_recursive(PyObject *self, PyObject *obj)
{
_PyRecursiveMutex m = (_PyRecursiveMutex){0};
assert(!_PyRecursiveMutex_IsLockedByCurrentThread(&m));
_PyRecursiveMutex_Lock(&m);
assert(m.thread == PyThread_get_thread_ident_ex());
assert(PyMutex_IsLocked(&m.mutex));
assert(m.level == 0);
_PyRecursiveMutex_Lock(&m);
assert(m.level == 1);
_PyRecursiveMutex_Unlock(&m);
_PyRecursiveMutex_Unlock(&m);
assert(m.thread == 0);
assert(!PyMutex_IsLocked(&m.mutex));
assert(m.level == 0);
Py_RETURN_NONE;
}
static PyMethodDef test_methods[] = {
{"test_lock_basic", test_lock_basic, METH_NOARGS},
{"test_lock_two_threads", test_lock_two_threads, METH_NOARGS},
{"test_lock_counter", test_lock_counter, METH_NOARGS},
{"test_lock_counter_slow", test_lock_counter_slow, METH_NOARGS},
_TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF
{"test_lock_benchmark", test_lock_benchmark, METH_NOARGS},
{"test_lock_once", test_lock_once, METH_NOARGS},
{"test_lock_rwlock", test_lock_rwlock, METH_NOARGS},
{"test_lock_recursive", test_lock_recursive, METH_NOARGS},
{NULL, NULL} /* sentinel */
};
int
_PyTestInternalCapi_Init_Lock(PyObject *mod)
{
if (PyModule_AddFunctions(mod, test_methods) < 0) {
return -1;
}
return 0;
}