Skip to content

Commit

Permalink
switch from tp_init to tp_new
Browse files Browse the repository at this point in the history
tp_init (equivalent to __init__) is allowed to be called more than once,
or even not at all. Now that it's responsible for initializing
self->lock, that would lead to a memory leak and/or memory corruption,
and we need to define tp_new instead.

This change fixes a bug where we were incorrectly freeing self->lock
even if tp_init returned early without initializing it. This was causing
segfaults in CI on macOS.
  • Loading branch information
oconnor663 committed Jan 13, 2022
1 parent 41b81ee commit bd10cd4
Showing 1 changed file with 37 additions and 22 deletions.
59 changes: 37 additions & 22 deletions c_impl/blake3module.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,18 @@ static void Blake3_dealloc(Blake3Object *self) {
Py_TYPE(self)->tp_free((PyObject *)self);
}

static int Blake3_init(Blake3Object *self, PyObject *args, PyObject *kwds) {
static PyObject *Blake3_new(PyTypeObject *type, PyObject *args,
PyObject *kwds) {
Blake3Object *self = NULL;
PyThread_type_lock self_lock = NULL;
Py_buffer data = {0};
Py_buffer key = {0};
const char *derive_key_context = NULL;
Py_ssize_t max_threads = 1;
bool usedforsecurity = true;

PyObject *ret = NULL;

static char *kwlist[] = {
"", // data, positional-only
"key",
Expand All @@ -54,14 +65,6 @@ static int Blake3_init(Blake3Object *self, PyObject *args, PyObject *kwds) {
"usedforsecurity", // currently ignored
NULL,
};
Py_buffer data = {0};
Py_buffer key = {0};
const char *derive_key_context = NULL;
Py_ssize_t max_threads = 1;
bool usedforsecurity = true;

int ret = -1;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*$y*snp", kwlist, &data, &key,
&derive_key_context, &max_threads,
&usedforsecurity)) {
Expand Down Expand Up @@ -92,6 +95,20 @@ static int Blake3_init(Blake3Object *self, PyObject *args, PyObject *kwds) {
goto exit;
}

self = (Blake3Object *)type->tp_alloc(type, 0);
if (self == NULL) {
goto exit;
}

// TODO: Hashlib implementations do an optimization where they avoid
// allocating this lock unless it's needed. Is that worth it? It would mean
// we'd need to handle the possible allocation failure at every lock site.
// (Hashlib itself might not be handling these failures correctly.)
self_lock = PyThread_allocate_lock();
if (self_lock == NULL) {
goto exit;
}

if (key.obj != NULL) {
blake3_hasher_init_keyed(&self->hasher, key.buf);
} else if (derive_key_context != NULL) {
Expand All @@ -114,20 +131,19 @@ static int Blake3_init(Blake3Object *self, PyObject *args, PyObject *kwds) {
}
}

// TODO: Hashlib implementations do an optimization where they avoid
// allocating this lock unless its needed. Is that worth it? It would mean
// we'd need to handle the possible allocation failure at every lock site.
// (Notably, hashlib seems not to handle this and allows an unlikely race
// condition.)
self->lock = PyThread_allocate_lock();
if (self->lock == NULL) {
goto exit;
}

// success
ret = 0;
self->lock = self_lock;
ret = (PyObject *)self;
self_lock = NULL; // pass ownership
self = NULL; // pass ownership

exit:
if (self != NULL) {
Py_TYPE(self)->tp_free((PyObject *)self);
}
if (self_lock != NULL) {
PyThread_free_lock(self_lock);
}
release_buf_if_acquired(&data);
release_buf_if_acquired(&key);
return ret;
Expand Down Expand Up @@ -284,8 +300,7 @@ static PyTypeObject Blake3Type = {
.tp_basicsize = sizeof(Blake3Object),
.tp_itemsize = 0,
.tp_flags = Py_TPFLAGS_DEFAULT,
.tp_new = PyType_GenericNew,
.tp_init = (initproc) Blake3_init,
.tp_new = Blake3_new,
.tp_dealloc = (destructor) Blake3_dealloc,
.tp_methods = Blake3_methods,
// clang-format on
Expand Down

0 comments on commit bd10cd4

Please sign in to comment.