diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b3fc08ed..8c3d4d28 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,8 +23,8 @@ jobs: name: "${{ matrix.os }}: Python ${{ matrix.python-version }}" strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] - os: ["ubuntu-20.04", "macos-10.15"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + os: ["ubuntu-20.04", "macos-latest"] runs-on: "${{ matrix.os }}" @@ -76,11 +76,11 @@ jobs: make test - name: "Build macOS wheels" if: startsWith(matrix.os, 'mac') && (matrix.python-version == '3.9') - uses: pypa/cibuildwheel@v2.9.0 + uses: pypa/cibuildwheel@v2.11.2 env: MACOSX_DEPLOYMENT_TARGET: "10.15" CIBW_ARCHS_MACOS: "x86_64 arm64" - CIBW_SKIP: "cp37-macosx_arm64 cp36* pp* cp311*" + CIBW_SKIP: "cp37-macosx_arm64 cp36* pp*" CIBW_BEFORE_BUILD: "touch filpreload/src/_filpreload.c" # force rebuild of Python code with new interpreter CIBW_TEST_COMMAND: python -m filprofiler run {project}/benchmarks/pystone.py with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7173957c..d9d30c6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Release notes +## 2022.11.0 (2022-11-07) + +### Features + +- Added initial Python 3.11 support; unfortunately this increased performance overhead a little. ([#381](https://github.com/pythonspeed/filprofiler/issues/381)) + ## 2022.10.0 (2022-10-19) ### Bugfixes diff --git a/Makefile b/Makefile index cf288346..ae1d2562 100644 --- a/Makefile +++ b/Makefile @@ -40,17 +40,13 @@ test-python-no-deps: cd tests/test-scripts && python -m numpy.f2py -c fortran.f90 -m fortran env RUST_BACKTRACE=1 py.test -v tests/ -.PHONY: docker-image -docker-image: - docker build -t manylinux-rust -f wheels/Dockerfile.build . 
- .PHONY: wheel wheel: python setup.py bdist_wheel .PHONY: manylinux-wheel manylinux-wheel: - docker run -u $(shell id -u):$(shell id -g) -v $(PWD):/src quay.io/pypa/manylinux2010_x86_64:latest /src/wheels/build-wheels.sh + docker run -u $(shell id -u):$(shell id -g) -v $(PWD):/src quay.io/pypa/manylinux2014_x86_64:latest /src/wheels/build-wheels.sh .PHONY: clean clean: diff --git a/filpreload/src/_filpreload.c b/filpreload/src/_filpreload.c index 000b3751..ada67d81 100644 --- a/filpreload/src/_filpreload.c +++ b/filpreload/src/_filpreload.c @@ -1,10 +1,17 @@ #include "Python.h" +#include "ceval.h" +#if PY_MINOR_VERSION < 11 #include "code.h" +#else +#include "internal/pycore_code.h" +#include "internal/pycore_frame.h" +#endif +#include "frameobject.h" #include "object.h" + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif -#include "frameobject.h" #include #include #include @@ -16,6 +23,20 @@ #include #include +#if PY_MINOR_VERSION < 9 +PyFrameObject *PyFrame_GetBack(PyFrameObject *frame) { + if (frame->f_back != NULL) { + Py_INCREF(frame->f_back); + } + return frame->f_back; +} + +PyCodeObject *PyFrame_GetCode(PyFrameObject *frame) { + Py_INCREF(frame->f_code); + return frame->f_code; +} +#endif + // Macro to create the publicly exposed symbol: #ifdef __APPLE__ #define SYMBOL_PREFIX(func) reimplemented_##func @@ -33,9 +54,9 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -// Underlying APIs we're wrapping: -static void *(*underlying_real_mmap)(void *addr, size_t length, int prot, - int flags, int fd, off_t offset) = 0; + // Underlying APIs we're wrapping: + static void *(*underlying_real_mmap)(void *addr, size_t length, int prot, + int flags, int fd, off_t offset) = 0; static int (*underlying_real_pthread_create)(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), @@ -118,8 +139,22 @@ static inline int should_track_memory() { return (likely(initialized) && 
atomic_load_explicit(&tracking_allocations, memory_order_acquire) && !am_i_reentrant()); } -// Current thread's Python state: -static _Thread_local PyFrameObject *current_frame = NULL; +// Current thread's Python state; typically only set in C functions where GIL +// might be released. +static _Thread_local int current_line_number = -1; + +static inline int get_current_line_number() { + if (PyGILState_Check()) { + PyFrameObject *frame = PyEval_GetFrame(); + if (frame != NULL) { + return PyFrame_GetLineNumber(frame); + } + } + if (current_line_number != -1) { + return current_line_number; + } + return 0; +} // The file and function name responsible for an allocation. struct FunctionLocation { @@ -200,13 +235,16 @@ static void __attribute__((constructor)) constructor() { initialized = 1; } -static void start_call(uint64_t function_id, uint16_t line_number) { +static void start_call(uint64_t function_id, uint16_t line_number, PyFrameObject* current_frame) { if (should_track_memory()) { increment_reentrancy(); uint16_t parent_line_number = 0; - if (current_frame != NULL && current_frame->f_back != NULL) { - PyFrameObject *f = current_frame->f_back; - parent_line_number = PyFrame_GetLineNumber(f); + if (current_frame != NULL) { + PyFrameObject *parent = PyFrame_GetBack(current_frame); + if (parent != NULL ){ + parent_line_number = PyFrame_GetLineNumber(parent); + Py_DECREF(parent); + } } pymemprofile_start_call(parent_line_number, function_id, line_number); decrement_reentrancy(); @@ -226,39 +264,53 @@ __attribute__((visibility("hidden"))) int fil_tracer(PyObject *obj, PyFrameObject *frame, int what, PyObject *arg) { switch (what) { case PyTrace_CALL: - // Store the current frame, so malloc() can look up line number: - current_frame = frame; - /* We want an efficient identifier for filename+fuction name. So we register the function + filename with some Rust code that gives back its ID, and then store the ID. 
Due to bad API design, value 0 indicates "no result", so we actually store the result + 1. */ + current_line_number = frame->f_lineno; uint64_t function_id = 0; assert(extra_code_index != -1); - _PyCode_GetExtra((PyObject *)frame->f_code, extra_code_index, - (void **)&function_id); + PyCodeObject *code = PyFrame_GetCode(frame); + _PyCode_GetExtra((PyObject *)code, extra_code_index, (void **)&function_id); if (function_id == 0) { Py_ssize_t filename_length, function_length; - const char* filename = PyUnicode_AsUTF8AndSize(frame->f_code->co_filename, + const char* filename = PyUnicode_AsUTF8AndSize(code->co_filename, &filename_length); - const char* function_name = PyUnicode_AsUTF8AndSize(frame->f_code->co_name, + const char* function_name = PyUnicode_AsUTF8AndSize(code->co_name, &function_length); increment_reentrancy(); function_id = pymemprofile_add_function_location(filename, (uint64_t)filename_length, function_name, (uint64_t)function_length); decrement_reentrancy(); - _PyCode_SetExtra((PyObject *)frame->f_code, extra_code_index, + _PyCode_SetExtra((PyObject *)code, extra_code_index, (void *)function_id + 1); + Py_DECREF(code); } else { function_id -= 1; } - start_call(function_id, frame->f_lineno); + start_call(function_id, current_line_number, frame); break; case PyTrace_RETURN: finish_call(); - // We're done with this frame, so set the parent frame: - current_frame = frame->f_back; + if (frame != NULL) { + PyFrameObject* parent = PyFrame_GetBack(frame); + if (parent == NULL) { + current_line_number = -1; + } else { + current_line_number = PyFrame_GetLineNumber(parent); + Py_DECREF(parent); + } + } + break; + case PyTrace_C_CALL: + // C calls might release GIL, in which case they won't change the line + // number, so record it. 
+ current_line_number = PyFrame_GetLineNumber(frame); + break; + case PyTrace_C_RETURN: + current_line_number = -1; break; default: break; @@ -315,20 +367,12 @@ fil_dump_peak_to_flamegraph(const char *path) { // *** End APIs called by Python *** static void add_allocation(size_t address, size_t size) { - uint16_t line_number = 0; - PyFrameObject *f = current_frame; - if (f != NULL) { - line_number = PyFrame_GetLineNumber(f); - } + uint16_t line_number = get_current_line_number(); pymemprofile_add_allocation(address, size, line_number); } static void add_anon_mmap(size_t address, size_t size) { - uint16_t line_number = 0; - PyFrameObject *f = current_frame; - if (f != NULL) { - line_number = PyFrame_GetLineNumber(f); - } + uint16_t line_number = get_current_line_number(); pymemprofile_add_anon_mmap(address, size, line_number); } diff --git a/filprofiler/_report.py b/filprofiler/_report.py index 2a8e2d3c..308151d3 100644 --- a/filprofiler/_report.py +++ b/filprofiler/_report.py @@ -76,16 +76,18 @@ def render_report(output_path: str, now: datetime) -> str:

Profiling result

+

Check out my other project:

Find memory and performance bottlenecks in production!

When your data pipeline is too slow in production, reproducing the problem on your laptop is hard or impossible—which means identifying and fixing the problem can be tricky.

What if you knew the cause of the problem as soon as you realized it was happening?

-

That's why you need - the Sciagraph profiler, designed to find performance +

That's how + the Sciagraph profiler can help you: + it's designed to find performance and memory bottlenecks by continuously profiling in production.



@@ -93,6 +95,7 @@ def render_report(output_path: str, now: datetime) -> str:


+
Need help, or does something look wrong? Read the documentation, diff --git a/memapi/src/memorytracking.rs b/memapi/src/memorytracking.rs index 92d300f1..856042f6 100644 --- a/memapi/src/memorytracking.rs +++ b/memapi/src/memorytracking.rs @@ -236,7 +236,8 @@ fn runpy_prefix_length(calls: std::slice::Iter<(CallSiteId, (&str, &str))>) -> u let mut length = 0; let runpy_path = get_runpy_path(); for (_, (_, filename)) in calls { - if *filename == runpy_path { + // On Python 3.11 it uses <frozen runpy> for some reason. + if *filename == runpy_path || *filename == "<frozen runpy>" { length += 1; } else { return length; diff --git a/wheels/Dockerfile.build b/wheels/Dockerfile.build deleted file mode 100644 index 05929fee..00000000 --- a/wheels/Dockerfile.build +++ /dev/null @@ -1,9 +0,0 @@ -FROM quay.io/pypa/manylinux2010_x86_64:latest - -COPY --from=rust:1.41.0-slim /usr/local/cargo /usr/local/cargo -COPY --from=rust:1.41.0-slim /usr/local/rustup /usr/local/rustup - -ENV PATH=$PATH:/usr/local/cargo/bin -ENV CARGO_HOME=/usr/local/cargo -ENV RUSTUP_HOME=/usr/local/rustup -RUN chmod a+w /usr/local/cargo diff --git a/wheels/build-wheels.sh b/wheels/build-wheels.sh index 667c3f43..d1b51fe3 100755 --- a/wheels/build-wheels.sh +++ b/wheels/build-wheels.sh @@ -17,15 +17,16 @@ rm -f filprofiler/_filpreload*.so rm -f filprofiler/_filpreload*.dylib rm -rf build -for PYBIN in /opt/python/cp{37,38,39,310}*/bin; do +for PYBIN in /opt/python/cp{37,38,39,310,311}*/bin; do touch filpreload/src/_filpreload.c # force rebuild of Python code with new interpreter export PYO3_PYTHON="$PYBIN/python" "${PYBIN}/pip" install -U setuptools wheel setuptools-rust pip "${PYBIN}/python" -m pip wheel -w /tmp/wheel .
done -auditwheel repair --plat manylinux2010_x86_64 -w dist/ /tmp/wheel/filprofiler*cp37*whl -auditwheel repair --plat manylinux2010_x86_64 -w dist/ /tmp/wheel/filprofiler*cp38*whl -auditwheel repair --plat manylinux2010_x86_64 -w dist/ /tmp/wheel/filprofiler*cp39*whl -auditwheel repair --plat manylinux2010_x86_64 -w dist/ /tmp/wheel/filprofiler*cp310*whl +auditwheel repair --plat manylinux2014_x86_64 -w dist/ /tmp/wheel/filprofiler*cp37*whl +auditwheel repair --plat manylinux2014_x86_64 -w dist/ /tmp/wheel/filprofiler*cp38*whl +auditwheel repair --plat manylinux2014_x86_64 -w dist/ /tmp/wheel/filprofiler*cp39*whl +auditwheel repair --plat manylinux2014_x86_64 -w dist/ /tmp/wheel/filprofiler*cp310*whl +auditwheel repair --plat manylinux2014_x86_64 -w dist/ /tmp/wheel/filprofiler*cp311*whl