From 2645bf631b668d79b027e1df5de4fd987cd265c7 Mon Sep 17 00:00:00 2001 From: thatbirdguythatuknownot Date: Tue, 22 Aug 2023 20:58:15 +0800 Subject: [PATCH] math.factorize for small inputs --- Include/internal/pycore_long.h | 2 + Modules/clinic/mathmodule.c.h | 13 ++- Modules/mathmodule.c | 203 +++++++++++++++++++++++++++++++++ Objects/longobject.c | 2 +- 4 files changed, 218 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 3dc00ec..b7c8dea 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -83,6 +83,8 @@ extern PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); extern PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); extern PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right); +extern void _PyLong_Negate(PyLongObject **x_p); + // Used by _PyBytes_FromHex(), _PyBytes_DecodeEscape(), Python/mystrtoul.c. // Export for 'binascii' shared extension. PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index c16c1b0..e3b3f64 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -950,4 +950,15 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=91a0357265a2a553 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(math_factorize__doc__, +"factorize($module, n, /)\n" +"--\n" +"\n" +"Return the prime factorization of an integer in a list.\n" +"\n" +"If the integer is negative, the first term in the list will be negative."); + +#define MATH_FACTORIZE_METHODDEF \ + {"factorize", (PyCFunction)math_factorize, METH_O, math_factorize__doc__}, +/*[clinic end generated code: output=27bb0d83da5b552d input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 0d238c0..1c5093b 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -63,6 +63,7 @@ raised for division by zero and mod by zero. #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_object.h" // _PyObject_LookupSpecial() #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR +#include "pycore_list.h" // _Py_memory_repeat /* For DBL_EPSILON in _math.h */ #include /* For _Py_log1p with workarounds for buggy handling of zeros. */ @@ -4020,6 +4021,207 @@ math_ulp_impl(PyObject *module, double x) return x2 - x; } +/*[clinic input] +math.factorize + + n: object + / + +Return the prime factorization of an integer in a list. + +If the integer is negative, the first term in the list will be negative. +[clinic start generated code]*/ + +static PyObject * +math_factorize(PyObject *module, PyObject *n) +/*[clinic end generated code: output=77975bd66e53917a input=ccb83153f3d2cd56]*/ +{ + int a_too_large, c_bit_length, shift; + size_t c, d, ndig; + uint64_t m, r, i; + uint32_t u; + digit *digits; + PyListObject *res; + PyObject *a, *b; + + n = _PyNumber_Index(n); + if (n == NULL) { + return NULL; + } + + if (_PyLong_IsZero((PyLongObject *)n)) { + Py_DECREF(n); + return PyList_New(0); + } + + digits = ((PyLongObject *)n)->long_value.ob_digit; + ndig = _PyLong_DigitCount((PyLongObject *)n); + for (i = 0; i < ndig && !digits[i]; i++); + assert(i < ndig); + r = PyLong_SHIFT * i; +#if (defined(__clang__) || defined(__GNUC__)) + shift = __builtin_ctz(digits[i]); + r += shift; +#elif defined(_MSC_VER) + Py_BUILD_ASSERT(sizeof(digit) <= 4); + if (_BitScanForward(&shift, digits[i])) { + r += shift; + } + else { + shift = 0; + } +#else + { + digit x = digits[i]; + for (shift = 0; (x & 1) == 0; shift++, x >>= 1); + r += shift; + } +#endif + + res = (PyListObject *)PyList_New(r); + if (res == NULL) { + return NULL; + } + if (r) { + a = PyLong_FromLong(2); + if (a == NULL) { + goto error; + } + PyList_SET_ITEM(res, 0, a); + _Py_RefcntAdd(a, r-1); + PyObject **dest = _PyList_ITEMS(res) + 1; + PyObject **dest_end = dest + (r-1); + while (dest < dest_end) { + *dest++ = a; + } + } + + c = _PyLong_NumBits(n); + if (c == (size_t)(-1)) { + goto error; + } + c = (c - r - 1U) / 2U; + + /* Fast path: if c <= 31 then n shifted < 2**64 and we can compute + directly with a fast algorithm. */ + if (c <= 31U) { + m = digits[i] >> shift; + for (shift = -shift; ++i < ndig;) { + shift += PyLong_SHIFT; + m |= (uint64_t)digits[i] << shift; + } + for (i = 3; m != 1; i += 2) { + a = NULL; + while (m % i == 0) { + if (!a) { + a = PyLong_FromUnsignedLongLong(i); + if (a == NULL) { + goto error; + } + } + else { + Py_INCREF(a); + } + if (_PyList_AppendTakeRef(res, a) < 0) { + goto error; + } + m /= i; + } + } + if (_PyLong_IsNegative((PyLongObject *)n) && Py_SIZE(res)) { + PyObject *x = PyList_GET_ITEM(res, 0); + _PyLong_Negate((PyLongObject **)&x); + if (x == NULL) { + goto error; + } + PyList_SET_ITEM(res, 0, x); + } + Py_DECREF(n); + return (PyObject *)res; + } + + /* Slow path: n shifted >= 2**64. We perform the first few iterations in + C integer arithmetic, then switch to using Python long integers. */ + + /* From n >= 2**64 it follows that c.bit_length() >= 6. + c_bit_length = 6; + while ((c >> c_bit_length) > 0U) { + ++c_bit_length; + } + + Initialise d and a. + d = c >> (c_bit_length - 5); + b = _PyLong_Rshift(n, 2U*c - 62U); + if (b == NULL) { + goto error; + } + m = (uint64_t)PyLong_AsUnsignedLongLong(b); + Py_DECREF(b); + if (m == (uint64_t)(-1) && PyErr_Occurred()) { + goto error; + } + u = _approximate_isqrt(m) >> (31U - d); + a = PyLong_FromUnsignedLong(u); + if (a == NULL) { + goto error; + } + + for (int s = c_bit_length - 6; s >= 0; --s) { + PyObject *q; + size_t e = d; + + d = c >> s; + + q = (n >> 2*c - e - d + 1) // a + q = _PyLong_Rshift(n, 2U*c - d - e + 1U); + if (q == NULL) { + goto error; + } + Py_SETREF(q, PyNumber_FloorDivide(q, a)); + if (q == NULL) { + goto error; + } + + a = (a << d - 1 - e) + q + Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e)); + if (a == NULL) { + Py_DECREF(q); + goto error; + } + Py_SETREF(a, PyNumber_Add(a, q)); + Py_DECREF(q); + if (a == NULL) { + goto error; + } + } + + The correct result is either a or a - 1. Figure out which, and + decrement a if necessary. + + a_too_large = n < a * a + b = PyNumber_Multiply(a, a); + if (b == NULL) { + goto error; + } + a_too_large = PyObject_RichCompareBool(n, b, Py_LT); + Py_DECREF(b); + if (a_too_large == -1) { + goto error; + } + + if (a_too_large) { + Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne())); + } + */ + Py_DECREF(n); + return (PyObject *)res; + + error: + Py_DECREF(res); + Py_DECREF(n); + return NULL; +} + static int math_exec(PyObject *module) { @@ -4129,6 +4331,7 @@ static PyMethodDef math_methods[] = { MATH_COMB_METHODDEF MATH_NEXTAFTER_METHODDEF MATH_ULP_METHODDEF + MATH_FACTORIZE_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Objects/longobject.c b/Objects/longobject.c index 354cba9..448fa0c 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -274,7 +274,7 @@ _PyLong_FromSTwoDigits(stwodigits x) /* If a freshly-allocated int is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ -Py_LOCAL_INLINE(void) +void _PyLong_Negate(PyLongObject **x_p) { PyLongObject *x;