Skip to content

Commit

Permalink
py/objint: Try to convert big-int back to small-int after binary op.
Browse files Browse the repository at this point in the history
Before this change, long/mpz ints propagated into all future calculations,
even if their value could fit in a small-int object.  With this change, the
result of a big-int binary op will now be converted to a small-int object
if the value fits in a small-int.

For example, a relatively common operation like `x = a * b // c`, where
a, b and c are all small ints, would always result in a long/mpz int, even
if it didn't need to, and this would then impact all future calculations
with x.

This adds 24 bytes on PYBV11 but avoids heap allocations and potential
surprises (e.g. `big-big` is now a small `0`, and can safely be accessed
with MP_OBJ_SMALL_INT_VALUE).

Performance tests are unchanged on PYBV10, except for `bm_pidigits.py`
which makes heavy use of big-ints and gains about 8% in speed.

Unix coverage tests have been updated to cover mpz code that is now
unreachable by normal Python code (removing the unreachable code would lead
to some surprising gaps in the internal C functions and the functionality
may be needed in the future, so it is kept because it has minimal
overhead).

This work was funded through GitHub Sponsors.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
  • Loading branch information
jimmo authored and dpgeorge committed Jul 1, 2024
1 parent 0600e4f commit 557d31e
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 15 deletions.
11 changes: 11 additions & 0 deletions ports/unix/coverage.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "py/obj.h"
#include "py/objfun.h"
#include "py/objint.h"
#include "py/objstr.h"
#include "py/runtime.h"
#include "py/gc.h"
Expand Down Expand Up @@ -454,6 +455,13 @@ static mp_obj_t extra_coverage(void) {
mpz_mul_inpl(&mpz, &mpz2, &mpz);
mpz_as_uint_checked(&mpz, &value);
mp_printf(&mp_plat_print, "%d\n", (int)value);

// mpz_not_inpl with argument==0, testing ~0
mpz_set_from_int(&mpz, 0);
mpz_not_inpl(&mpz, &mpz);
mp_int_t value_signed;
mpz_as_int_checked(&mpz, &value_signed);
mp_printf(&mp_plat_print, "%d\n", (int)value_signed);
}

// runtime utils
Expand All @@ -470,6 +478,9 @@ static mp_obj_t extra_coverage(void) {
// call mp_call_function_2_protected with invalid args
mp_call_function_2_protected(MP_OBJ_FROM_PTR(&mp_builtin_divmod_obj), mp_obj_new_str("abc", 3), mp_obj_new_str("abc", 3));

// mp_obj_int_get_checked with mp_obj_int_t that has a value that is a small integer
mp_printf(&mp_plat_print, "%d\n", mp_obj_int_get_checked(mp_obj_int_new_mpz()));

// mp_obj_int_get_uint_checked with non-negative small-int
mp_printf(&mp_plat_print, "%d\n", (int)mp_obj_int_get_uint_checked(MP_OBJ_NEW_SMALL_INT(1)));

Expand Down
23 changes: 8 additions & 15 deletions py/objint_longlong.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,45 +247,38 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
}

// Create an int object from an mp_int_t: a small-int object when the value
// fits in one, otherwise whatever mp_obj_new_int_from_ll produces for it.
mp_obj_t mp_obj_new_int(mp_int_t value) {
    return MP_SMALL_INT_FITS(value)
           ? MP_OBJ_NEW_SMALL_INT(value)
           : mp_obj_new_int_from_ll(value);
}

// Create an int object from an unsigned mp_uint_t value.
mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value) {
    // A small-int is signed, so only the positive part of its range can be
    // used here: any bit set outside MP_SMALL_INT_POSITIVE_MASK means the
    // value must be handed to the long long constructor instead.
    if ((value & ~MP_SMALL_INT_POSITIVE_MASK) != 0) {
        return mp_obj_new_int_from_ll(value);
    }
    return MP_OBJ_NEW_SMALL_INT(value);
}

// Create an int object from a long long.  A value that fits in a small-int
// is returned as a small-int object; otherwise a new mp_obj_int_t holding
// the value is created with mp_obj_malloc and returned.
mp_obj_t mp_obj_new_int_from_ll(long long val) {
    // The round-trip cast rejects values that would be truncated when
    // narrowed to mp_int_t, before testing against the small-int range.
    if ((long long)(mp_int_t)val == val && MP_SMALL_INT_FITS(val)) {
        return MP_OBJ_NEW_SMALL_INT(val);
    }

    mp_obj_int_t *o = mp_obj_malloc(mp_obj_int_t, &mp_type_int);
    o->val = val;
    // Convert the pointer to an mp_obj_t explicitly rather than returning
    // the raw pointer.
    return MP_OBJ_FROM_PTR(o);
}

// Create an int object from an unsigned long long.
// Raises OverflowError when the top bit of val is set, since such a value
// cannot be represented as a signed long long.  Otherwise the value is passed
// to mp_obj_new_int_from_ll, which builds the resulting object.
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
    if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) {
        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("ulonglong too large"));
    }
    return mp_obj_new_int_from_ll((long long)val);
}

// Parse an integer in the given base from *str using strtoll and return it
// as an int object built by mp_obj_new_int_from_ll.  On return, *str points
// at the first character that strtoll did not consume.
// NOTE(review): neg is not applied here; presumably any sign is handled by
// strtoll or by the caller -- confirm against the call site.
mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base) {
    // TODO this does not honor the given length of the string, but in all cases it should anyway be null terminated
    // TODO check overflow
    char *endptr;
    mp_obj_t result = mp_obj_new_int_from_ll(strtoll(*str, &endptr, base));
    *str = endptr;
    return result;
}

mp_int_t mp_obj_int_get_truncated(mp_const_obj_t self_in) {
Expand Down
12 changes: 12 additions & 0 deletions py/objint_mpz.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,14 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
return MP_OBJ_NULL; // op not supported
}

// Check if the result fits in a small-int, and if so just return that.
mp_int_t res_small;
if (mpz_as_int_checked(&res->mpz, &res_small)) {
if (MP_SMALL_INT_FITS(res_small)) {
return MP_OBJ_NEW_SMALL_INT(res_small);
}
}

return MP_OBJ_FROM_PTR(res);

} else {
Expand Down Expand Up @@ -425,6 +433,10 @@ mp_int_t mp_obj_int_get_checked(mp_const_obj_t self_in) {
const mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
mp_int_t value;
if (mpz_as_int_checked(&self->mpz, &value)) {
// mp_obj_int_t objects should always contain a value that is a large
// integer (if the value fits in a small-int then it should have been
// converted to a small-int object), and so this code-path should never
// be taken in normal circumstances.
return value;
} else {
// overflow
Expand Down
22 changes: 22 additions & 0 deletions tests/basics/int_big_to_small.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Check that a big-int binary op whose result fits in a small int produces a
# small int, by verifying that further arithmetic on it works while the heap
# is locked.
try:
    import micropython

    # Attribute access only: skip the test if heap locking is unavailable.
    micropython.heap_lock
except (ImportError, AttributeError):
    print("SKIP")
    raise SystemExit

# All less than small int max.
for d in (0, 27, 1 << 29, -1861, -(1 << 29)):
    i = 1 << 70
    print(i)
    j = (1 << 70) + d
    print(j)
    # k should now be a small int.
    k = j - i
    print(k)

    # Now verify that working with k doesn't allocate (i.e. it's a small int).
    micropython.heap_lock()
    print(k + 20)
    print(k // 20)
    micropython.heap_unlock()
25 changes: 25 additions & 0 deletions tests/basics/int_big_to_small.py.exp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
1180591620717411303424
1180591620717411303424
0
20
0
1180591620717411303424
1180591620717411303451
27
47
1
1180591620717411303424
1180591620717948174336
536870912
536870932
26843545
1180591620717411303424
1180591620717411301563
-1861
-1841
-94
1180591620717411303424
1180591620716874432512
-536870912
-536870892
-26843546
2 changes: 2 additions & 0 deletions tests/ports/unix/extra_coverage.py.exp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ data
1
12345
6
-1
# runtime utils
TypeError: unsupported type for __abs__: 'str'
TypeError: unsupported types for __divmod__: 'str', 'str'
0
1
2
OverflowError: overflow converting long int to machine word
Expand Down

0 comments on commit 557d31e

Please sign in to comment.