From f4f9515a65d19b1892cda7710a3fc5c065cccbef Mon Sep 17 00:00:00 2001 From: wout4 Date: Mon, 10 Jul 2023 16:53:19 +0200 Subject: [PATCH 1/6] avoid numpy sum where we are at risk of overflow check for overflow in get_bounds --- cpmpy/expressions/core.py | 30 +++++++++++++++++----------- cpmpy/expressions/python_builtins.py | 4 ++-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/cpmpy/expressions/core.py b/cpmpy/expressions/core.py index cc50c74c6..e263c1466 100644 --- a/cpmpy/expressions/core.py +++ b/cpmpy/expressions/core.py @@ -554,26 +554,28 @@ def get_bounds(self): lb1, ub1 = get_bounds(self.args[0]) lb2, ub2 = get_bounds(self.args[1]) bounds = [lb1 * lb2, lb1 * ub2, ub1 * lb2, ub1 * ub2] - return min(bounds), max(bounds) + lowerbound, upperbound = min(bounds), max(bounds) elif self.name == 'sum': lbs, ubs = zip(*[get_bounds(x) for x in self.args]) - return sum(lbs), sum(ubs) + lowerbound, upperbound = sum(lbs), sum(ubs) elif self.name == 'wsum': weights, vars = self.args - var_bounds = np.array([get_bounds(arg) for arg in vars]).T - bounds = var_bounds * weights - return bounds.min(axis=0).sum(), bounds.max(axis=0).sum() # for every column is axis=0... + bounds = [] + for i, varbounds in enumerate([get_bounds(arg) for arg in vars]): + bounds += [(list(weights[i] * x for x in varbounds))] + lbs, ubs = (zip(*bounds)) + lowerbound, upperbound = sum(lbs), sum(ubs) elif self.name == 'sub': lb1, ub1 = get_bounds(self.args[0]) lb2, ub2 = get_bounds(self.args[1]) - return lb1-ub2, ub1-lb2 + lowerbound, upperbound = lb1-ub2, ub1-lb2 elif self.name == 'div': lb1, ub1 = get_bounds(self.args[0]) lb2, ub2 = get_bounds(self.args[1]) if lb2 <= 0 <= ub2: raise ZeroDivisionError("division by domain containing 0 is not supported") bounds = [lb1 // lb2, lb1 // ub2, ub1 // lb2, ub1 // ub2] - return min(bounds), max(bounds) + lowerbound, upperbound = min(bounds), max(bounds) elif self.name == 'mod': lb1, ub1 = get_bounds(self.args[0]) lb2, ub2 = get_bounds(self.args[1]) @@ -595,14 +597,18 @@ def get_bounds(self): # E.g., (-2)^2 is positive, but (-2)^1 is negative, so for (-2)^[0,2] we also need to add (-2)^1. bounds += [lb1 ** (ub2 - 1), ub1 ** (ub2 - 1)] # This approach is safe but not tight (e.g., [-2,-1]^2 will give (-2,4) as range instead of [1,4]). - return min(bounds), max(bounds) + lowerbound, upperbound = min(bounds), max(bounds) elif self.name == '-': lb1, ub1 = get_bounds(self.args[0]) - return -ub1, -lb1 - - raise ValueError(f"Bound requested for unknown expression {self}, please report bug on github") - + lowerbound, upperbound = -ub1, -lb1 + + if lowerbound == None: + raise ValueError(f"Bound requested for unknown expression {self}, please report bug on github") + if lowerbound > upperbound: + #overflow happened + raise OverflowError('Overflow when calculating bounds, your expression exceeds int64 bounds.') + return int(lowerbound), upperbound def _wsum_should(arg): """ Internal helper: should the arg be in a wsum instead of sum diff --git a/cpmpy/expressions/python_builtins.py b/cpmpy/expressions/python_builtins.py index 75ae58b7a..c28b1e2c9 100644 --- a/cpmpy/expressions/python_builtins.py +++ b/cpmpy/expressions/python_builtins.py @@ -96,10 +96,10 @@ def min(iterable): def sum(iterable): """ sum() overwrites python built-in, - checks if all constants and computes np.sum() in that case + checks if all constants and computes builtins.sum() in that case otherwise, makes a sum Operator directly on `iterable` """ iterable = list(iterable) # Fix generator polling if not builtins.any(isinstance(elem, Expression) for elem in iterable): - return np.sum(iterable) + return builtins.sum(iterable) #numpy sum is only faster on numpy arrays. builtin sum also avoids overflow of int32 return Operator("sum", iterable) From f21784315eb5cbba6f327d1e4c029ba54d2a9fc2 Mon Sep 17 00:00:00 2001 From: wout4 Date: Mon, 10 Jul 2023 17:13:01 +0200 Subject: [PATCH 2/6] update error message --- cpmpy/expressions/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpmpy/expressions/core.py b/cpmpy/expressions/core.py index e263c1466..182fe44bd 100644 --- a/cpmpy/expressions/core.py +++ b/cpmpy/expressions/core.py @@ -607,7 +607,7 @@ def get_bounds(self): raise ValueError(f"Bound requested for unknown expression {self}, please report bug on github") if lowerbound > upperbound: #overflow happened - raise OverflowError('Overflow when calculating bounds, your expression exceeds int64 bounds.') + raise OverflowError('Overflow when calculating bounds, your expression exceeds integer bounds.') return int(lowerbound), upperbound def _wsum_should(arg): """ Internal helper: should the arg be in a wsum instead of sum From 7dab6642f75337ffc7a8fb7fa9182644e95468cb Mon Sep 17 00:00:00 2001 From: wout4 Date: Mon, 10 Jul 2023 19:27:02 +0200 Subject: [PATCH 3/6] sort intermediate bounds --- cpmpy/expressions/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpmpy/expressions/core.py b/cpmpy/expressions/core.py index 182fe44bd..48d876c49 100644 --- a/cpmpy/expressions/core.py +++ b/cpmpy/expressions/core.py @@ -562,7 +562,9 @@ def get_bounds(self): weights, vars = self.args bounds = [] for i, varbounds in enumerate([get_bounds(arg) for arg in vars]): - bounds += [(list(weights[i] * x for x in varbounds))] + sortbounds = (list(weights[i] * x for x in varbounds)) + sortbounds.sort() + bounds += [sortbounds] lbs, ubs = (zip(*bounds)) lowerbound, upperbound = sum(lbs), sum(ubs) elif self.name == 'sub': From f017a39a83b6be46fcb7ddd70d7c0329a57ea13d Mon Sep 17 00:00:00 2001 From: wout4 Date: Thu, 13 Jul 2023 16:08:49 +0200 Subject: [PATCH 4/6] remove cast to int that was their for testing --- cpmpy/expressions/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpmpy/expressions/core.py b/cpmpy/expressions/core.py index 48d876c49..3b754e2bc 100644 --- a/cpmpy/expressions/core.py +++ b/cpmpy/expressions/core.py @@ -610,7 +610,7 @@ def get_bounds(self): if lowerbound > upperbound: #overflow happened raise OverflowError('Overflow when calculating bounds, your expression exceeds integer bounds.') - return int(lowerbound), upperbound + return lowerbound, upperbound def _wsum_should(arg): """ Internal helper: should the arg be in a wsum instead of sum From 488d5418a61e5324a832ee7f9715ee35d30172eb Mon Sep 17 00:00:00 2001 From: wout4 Date: Fri, 8 Sep 2023 13:46:52 +0200 Subject: [PATCH 5/6] revert change, we want to use numpy sum --- cpmpy/expressions/python_builtins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpmpy/expressions/python_builtins.py b/cpmpy/expressions/python_builtins.py index c28b1e2c9..75ae58b7a 100644 --- a/cpmpy/expressions/python_builtins.py +++ b/cpmpy/expressions/python_builtins.py @@ -96,10 +96,10 @@ def min(iterable): def sum(iterable): """ sum() overwrites python built-in, - checks if all constants and computes builtins.sum() in that case + checks if all constants and computes np.sum() in that case otherwise, makes a sum Operator directly on `iterable` """ iterable = list(iterable) # Fix generator polling if not builtins.any(isinstance(elem, Expression) for elem in iterable): - return builtins.sum(iterable) #numpy sum is only faster on numpy arrays. builtin sum also avoids overflow of int32 + return np.sum(iterable) return Operator("sum", iterable) From 266e29faed32a6375f46a4f5ad4da8b1b1314d02 Mon Sep 17 00:00:00 2001 From: wout4 Date: Fri, 8 Sep 2023 13:56:54 +0200 Subject: [PATCH 6/6] add some comments --- cpmpy/expressions/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpmpy/expressions/core.py b/cpmpy/expressions/core.py index 3b754e2bc..6f7e32140 100644 --- a/cpmpy/expressions/core.py +++ b/cpmpy/expressions/core.py @@ -561,12 +561,13 @@ def get_bounds(self): elif self.name == 'wsum': weights, vars = self.args bounds = [] + #this may seem like too many lines, but avoiding np.sum avoids overflowing things at int32 bounds for i, varbounds in enumerate([get_bounds(arg) for arg in vars]): sortbounds = (list(weights[i] * x for x in varbounds)) sortbounds.sort() bounds += [sortbounds] lbs, ubs = (zip(*bounds)) - lowerbound, upperbound = sum(lbs), sum(ubs) + lowerbound, upperbound = sum(lbs), sum(ubs) #this is builtins sum, not numpy sum elif self.name == 'sub': lb1, ub1 = get_bounds(self.args[0]) lb2, ub2 = get_bounds(self.args[1])