From 115a781d289900838bc62b5b09904abaccda8ffe Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Mon, 22 Jun 2020 19:44:42 +0200 Subject: [PATCH 1/4] wip --- include/bh_python/accumulators/collector.hpp | 111 +++++++++++++++++++ include/bh_python/accumulators/ostream.hpp | 23 ++++ src/register_accumulators.cpp | 111 ++++++++++++++++--- 3 files changed, 228 insertions(+), 17 deletions(-) create mode 100644 include/bh_python/accumulators/collector.hpp diff --git a/include/bh_python/accumulators/collector.hpp b/include/bh_python/accumulators/collector.hpp new file mode 100644 index 00000000..89eb5381 --- /dev/null +++ b/include/bh_python/accumulators/collector.hpp @@ -0,0 +1,111 @@ +// Copyright 2020 Hans Dembinski +// +// Distributed under the Boost Software License, version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include +#include + +namespace accumulators { + +/** Keeps track of all weights in each bin. + + Can be used to compute bootstrap estimates of the uncertainties. 
+*/ +template +struct weight_collector { + using value_type = ValueType; + using const_reference = const value_type&; + using data_type = std::vector; + + weight_collector() = default; + + void operator+=(const boost::histogram::weight_type& w) noexcept { + data.push_back(w.value); + } + + weight_collector& operator+=(const weight_collector& rhs) noexcept { + data.reserve(data.size() + rhs.data.size()); + for(auto&& x : rhs.data) + data.push_back(x); + return *this; + } + + weight_collector& operator*=(const value_type& s) noexcept { + for(auto&& x : data) + x *= s; + return *this; + } + + bool operator==(const weight_collector& rhs) const noexcept { + return std::equal(data.begin(), data.end(), rhs.data.begin(), rhs.data.end()); + } + + bool operator!=(const weight_collector& rhs) const noexcept { + return !operator==(rhs); + } + + template + void serialize(Archive& ar, unsigned) { + ar& boost::make_nvp("data", data); + } + + data_type data{}; +}; + +/** Keeps track of all samples in each bin. + + Can be used to compute bootstrap estimates of the uncertainties. 
+*/ +template +struct sample_collector { + using value_type = ValueType; + using const_reference = const value_type&; + using item_type = std::array; + using data_type = std::vector; + + sample_collector() = default; + + void operator()(const value_type& x) noexcept { data.emplace_back(1, x); } + + void operator()(const boost::histogram::weight_type& w, + const value_type& x) noexcept { + data.emplace_back(w.value, x); + } + + sample_collector& operator+=(const sample_collector& rhs) noexcept { + data.reserve(data.size() + rhs.data.size()); + for(auto&& x : rhs) + data.push_back(x); + return *this; + } + + sample_collector& operator*=(const value_type& s) noexcept { + for(auto&& x : data) + x.second *= s; + return *this; + } + + bool operator==(const sample_collector& rhs) const noexcept { + return std::equal(data.begin(), data.end(), rhs.begin(), rhs.end()); + } + + bool operator!=(const sample_collector& rhs) const noexcept { + return !operator==(rhs); + } + + template + void serialize(Archive& ar, unsigned) { + ar& boost::make_nvp("data", data); + } + + data_type data{}; +}; + +} // namespace accumulators diff --git a/include/bh_python/accumulators/ostream.hpp b/include/bh_python/accumulators/ostream.hpp index ab0a00c3..853bc689 100644 --- a/include/bh_python/accumulators/ostream.hpp +++ b/include/bh_python/accumulators/ostream.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -77,4 +78,26 @@ operator<<(std::basic_ostream& os, return os; } +template +std::basic_ostream& operator<<(std::basic_ostream& os, + const weight_collector& wc) { + if(os.width() == 0) { + os << "["; + const auto n = wc.data.size(); + if(n > 10) { + for(std::size_t i = 0; i != 5; ++i) + os << wc.data[i] << " "; + os << "..."; + for(std::size_t i = n - 5; i != n; ++i) + os << " " << wc.data[i]; + } else { + bool first = true; + for(auto&& x : wc.data) + os << (first ? 
(first = false, "") : " ") << x; + } + os << "]"; + } + return handle_nonzero_width(os, wc); +} + } // namespace accumulators diff --git a/src/register_accumulators.cpp b/src/register_accumulators.cpp index 7db6a847..99e22dff 100644 --- a/src/register_accumulators.cpp +++ b/src/register_accumulators.cpp @@ -5,6 +5,7 @@ #include +#include #include #include #include @@ -14,6 +15,50 @@ #include #include +// should be updated when py::vectorize is updated to support consumers +template +void consume(Consumer& c, decltype(c(Value{}), py::object{}) x) { + py::vectorize([](Consumer& c, Value x) { + c(x); + return false; + })(c, x); +} + +// should be updated when py::vectorize is updated to support consumers +template +void consume(Consumer& c, decltype(c += Value{}, py::object{}) x) { + py::vectorize([](Consumer& c, Value x) { + c += x; + return false; + })(c, x); +} + +// should be updated when py::vectorize is updated to support consumers +template +void consume_w(Consumer& c, py::object w) { + py::vectorize([](Consumer& c, double w) { + c += bh::weight(w); + return false; + })(c, w); +} + +// should be updated when py::vectorize is updated to support consumers +template +void consume_w(Consumer& c, py::object w, py::object x) { + py::vectorize([](Consumer& c, double w, double x) { + c(bh::weight(w), x); + return false; + })(c, w, x); +} + +std::size_t from_python_index(std::size_t size, ssize_t idx) { + if(idx < 0) + idx += static_cast(size); + if(idx >= static_cast(size)) + throw py::index_error("index is out of range"); + return static_cast(idx); +} + /// The mean fill can be implemented once. 
(sum fill varies slightly) template decltype(auto) make_mean_fill() { @@ -21,15 +66,9 @@ decltype(auto) make_mean_fill() { py::object weight = optional_arg(kwargs, "weight", py::none()); finalize_args(kwargs); if(weight.is_none()) { - py::vectorize([](T& self, double val) { - self(val); - return false; - })(self, value); + consume(self, value); } else { - py::vectorize([](T& self, double wei, double val) { - self(bh::weight(wei), val); - return false; - })(self, weight, value); + consume_w(self, weight, value); } return self; }; @@ -100,11 +139,10 @@ void register_accumulators(py::module& accumulators) { py::object variance = optional_arg(kwargs, "variance", py::none()); finalize_args(kwargs); if(variance.is_none()) { - py::vectorize([](weighted_sum& self, double val) { - self += bh::weight(val); - return false; - })(self, value); + consume_w(self, value); } else { + // should be updated when py::vectorize is updated to support + // consumers py::vectorize([](weighted_sum& self, double val, double var) { self += weighted_sum(val, var); return false; @@ -157,11 +195,7 @@ void register_accumulators(py::module& accumulators) { .def( "fill", [](sum& self, py::object value) { - py::vectorize([](sum& self, double v) { - self += v; - return false; // Required in PyBind11 2.4.2, - // requirement may be removed - })(self, value); + consume(self, value); return self; }, "value"_a, @@ -326,4 +360,47 @@ void register_accumulators(py::module& accumulators) { }) ; + + using weight_collector = accumulators::weight_collector; + using std::size_t; + + register_accumulator(accumulators, "WeightCollector") + .def(py::init([](py::sequence weight) { + auto wc = new weight_collector{}; + consume_w(*wc, weight); + return wc; + }), + "seq"_a) + + // .def("view", ...) 
TODO + + .def("__call__", + [](weight_collector& self, double w) { + self += bh::weight(w); + return self; + }) + + .def( + "fill", + [](weight_collector& self, py::object weight) { + consume_w(self, weight); + return self; + }, + "weight"_a, + "Fill the collector with weights.") + + .def("__len__", [](const weight_collector& self) { return self.data.size(); }) + + // .def("__iter__", ...) TODO + + .def("__getitem__", + [](const weight_collector& self, ssize_t idx) { + return self.data[from_python_index(self.data.size(), idx)]; + }) + .def("__setitem__", + [](weight_collector& self, ssize_t idx, double w) { + self.data[from_python_index(self.data.size(), idx)] = w; + }) + + ; } From c95c3bda8cc799592cf2ddf9b0d8a51afca5162b Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Mon, 22 Jun 2020 23:01:43 +0200 Subject: [PATCH 2/4] tests for weight_collector and generic accumulators modules --- include/bh_python/accumulators/ostream.hpp | 16 +++++++------ src/boost_histogram/accumulators.py | 19 +++++++++++----- src/boost_histogram/cpp/accumulators.py | 26 +++++++++++++++------- src/register_accumulators.cpp | 16 ++++++------- tests/test_accumulators.py | 23 +++++++++++++++++++ 5 files changed, 70 insertions(+), 30 deletions(-) diff --git a/include/bh_python/accumulators/ostream.hpp b/include/bh_python/accumulators/ostream.hpp index 853bc689..e8824e2b 100644 --- a/include/bh_python/accumulators/ostream.hpp +++ b/include/bh_python/accumulators/ostream.hpp @@ -84,18 +84,20 @@ std::basic_ostream& operator<<(std::basic_ostream& if(os.width() == 0) { os << "["; const auto n = wc.data.size(); - if(n > 10) { + if(n > 1000) { for(std::size_t i = 0; i != 5; ++i) - os << wc.data[i] << " "; + os << wc.data[i] << ", "; os << "..."; for(std::size_t i = n - 5; i != n; ++i) - os << " " << wc.data[i]; + os << ", " << wc.data[i]; } else { - bool first = true; - for(auto&& x : wc.data) - os << (first ? 
(first = false, "") : " ") << x; + for(std::size_t i = 0; i != n; ++i) { + os << wc.data[i]; + if(i < n - 1) + os << ", "; + } } - os << "]"; + return os << "]"; } return handle_nonzero_width(os, wc); } diff --git a/src/boost_histogram/accumulators.py b/src/boost_histogram/accumulators.py index 04d31589..277c4f03 100644 --- a/src/boost_histogram/accumulators.py +++ b/src/boost_histogram/accumulators.py @@ -1,15 +1,22 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function -from ._core.accumulators import Sum, Mean, WeightedSum, WeightedMean -del absolute_import, division, print_function +def _load(): + from ._core import accumulators as acc + + r = {} + for key in dir(acc): + if key.startswith("_"): + continue + cls = getattr(acc, key) + cls.__module__ = "boost_histogram.accumulators" + r[key] = cls + return r -__all__ = ("Sum", "Mean", "WeightedSum", "WeightedMean") -for cls in (Sum, Mean, WeightedSum, WeightedMean): - cls.__module__ = "boost_histogram.accumulators" -del cls +locals().update(_load()) +del absolute_import, division, print_function # Not supported by PyBind builtins # Enable if wrapper added diff --git a/src/boost_histogram/cpp/accumulators.py b/src/boost_histogram/cpp/accumulators.py index f7e2e243..96d5357e 100644 --- a/src/boost_histogram/cpp/accumulators.py +++ b/src/boost_histogram/cpp/accumulators.py @@ -1,16 +1,26 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function -from ..accumulators import ( - Sum as sum, - Mean as mean, - WeightedSum as weighted_sum, - WeightedMean as weighted_mean, -) -del absolute_import, division, print_function +def _load(): + from .. 
import accumulators as acc + import string + + tr = {ord(k): "_" + k.lower() for k in string.ascii_uppercase} + + # from CamelCase to snake_case + r = {} + for key in dir(acc): + if key.startswith("_"): + continue + nkey = key[0].lower() + key[1:].translate(tr) + r[nkey] = getattr(acc, key) + return r -__all__ = ("sum", "mean", "weighted_sum", "weighted_mean") +locals().update(_load()) + +del absolute_import, division, print_function +del _load # These will have the original module locations and original names. diff --git a/src/register_accumulators.cpp b/src/register_accumulators.cpp index 99e22dff..fd1d26bf 100644 --- a/src/register_accumulators.cpp +++ b/src/register_accumulators.cpp @@ -16,18 +16,18 @@ #include // should be updated when py::vectorize is updated to support consumers -template -void consume(Consumer& c, decltype(c(Value{}), py::object{}) x) { - py::vectorize([](Consumer& c, Value x) { +template +auto consume(Consumer& c, py::object x) -> decltype(c(0.0), void()) { + py::vectorize([](Consumer& c, double x) { c(x); return false; })(c, x); } // should be updated when py::vectorize is updated to support consumers -template -void consume(Consumer& c, decltype(c += Value{}, py::object{}) x) { - py::vectorize([](Consumer& c, Value x) { +template +auto consume(Consumer& c, py::object x) -> decltype(c += 0.0, void()) { + py::vectorize([](Consumer& c, double x) { c += x; return false; })(c, x); @@ -374,7 +374,7 @@ void register_accumulators(py::module& accumulators) { // .def("view", ...) TODO - .def("__call__", + .def("__iadd__", [](weight_collector& self, double w) { self += bh::weight(w); return self; @@ -391,8 +391,6 @@ void register_accumulators(py::module& accumulators) { .def("__len__", [](const weight_collector& self) { return self.data.size(); }) - // .def("__iter__", ...) 
TODO - .def("__getitem__", [](const weight_collector& self, ssize_t idx) { return self.data[from_python_index(self.data.size(), idx)]; diff --git a/tests/test_accumulators.py b/tests/test_accumulators.py index 414c838a..6d6d18b7 100644 --- a/tests/test_accumulators.py +++ b/tests/test_accumulators.py @@ -87,3 +87,26 @@ def test_mean(): assert a == bh.accumulators.Mean(3, 2, 1) assert repr(a) == "Mean(count=3, value=2, variance=1)" + + +def test_weight_collector(): + ws = [1, 2, 3] + a = bh.accumulators.WeightCollector() + for w in ws: + a += w + + assert len(a) == 3 + assert list(a) == [1, 2, 3] + assert a[0] == 1 + a[0] = 9 + + assert repr(a) == "WeightCollector([9, 2, 3])" + + ws = [4, 5, 6, 7, 8, 9, 10, 11] + a.fill(ws) + + assert len(a) == 11 + assert repr(a) == "WeightCollector([9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])" + + a2 = bh.accumulators.WeightCollector().fill(range(1001)) + assert repr(a2) == "WeightCollector([0, 1, 2, 3, 4, ..., 996, 997, 998, 999, 1000])" From 7df2b0ea646c7c40fad6d0b1552be399f57948e9 Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Mon, 22 Jun 2020 23:35:43 +0200 Subject: [PATCH 3/4] return and test an array view --- src/boost_histogram/_internal/view.py | 2 +- src/register_accumulators.cpp | 7 ++++++- tests/test_accumulators.py | 6 ++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/boost_histogram/_internal/view.py b/src/boost_histogram/_internal/view.py index 7579fa12..ed19bb7f 100644 --- a/src/boost_histogram/_internal/view.py +++ b/src/boost_histogram/_internal/view.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function -from ..accumulators import Mean, WeightedMean, WeightedSum +from ..accumulators import Mean, WeightedMean, WeightedSum, WeightCollector import numpy as np diff --git a/src/register_accumulators.cpp b/src/register_accumulators.cpp index fd1d26bf..57eee702 100644 --- a/src/register_accumulators.cpp +++ b/src/register_accumulators.cpp @@ 
-372,7 +372,12 @@ void register_accumulators(py::module& accumulators) { }), "seq"_a) - // .def("view", ...) TODO + .def("view", + [](py::object pyself) { + auto& self = py::cast(pyself); + return py::array( + static_cast(self.data.size()), self.data.data(), pyself); + }) .def("__iadd__", [](weight_collector& self, double w) { diff --git a/tests/test_accumulators.py b/tests/test_accumulators.py index 6d6d18b7..47496859 100644 --- a/tests/test_accumulators.py +++ b/tests/test_accumulators.py @@ -97,16 +97,18 @@ def test_weight_collector(): assert len(a) == 3 assert list(a) == [1, 2, 3] + assert a.view().sum() == 6 assert a[0] == 1 a[0] = 9 + a.view()[1:] *= 2 - assert repr(a) == "WeightCollector([9, 2, 3])" + assert repr(a) == "WeightCollector([9, 4, 6])" ws = [4, 5, 6, 7, 8, 9, 10, 11] a.fill(ws) assert len(a) == 11 - assert repr(a) == "WeightCollector([9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])" + assert repr(a) == "WeightCollector([9, 4, 6, 4, 5, 6, 7, 8, 9, 10, 11])" a2 = bh.accumulators.WeightCollector().fill(range(1001)) assert repr(a2) == "WeightCollector([0, 1, 2, 3, 4, ..., 996, 997, 998, 999, 1000])" From 9ddb6f9b481275130b4067280098818433749fb1 Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Mon, 22 Jun 2020 23:43:04 +0200 Subject: [PATCH 4/4] deleting _load --- src/boost_histogram/accumulators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/boost_histogram/accumulators.py b/src/boost_histogram/accumulators.py index 277c4f03..ed25704d 100644 --- a/src/boost_histogram/accumulators.py +++ b/src/boost_histogram/accumulators.py @@ -17,6 +17,7 @@ def _load(): locals().update(_load()) del absolute_import, division, print_function +del _load # Not supported by PyBind builtins # Enable if wrapper added