Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds collector accumulator #378

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions include/bh_python/accumulators/collector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright 2020 Hans Dembinski
//
// Distributed under the Boost Software License, version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#pragma once

#include <algorithm>
#include <array>
#include <boost/core/nvp.hpp>
#include <boost/histogram/weight.hpp>
#include <vector>

namespace accumulators {

/** Keeps track of all weights in each bin.

Can be used to compute bootstrap estimates of the uncertainies.
*/
template <class ValueType>
struct weight_collector {
using value_type = ValueType;
using const_reference = const value_type&;
using data_type = std::vector<value_type>;

weight_collector() = default;

void operator+=(const boost::histogram::weight_type<value_type>& w) noexcept {
data.push_back(w.value);
}

weight_collector& operator+=(const weight_collector& rhs) noexcept {
data.reserve(data.size() + rhs.data.size());
for(auto&& x : rhs.data)
data.push_back(x);
return *this;
}

weight_collector& operator*=(const value_type& s) noexcept {
for(auto&& x : data)
x *= s;
return *this;
}

bool operator==(const weight_collector& rhs) const noexcept {
return std::equal(data.begin(), data.end(), rhs.data.begin(), rhs.data.end());
}

bool operator!=(const weight_collector& rhs) const noexcept {
return !operator==(rhs);
}

template <class Archive>
void serialize(Archive& ar, unsigned) {
ar& boost::make_nvp("data", data);
}

data_type data{};
};

/** Keeps track of all samples in each bin.

Can be used to compute bootstrap estimates of the uncertainies.
*/
template <class ValueType>
struct sample_collector {
using value_type = ValueType;
using const_reference = const value_type&;
using item_type = std::array<value_type, 2>;
using data_type = std::vector<item_type>;

sample_collector() = default;

void operator()(const value_type& x) noexcept { data.emplace_back(1, x); }

void operator()(const boost::histogram::weight_type<value_type>& w,
const value_type& x) noexcept {
data.emplace_back(w.value, x);
}

sample_collector& operator+=(const sample_collector& rhs) noexcept {
data.reserve(data.size() + rhs.data.size());
for(auto&& x : rhs)
data.push_back(x);
return *this;
}

sample_collector& operator*=(const value_type& s) noexcept {
for(auto&& x : data)
x.second *= s;
return *this;
}

bool operator==(const sample_collector& rhs) const noexcept {
return std::equal(data.begin(), data.end(), rhs.begin(), rhs.end());
}

bool operator!=(const sample_collector& rhs) const noexcept {
return !operator==(rhs);
}

template <class Archive>
void serialize(Archive& ar, unsigned) {
ar& boost::make_nvp("data", data);
}

data_type data{};
};

} // namespace accumulators
25 changes: 25 additions & 0 deletions include/bh_python/accumulators/ostream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#pragma once

#include <bh_python/accumulators/collector.hpp>
#include <bh_python/accumulators/mean.hpp>
#include <bh_python/accumulators/weighted_mean.hpp>
#include <bh_python/accumulators/weighted_sum.hpp>
Expand Down Expand Up @@ -77,4 +78,28 @@ operator<<(std::basic_ostream<CharT, Traits>& os,
return os;
}

template <class CharT, class Traits, class W>
std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
const weight_collector<W>& wc) {
if(os.width() == 0) {
os << "[";
const auto n = wc.data.size();
if(n > 1000) {
for(std::size_t i = 0; i != 5; ++i)
os << wc.data[i] << ", ";
os << "...";
for(std::size_t i = n - 5; i != n; ++i)
os << ", " << wc.data[i];
} else {
for(std::size_t i = 0; i != n; ++i) {
os << wc.data[i];
if(i < n - 1)
os << ", ";
}
}
return os << "]";
}
return handle_nonzero_width(os, wc);
}

} // namespace accumulators
2 changes: 1 addition & 1 deletion src/boost_histogram/_internal/view.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

from ..accumulators import Mean, WeightedMean, WeightedSum
from ..accumulators import Mean, WeightedMean, WeightedSum, WeightCollector

import numpy as np

Expand Down
20 changes: 14 additions & 6 deletions src/boost_histogram/accumulators.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

from ._core.accumulators import Sum, Mean, WeightedSum, WeightedMean
Copy link
Member

@henryiii henryiii Jun 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is taking a very simple static list, and doing run time manipulations with a function that has more lines than the code it replaces, breaking static analysis. We are also losing any ability to not follow the specific naming scheme in the future if something different is added.

If we add unit tests for a new type here, that will immediately break if a developer forgets to update this static list.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, PyBind11 is anything but DRY...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remember, _core is monkey-patched for documentation, so everything in it should be explicitly imported. It is also ignored for static analysis, so there again, everything should be explicitly imported. Explicit is better than implicit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (unrelated to list accumulators) change is also what is breaking Python 2!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what pybind11 has to do with it, and on the contrary, it is a good example for being dry. It is even stated in their docs, that they strongly prefer minimal code to do the work. Minimal code equates avoiding redundancy.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your counter arguments make no sense to me. The code is explicit, explicit in the forwarding and transformation rules. I don't have a problem with not being able to do static analysis here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have another solution that allows me to easily add an accumulator without changing the code in several places, then go ahead. For now this is better than it was before.


del absolute_import, division, print_function
def _load():
from ._core import accumulators as acc

r = {}
for key in dir(acc):
if key.startswith("_"):
continue
cls = getattr(acc, key)
cls.__module__ = "boost_histogram.accumulators"
r[key] = cls
return r

__all__ = ("Sum", "Mean", "WeightedSum", "WeightedMean")

for cls in (Sum, Mean, WeightedSum, WeightedMean):
cls.__module__ = "boost_histogram.accumulators"
del cls
locals().update(_load())
del absolute_import, division, print_function
del _load

# Not supported by PyBind builtins
# Enable if wrapper added
Expand Down
26 changes: 18 additions & 8 deletions src/boost_histogram/cpp/accumulators.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

from ..accumulators import (
Sum as sum,
Mean as mean,
WeightedSum as weighted_sum,
WeightedMean as weighted_mean,
)

del absolute_import, division, print_function
def _load():
from .. import accumulators as acc
import string

tr = {ord(k): "_" + k.lower() for k in string.ascii_uppercase}

# from CamelCase to snake_case
r = {}
for key in dir(acc):
if key.startswith("_"):
continue
nkey = key[0].lower() + key[1:].translate(tr)
r[nkey] = getattr(acc, key)
return r

__all__ = ("sum", "mean", "weighted_sum", "weighted_mean")

locals().update(_load())

del absolute_import, division, print_function
del _load

# These will have the original module locations and original names.
114 changes: 97 additions & 17 deletions src/register_accumulators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <bh_python/pybind11.hpp>

#include <bh_python/accumulators/collector.hpp>
#include <bh_python/accumulators/mean.hpp>
#include <bh_python/accumulators/ostream.hpp>
#include <bh_python/accumulators/weighted_mean.hpp>
Expand All @@ -14,22 +15,60 @@
#include <boost/histogram/accumulators/sum.hpp>
#include <pybind11/operators.h>

// should be updated when py::vectorize is updated to support consumers
template <class Consumer>
auto consume(Consumer& c, py::object x) -> decltype(c(0.0), void()) {
py::vectorize([](Consumer& c, double x) {
c(x);
return false;
})(c, x);
}

// should be updated when py::vectorize is updated to support consumers
template <class Consumer>
auto consume(Consumer& c, py::object x) -> decltype(c += 0.0, void()) {
py::vectorize([](Consumer& c, double x) {
c += x;
return false;
})(c, x);
}

// should be updated when py::vectorize is updated to support consumers
template <class Consumer>
void consume_w(Consumer& c, py::object w) {
py::vectorize([](Consumer& c, double w) {
c += bh::weight(w);
return false;
})(c, w);
}

// should be updated when py::vectorize is updated to support consumers
template <class Consumer>
void consume_w(Consumer& c, py::object w, py::object x) {
py::vectorize([](Consumer& c, double w, double x) {
c(bh::weight(w), x);
return false;
})(c, w, x);
}

std::size_t from_python_index(std::size_t size, ssize_t idx) {
if(idx < 0)
idx += static_cast<ssize_t>(size);
if(idx >= static_cast<ssize_t>(size))
throw py::index_error("index is out of range");
return static_cast<std::size_t>(idx);
}

/// The mean fill can be implemented once. (sum fill varies slightly)
template <class T>
decltype(auto) make_mean_fill() {
return [](T& self, py::object value, py::kwargs kwargs) {
py::object weight = optional_arg(kwargs, "weight", py::none());
finalize_args(kwargs);
if(weight.is_none()) {
py::vectorize([](T& self, double val) {
self(val);
return false;
})(self, value);
consume(self, value);
} else {
py::vectorize([](T& self, double wei, double val) {
self(bh::weight(wei), val);
return false;
})(self, weight, value);
consume_w(self, weight, value);
}
return self;
};
Expand Down Expand Up @@ -100,11 +139,10 @@ void register_accumulators(py::module& accumulators) {
py::object variance = optional_arg(kwargs, "variance", py::none());
finalize_args(kwargs);
if(variance.is_none()) {
py::vectorize([](weighted_sum& self, double val) {
self += bh::weight(val);
return false;
})(self, value);
consume_w(self, value);
} else {
// should be updated when py::vectorize is updated to support
// consumers
py::vectorize([](weighted_sum& self, double val, double var) {
self += weighted_sum(val, var);
return false;
Expand Down Expand Up @@ -157,11 +195,7 @@ void register_accumulators(py::module& accumulators) {
.def(
"fill",
[](sum& self, py::object value) {
py::vectorize([](sum& self, double v) {
self += v;
return false; // Required in PyBind11 2.4.2,
// requirement may be removed
})(self, value);
consume(self, value);
return self;
},
"value"_a,
Expand Down Expand Up @@ -326,4 +360,50 @@ void register_accumulators(py::module& accumulators) {
})

;

using weight_collector = accumulators::weight_collector<double>;
using std::size_t;

register_accumulator<weight_collector>(accumulators, "WeightCollector")
.def(py::init([](py::sequence weight) {
auto wc = new weight_collector{};
consume_w(*wc, weight);
return wc;
}),
"seq"_a)

.def("view",
[](py::object pyself) {
auto& self = py::cast<weight_collector&>(pyself);
return py::array(
static_cast<ssize_t>(self.data.size()), self.data.data(), pyself);
})

.def("__iadd__",
[](weight_collector& self, double w) {
self += bh::weight(w);
return self;
})

.def(
"fill",
[](weight_collector& self, py::object weight) {
consume_w(self, weight);
return self;
},
"weight"_a,
"Fill the collector with weights.")

.def("__len__", [](const weight_collector& self) { return self.data.size(); })

.def("__getitem__",
[](const weight_collector& self, ssize_t idx) {
return self.data[from_python_index(self.data.size(), idx)];
})
.def("__setitem__",
[](weight_collector& self, ssize_t idx, double w) {
self.data[from_python_index(self.data.size(), idx)] = w;
})

;
}
Loading