Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further allocation reduction in power computations #361

Merged
merged 26 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
dbdbcc1
working version with oop sqr!
PerezHz Jul 8, 2024
a34ce7b
Update Project.toml
PerezHz Jul 9, 2024
c04d9f8
Update src/TaylorSeries.jl
PerezHz Jul 9, 2024
5d3d57d
Update `zero(x)`; add in-place mul! for TaylorN
PerezHz Jul 9, 2024
a9c770b
Get rid of most allocations in pow! for Taylor1{TaylorN{T}; add power…
PerezHz Jul 9, 2024
5bcccf4
Remove extra empty line
PerezHz Jul 9, 2024
6cce90a
Add power_by_squaring methods to avoid method ambiguities (detected b…
PerezHz Jul 9, 2024
a205dcd
Remove duplicated return
PerezHz Jul 10, 2024
e44811c
Add inbounds
PerezHz Jul 10, 2024
2e4e60a
Address review by @lbenet
PerezHz Jul 11, 2024
f581b21
Add extra arg to pow!
PerezHz Jul 11, 2024
27c3d0e
Update TaylorSeries IA extension
PerezHz Jul 11, 2024
dc6a68c
Update pow! and add fix suggested by @lbenet
PerezHz Jul 11, 2024
364faf7
Middle-of-the-road approach to suggestion by @lbenet
PerezHz Jul 12, 2024
99e3061
Handle pow! cases by dispatch-by-value
PerezHz Jul 12, 2024
efa02a1
Remove unneeded deepcopy in setindex! method
PerezHz Jul 12, 2024
d9af8ca
Add `power_by_squaring(x, ::Val{3})` methods and add tests
PerezHz Jul 14, 2024
68bf845
Update comments
PerezHz Jul 14, 2024
8610e21
Bump patch version
PerezHz Jul 14, 2024
ba30e7a
Switch back from dispatch-by-value to if/else for pow!
PerezHz Jul 17, 2024
ea15252
Revert change in setindex! overload for Taylor1
PerezHz Jul 17, 2024
21c876b
Add test for nested Taylor1s setindex! method
PerezHz Jul 17, 2024
702fbac
De-bump patch version
PerezHz Jul 17, 2024
6264eae
Another approach to suggestion by @lbenet
PerezHz Jul 17, 2024
4f613d6
Fix typo
PerezHz Jul 17, 2024
ad18057
Fix another typo
PerezHz Jul 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ext/TaylorSeriesIAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ function ^(a::Taylor1{Interval{T}}, r::S) where {T<:Real, S<:Real}
c_order = l0 == 0 ? a.order : min(a.order, trunc(Int,r*a.order))
c = Taylor1(zero(aux), c_order)
for k = 0:c_order
TS.pow!(c, aa, r, k)
TS.pow!(c, aa, c[0], r, k)
end

return c
Expand All @@ -66,7 +66,7 @@ function ^(a::TaylorN{Interval{T}}, r::S) where {T<:Real, S<:Real}

c = TaylorN( a0r, a.order)
for ord in 1:a.order
TS.pow!(c, aa, r, ord)
TS.pow!(c, aa, c, r, ord)
end

return c
Expand Down
40 changes: 39 additions & 1 deletion src/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ for T in (:Taylor1, :HomogeneousPolynomial, :TaylorN)
end

for T in (:Taylor1, :TaylorN)
@eval zero(a::$T) = $T(zero.(a.coeffs))
@eval function zero(a::$T)
return $T(zero.(a.coeffs))
end
@eval function one(a::$T)
b = zero(a)
b[0] = one(b[0])
Expand Down Expand Up @@ -572,6 +574,42 @@ for T in (:Taylor1, :TaylorN)
end
end

# in-place product: `a` <- `a*b`
# this method computes the product `a*b` and saves it back into `a`
# assumes `a` and `b` are of same order
function mul!(a::TaylorN{T}, b::TaylorN{T}) where {T<:Number}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An equivalent method mul!(a::Taylor1{T}, b::Taylor1{T}) where {T<:Number}) could be useful for the extension of pow! for Taylor1{TaylorN{T}}?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would indeed by useful, I've added the corresponding method, thanks!

@inbounds for k in reverse(eachindex(a))
mul!(a, a, b[0][1], k)
for l in 1:k
mul!(a[k], a[k-l], b[l])
end
end
return nothing
end
function mul!(a::Taylor1{T}, b::Taylor1{T}) where {T<:Number}
@inbounds for k in reverse(eachindex(a))
# a[k] <- a[k]*b[0]
mul!(a, a, b[0], k)
for l in 1:k
# a[k] <- a[k] + a[k-l] * b[l]
a[k] += a[k-l] * b[l]
end
end
return nothing
end
function mul!(a::Taylor1{TaylorN{T}}, b::Taylor1{TaylorN{T}}) where {T<:NumberNotSeries}
@inbounds for k in reverse(eachindex(a))
mul!(a, a, b[0], k)
for l in 1:k
# a[k] += a[k-l] * b[l]
for m in eachindex(a[k])
mul!(a[k], a[k-l], b[l], m)
end
end
end
return nothing
end

function mul!(res::Taylor1{TaylorN{T}}, a::Taylor1{TaylorN{T}}, b::Taylor1{TaylorN{T}},
ordT::Int) where {T<:NumberNotSeries}
# Sanity
Expand Down
6 changes: 2 additions & 4 deletions src/auxiliary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ function resize_coeffs1!(coeffs::Array{T,1}, order::Int) where {T<:Number}
lencoef = length(coeffs)
resize!(coeffs, order+1)
if order > lencoef-1
z = zero(coeffs[1])
@simd for ord in lencoef+1:order+1
@inbounds coeffs[ord] = z
@inbounds coeffs[ord] = zero(coeffs[1])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that this change helps avoiding the allocation of z. Yet, I think the idea is to use z (stored in the stack) instead of looking for the value of coeffs[1] and zeroing it inside the for, helps wrt performance. I may be wrong, just want to make sure the idea behind the code is clear.

end
end
return nothing
Expand All @@ -39,9 +38,8 @@ function resize_coeffsHP!(coeffs::Array{T,1}, order::Int) where {T<:Number}
@assert order ≤ get_order() && lencoef ≤ num_coeffs
num_coeffs == lencoef && return nothing
resize!(coeffs, num_coeffs)
z = zero(coeffs[1])
@simd for ord in lencoef+1:num_coeffs
@inbounds coeffs[ord] = z
@inbounds coeffs[ord] = zero(coeffs[1])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above

end
return nothing
end
Expand Down
2 changes: 1 addition & 1 deletion src/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ struct TaylorN{T<:Number} <: AbstractSeries{T}
order :: Int

function TaylorN{T}(v::Array{HomogeneousPolynomial{T},1}, order::Int) where T<:Number
coeffs = zeros(HomogeneousPolynomial{T}, order)
coeffs = isempty(v) ? zeros(HomogeneousPolynomial{T}, order) : zeros(v[1], order)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we would need similar fixes in Taylor1 and HomogeneousPolynomial...

@inbounds for i in eachindex(v)
ord = v[i].order
if ord ≤ order
Expand Down
3 changes: 1 addition & 2 deletions src/dictmutfunct.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ const _dict_binary_ops = Dict(
:- => (:subst!, (:_res, :_arg1, :_arg2, :_k), :(_res = _arg1 - _arg2)),
:* => (:mul!, (:_res, :_arg1, :_arg2, :_k), :(_res = _arg1 * _arg2)),
:/ => (:div!, (:_res, :_arg1, :_arg2, :_k), :(_res = _arg1 / _arg2)),
:^ => (:pow!, (:_res, :_arg1, :_arg2, :_k), :(_res = _arg1 ^ float(_arg2))),
:^ => (:pow!, (:_res, :_arg1, :_arg2, :_arg3, :_k), :(_res = _arg1 ^ float(_arg3))),
);

"""
Expand Down Expand Up @@ -196,4 +196,3 @@ internal mutating functions.
Evaluating the entries generates symbols that represent
the actual calls to the internal mutating functions.
""" _dict_binary_calls

2 changes: 1 addition & 1 deletion src/evaluate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ function _evaluate!(suma::TaylorN{T}, a::HomogeneousPolynomial{T}, ind::Int,
else
for ordQ in eachindex(val)
zero!(vv, ordQ)
pow!(vv, val, vpow, ordQ)
pow!(vv, val, vv, vpow, ordQ)
end
end
for ordQ in eachindex(suma)
Expand Down
14 changes: 14 additions & 0 deletions src/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,13 @@ end
return nothing
end

@inline function zero!(a::TaylorN{Taylor1{T}}) where {T<:NumberNotSeries}
for k in eachindex(a)
zero!(a, k)
end
return nothing
end

@inline function one!(c::Taylor1{TaylorN{T}}, k::Int) where {T<:NumberNotSeries}
zero!(c, k)
if k == 0
Expand All @@ -316,6 +323,13 @@ end
return nothing
end

@inline function identity!(c::HomogeneousPolynomial{Taylor1{T}}, a::HomogeneousPolynomial{Taylor1{T}}, k::Int) where {T<:NumberNotSeries}
@inbounds for l in eachindex(c[k])
identity!(c[k], a[k], l)
end
return nothing
end

for T in (:Taylor1, :TaylorN)
@eval begin
@inline function identity!(c::$T{T}, a::$T{T}, k::Int) where {T<:NumberNotSeries}
Expand Down
145 changes: 120 additions & 25 deletions src/power.jl
lbenet marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -58,37 +58,98 @@ end

^(a::Taylor1{TaylorN{T}}, r::Rational) where {T<:NumberNotSeries} = a^(r.num/r.den)



# power_by_squaring; slightly modified from base/intfuncs.jl
# Licensed under MIT "Expat"
for T in (:Taylor1, :HomogeneousPolynomial, :TaylorN)
@eval function power_by_squaring(x::$T, p::Integer)
if p == 1
return copy(x)
elseif p == 0
return one(x)
elseif p == 2
return square(x)
end
# in-place form of power_by_squaring
# this method assumes `y`, `x` and `aux` are of same order
for T in (:Taylor1, :TaylorN)
@eval function power_by_squaring!(y::$T{T}, x::$T{T}, aux::$T{T},
p::Integer) where {T<:NumberNotSeries}
t = trailing_zeros(p) + 1
p >>= t
# aux = x
for k in eachindex(aux)
identity!(aux, x, k)
end
while (t -= 1) > 0
x = square(x)
# aux = square(aux)
for k in reverse(eachindex(aux))
sqr!(aux, k)
end
end
# y = aux
for k in eachindex(y)
identity!(y, aux, k)
end
y = x
while p > 0
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) ≥ 0
# aux = square(aux)
for k in reverse(eachindex(aux))
sqr!(aux, k)
end
end
# y = y * aux
mul!(y, aux)
end
return nothing
end
end


# power_by_squaring; slightly modified from base/intfuncs.jl
# Licensed under MIT "Expat"
for T in (:Taylor1, :HomogeneousPolynomial, :TaylorN)
@eval power_by_squaring(x::$T, p::Integer) = power_by_squaring(x, Val(p))
@eval power_by_squaring(x::$T, ::Val{0}) = one(x)
@eval power_by_squaring(x::$T, ::Val{1}) = copy(x)
@eval power_by_squaring(x::$T, ::Val{2}) = square(x)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps adding power_by_squaring(x::$T, ::Val{3}) could be useful too, since it includes the "odd" powers part of the function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean adding something along the lines of power_by_squaring(x::$T, ::Val{3}) = x*square(x)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point... I guess this is the kind of question that we should define in terms of performance. My naive guess is that x*square(x) will allocate more than the direct call to power_by_squaring, but we should definetily check this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just checked this, and it turns out doing x*square(x) allocates less than power_by_squaring(x) (!), do you think it's worth then to add the ::Val{3} method?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you also teste with intervals? I would choose the case that behaves better wrt allocations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With intervals power_by_squaring(x, 3) on Taylor1{Interval{Float64}} allocates around half as x*square(x), while for TaylorN{Interval{Float64}} allocations are essentially the same

@eval function power_by_squaring(x::$T, ::Val{P}) where P
p = P # copy static parameter `P` into local variable `p`
if $T != HomogeneousPolynomial
y = zero(x)
aux = zero(x)
power_by_squaring!(y, x, aux, p)
else
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) > 0
x = square(x)
end
y *= x
y = x
while p > 0
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) ≥ 0
x = square(x)
end
y *= x
end
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this if block is now unnecessary, and can be replaced simply by:

Suggested change
if $T != HomogeneousPolynomial
y = zero(x)
aux = zero(x)
power_by_squaring!(y, x, aux, p)
else
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) > 0
x = square(x)
end
y *= x
y = x
while p > 0
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) 0
x = square(x)
end
y *= x
end
end
y = zero(x)
aux = zero(x)
power_by_squaring!(y, x, aux, p)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for noticing this @lbenet! I agree there's better ways to deal with this if block, I'm just wondering if it makes sense to have a power_by_squaring! for HomogeneousPolynomial, since inside such methods, there's a lot of in-place squaring going around, which would require a different treatment, since squaring a HomogeneousPolynomial changes its order, so some extra auxiliaries might be needed, I think? Anyway, I'm pushing a commit where the power_by_squaring! method for HomogeneousPolynomial is declared separately, so that this if does not occur inside the method.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it is worth keeping both methods for HomogeneousPolynomials. I think they can be useful in methods that exploit evaluate, which is worth checking, though perhaps in another PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I added a comment with a TODO for another PR

return y
end
end

power_by_squaring(x::TaylorN{Taylor1{T}}, ::Val{0}) where {T<:NumberNotSeries} = one(x)
power_by_squaring(x::TaylorN{Taylor1{T}}, ::Val{1}) where {T<:NumberNotSeries} = copy(x)
power_by_squaring(x::TaylorN{Taylor1{T}}, ::Val{2}) where {T<:NumberNotSeries} = square(x)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these methods are defined above in lines 104-106...

function power_by_squaring(x::TaylorN{Taylor1{T}}, ::Val{P}) where {P, T<:NumberNotSeries}
lbenet marked this conversation as resolved.
Show resolved Hide resolved
p = P # copy static parameter `P` into local variable `p`
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) > 0
x = square(x)
end
y = x
while p > 0
t = trailing_zeros(p) + 1
p >>= t
while (t -= 1) ≥ 0
x = square(x)
end
y *= x
end
return y
end

## Real power ##
function ^(a::Taylor1{T}, r::S) where {T<:Number, S<:Real}
Expand All @@ -107,8 +168,9 @@ function ^(a::Taylor1{T}, r::S) where {T<:Number, S<:Real}

c_order = l0 == 0 ? a.order : min(a.order, trunc(Int,r*a.order))
c = Taylor1(zero(aux), c_order)
aux0 = deepcopy(c[0])
for k in eachindex(c)
pow!(c, aa, r, k)
pow!(c, aa, aux0, r, k)
end

return c
Expand All @@ -132,8 +194,9 @@ function ^(a::TaylorN, r::S) where {S<:Real}
in order to expand `^` around 0."""))

c = TaylorN( zero(aux), a.order)
aux = deepcopy(c)
for ord in eachindex(a)
pow!(c, aa, r, ord)
pow!(c, aa, aux, r, ord)
end

return c
Expand All @@ -158,8 +221,9 @@ function ^(a::Taylor1{TaylorN{T}}, r::S) where {T<:NumberNotSeries, S<:Real}

c_order = l0 == 0 ? a.order : min(a.order, trunc(Int,r*a.order))
c = Taylor1(zero(aux), c_order)
aux0 = deepcopy(c[0])
for k in eachindex(c)
pow!(c, aa, r, k)
pow!(c, aa, aux0, r, k)
end

return c
Expand All @@ -184,7 +248,7 @@ exploits `k_0`, the order of the first non-zero coefficient of `a`.

""" pow!

@inline function pow!(c::Taylor1{T}, a::Taylor1{T}, r::S, k::Int) where
@inline function pow!(c::Taylor1{T}, a::Taylor1{T}, ::T, r::S, k::Int) where
{T<:Number, S<:Real}

if r == 0
Expand Down Expand Up @@ -233,7 +297,7 @@ exploits `k_0`, the order of the first non-zero coefficient of `a`.
return nothing
end

@inline function pow!(c::TaylorN{T}, a::TaylorN{T}, r::S, k::Int) where
@inline function pow!(c::TaylorN{T}, a::TaylorN{T}, ::TaylorN{T}, r::S, k::Int) where
{T<:NumberNotSeriesN, S<:Real}

if r == 0
Expand Down Expand Up @@ -266,7 +330,7 @@ end
return nothing
end

@inline function pow!(res::Taylor1{TaylorN{T}}, a::Taylor1{TaylorN{T}}, r::S,
@inline function pow!(res::Taylor1{TaylorN{T}}, a::Taylor1{TaylorN{T}}, aux::TaylorN{T}, r::S,
ordT::Int) where {T<:NumberNotSeries, S<:Real}

if r == 0
Expand Down Expand Up @@ -300,7 +364,7 @@ end
if ordT == lnull
if isinteger(r)
# TODO: get rid of allocations here
res[ordT] = a[l0]^round(Int,r) # uses power_by_squaring
power_by_squaring!(res[ordT], a[l0], aux, round(Int,r))
return nothing
end

Expand All @@ -310,7 +374,7 @@ end
in order to expand `^` around 0."""))

for ordQ in eachindex(a[l0])
pow!(res[ordT], a[l0], r, ordQ)
pow!(res[ordT], a[l0], aux, r, ordQ)
end

return nothing
Expand Down Expand Up @@ -339,7 +403,7 @@ Return `a^2`; see [`TaylorSeries.sqr!`](@ref).

for T in (:Taylor1, :TaylorN)
@eval function square(a::$T)
c = $T( zero(constant_term(a)), a.order)
c = zero(a)
for k in eachindex(a)
sqr!(c, a, k)
end
Expand Down Expand Up @@ -447,6 +511,37 @@ for T = (:Taylor1, :TaylorN)

return nothing
end

# in-place squaring: given `c`, compute expansion of `c^2` and save back into `c`
@inline function sqr!(c::$T{T}, k::Int) where {T<:NumberNotSeries}
lbenet marked this conversation as resolved.
Show resolved Hide resolved
if k == 0
sqr_orderzero!(c, c)
return nothing
end

# Recursion formula
kodd = k%2
kend = (k - 2 + kodd) >> 1
if $T == Taylor1
(kend ≥ 0) && ( @inbounds c[k] = c[0] * c[k] )
@inbounds for i = 1:kend
c[k] += c[i] * c[k-i]
end
@inbounds c[k] = 2 * c[k]
(kodd == 0) && ( @inbounds c[k] = c[k >> 1]^2 )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the following solves the problem...

Suggested change
(kodd == 0) && ( @inbounds c[k] = c[k >> 1]^2 )
(kodd == 0) && ( @inbounds c[k] += c[k >> 1]^2 )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch! It does the trick, many thanks!

else
(kend ≥ 0) && ( @inbounds mul!(c, c[0][1], c, k) )
@inbounds for i = 1:kend
mul!(c[k], c[i], c[k-i])
end
@inbounds mul!(c, 2, c, k)
if (kodd == 0)
accsqr!(c[k], c[k >> 1])
end
end

return nothing
end
end
end

Expand Down
Loading
Loading