diff --git a/Project.toml b/Project.toml index 0cfe85f..cd658d3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,16 +1,18 @@ name = "RateTables" uuid = "d40fb65e-c2ee-4113-9e14-cb96ca0acb32" authors = ["Oskar Laverny and contributors"] -version = "0.1.0" +version = "0.1.1" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" [compat] Aqua = "0.8" CSV = "0.10" DataFrames = "1" +Distributions = "0.25" RData = "1" Test = "1.6" julia = "1" diff --git a/docs/src/index.md b/docs/src/index.md index 2836fa2..b027888 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -77,6 +77,32 @@ Please note that retrieving these daily hazards is a highly sensitive operation For a list of the available rate tables, kindly refer to the following index: +## Life random variables + +The `Life` function is used to extract individual life profiles (as random variables complient with `Distributions.jl`'s API) from a comprehensive ratetable, by using covariates such as age, gender, and health status or others. Once these life profiles are established, they serve as foundational elements for various analytical practices such as survival probability estimations, expected lifespan calculations, and simulations involving random variables related to life expectancy. + +When applying it to a male individual aged $20$ in $1990$, we get the outcome below: + +```@example 1 +L = Life(slopop[:male], 7000, 1990*365.241) +``` + +Due to the constance of the hazard rates on each cell of the lifetable, the life expectation can be computed through the following formula: + +$$ \mathbf{E}(P) = \int_0^\inf S_p (t) dt = \sum_{j=0}^\inf \frac{S_p(t_j)}{\lambda_p(t_j)(1 - exp(-\lambda_p(t_j)(t_{j+1}-t_j)))} $$ + +Implemented in the function `Distributions.expectation`: + +```@example 1 +expectation(L)/365.241 +``` + +We get $57.7$ years left, implying a total life expectancy of about $77$ years for the given individual. + +These random variables comply with the [`Distributions.jl`](https://github.com/JuliaStats/Distributions.jl)'s API. + +## Package contents + ```@index ``` diff --git a/src/Life.jl b/src/Life.jl new file mode 100644 index 0000000..faf01e4 --- /dev/null +++ b/src/Life.jl @@ -0,0 +1,96 @@ +""" + Life(brt::BasicRateTable,a,d) + +This function returns a random variable that correspond to an extracted Life from the `BasicRateTable` at age `a` and date `d`. + +This works by checking if the individual is closer to the oldest age than the last year in the ratetable, calculating at each step the time difference and the hazard values. For the younger individuals, we assume they go through the last column at the end no matter what age they are. +""" +struct Life<:Distributions.ContinuousUnivariateDistribution + ∂t::Vector{Float64} + λ::Vector{Float64} + function Life(brt::BasicRateTable,a,d) + i, j = dty(a, brt.extrema_age...), dty(d, brt.extrema_year...) + + rem_a = RT_DAYS_IN_YEAR - rem(a, RT_DAYS_IN_YEAR) + rem_d = RT_DAYS_IN_YEAR - rem(d, RT_DAYS_IN_YEAR) + + # Do we go right or below first ? + # Happy birthday (below) or Happy new year (right) first ? + k,l = rem_a < rem_d ? (i+1,j) : (i,j+1) + + # Cap obtained values to avoid going out of bounds: + K,L = size(brt.values) + k = min(k, K) + l = min(l, L) + + # lengths and hazards in the first two cells: + ∂t = [min(rem_a,rem_d), abs(rem_a - rem_d)] + λ = [brt.values[i,j], brt.values[k,l]] + + while (k < K) && (l < L) + i,j,k,l = i+1, j+1, k+1, l+1 + push!(∂t, RT_DAYS_IN_YEAR - ∂t[2], ∂t[2]) + push!(λ, brt.values[i,j], brt.values[k,l]) + end + if (l >= L) # exit on the right => still young ! + # A good approximation is to go through the last column. + for m in (k+1):K + push!(∂t, RT_DAYS_IN_YEAR) + push!(λ, brt.values[m,end]) + end + end + return new(∂t,λ) + end +end +Distributions.@distr_support Life 0.0 Inf +function Distributions.expectation(L::Life) + S = 1.0 + E = 0.0 + for j in eachindex(L.∂t) + if L.λ[j] > 0 + S_inc = exp(-L.λ[j]*L.∂t[j]) + E += S * (1 - S_inc) / L.λ[j] + S *= S_inc + else + E += S * L.∂t[j] + end + end + # This reminder assumes a exponential life time afer the maximum age. + R = ifelse(L.λ[end] == 0.0, 0.0, S / L.λ[end]) + return E + R +end +""" + cumhazard + +Assuming the last box is infinitely wide, we calculate the cumulative hazard from ∂t and λ taken from the `Life` function. +""" +function cumhazard(L::Life, t::Real) + Λ = 0.0 + u = 0.0 + for j in eachindex(L.∂t) + u += L.∂t[j] + if t > u + Λ += L.λ[j]*L.∂t[j] + else + Λ += L.λ[j]*(t-(u-L.∂t[j])) + return Λ + end + end + # We consider that the last box is in fact infinitely wide (exponential tail) + return Λ + (t-u)*L.λ[end] +end +Distributions.ccdf(L::Life, t::Real) = exp(-cumhazard(L::Life,t)) +function Distributions.quantile(L::Life, p::Real) + Λ_target = -log(1-p) + Λ = 0.0 + u = 0.0 + for j in eachindex(L.∂t) + Λ += L.λ[j]*L.∂t[j] + u += L.∂t[j] + if Λ_target < Λ + u -= (Λ - Λ_target) / L.λ[j] + return u + end + end + return u +end \ No newline at end of file diff --git a/src/RateTable.jl b/src/RateTable.jl index 84a0df8..7edc2b9 100644 --- a/src/RateTable.jl +++ b/src/RateTable.jl @@ -114,3 +114,4 @@ The parameters `age` and `date` have to be in days (1 year = $(RT_DAYS_IN_YEAR) @inline daily_hazard(rt::RateTable, a, d; kwargs...) = daily_hazard(getindex(rt; kwargs...), a, d) @inline daily_hazard(rt::RateTable, a, d, args...) = daily_hazard(getindex(rt, args...), a, d) + diff --git a/src/RateTables.jl b/src/RateTables.jl index 95d8134..e799591 100644 --- a/src/RateTables.jl +++ b/src/RateTables.jl @@ -2,6 +2,8 @@ module RateTables import CSV using DataFrames +using Distributions +import Distributions: ccdf, cdf, expectation using Base.Cartesian const RT_DAYS_IN_YEAR = 365.241 @@ -10,7 +12,20 @@ const RT_YEARS_IN_DAY = Float64(1/big"365.241") # precise include("RateTable.jl") include("hmd_rates.jl") include("r_rates.jl") +include("Life.jl") -export hmd_rates, hmd_countries, slopop, survexp_us, survexp_usr, survexp_mn, survexp_fr, daily_hazard, availlable_covariates, frpop +export hmd_rates, + hmd_countries, + slopop, + survexp_us, + survexp_usr, + survexp_mn, + survexp_fr, + daily_hazard, + availlable_covariates, + frpop, + Life, + expectation, + ccdf end diff --git a/test/runtests.jl b/test/runtests.jl index 99dd517..833a323 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,6 +20,12 @@ using RData v3 = daily_hazard(rt[s], a, d) @test v1 == v2 @test v2 == v3 + + # check life: + L = Life(rt[s], a, d) + e, r = expectation(L), rand(L, 10) + @test all(a .+ r .<= 120*365.241) # noone lives too long. + @test e+a <= 120*365.241 end a = 20*365.241 + 365*rand() @@ -33,6 +39,12 @@ using RData @test v1 == v2 @test v2 == v3 @test v4 == v3 + + # check life: + L = Life(survexp_usr[s,r], a, d) + e, r = expectation(L), rand(L, 10) + @test all(a .+ r .<= 120*365.241) # noone lives too long. + @test e+a <= 120*365.241 end @testset "HMD ratetables" begin @@ -47,6 +59,12 @@ using RData @test v1 == v2 @test v2 == v3 @test v4 == v3 + + # check life: + L = Life(hmd_rates[c,s],a,d) + e, r = expectation(L), rand(L, 10) + @test all(a .+ r .<= 120*365.241) # noone lives too long. + @test e+a <= 120*365.241 end end end