Skip to content

Commit

Permalink
Merge pull request #8 from yufongpeng/release-0.2.1
Browse files Browse the repository at this point in the history
Release 0.2.1
  • Loading branch information
yufongpeng authored Mar 17, 2023
2 parents c2b1d27 + e808234 commit 95b71cd
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "AnovaGLM"
uuid = "0a47a8e3-ec57-451e-bddb-e0be9d22772b"
authors = ["Yu-Fong Peng <sciphypar@gmail.com>"]
version = "0.2.0"
version = "0.2.1"

[deps]
AnovaBase = "946dddda-6a23-4b48-8e70-8e60d9b8d680"
Expand Down
59 changes: 45 additions & 14 deletions src/anova.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
# Main API
"""
anova(<glmmodels>...; test::Type{<: GoodnessOfFit}, <keyword arguments>)
anova(<anovamodel>; test::Type{<: GoodnessOfFit}, <keyword arguments>)
anova(test::Type{<: GoodnessOfFit}, <glmmodels>...; <keyword arguments>)
anova(test::Type{<: GoodnessOfFit}, <anovamodel>; <keyword arguments>)
Analysis of variance.
Expand All @@ -13,16 +15,16 @@ Return `AnovaResult{M, test, N}`. See [`AnovaResult`](@ref) for details.
1. `TableRegressionModel{<: LinearModel}` fitted by `GLM.lm`
2. `TableRegressionModel{<: GeneralizedLinearModel}` fitted by `GLM.glm`
If mutiple models are provided, they should be nested and the last one is the most complex.
* `anovamodel`: wrapped model objects; `FullModel` and `NestedModels`.
* `test`: test statistics for goodness of fit. Available tests are [`LikelihoodRatioTest`](@ref) ([`LRT`](@ref)) and [`FTest`](@ref). The default is based on the model type.
1. `TableRegressionModel{<: LinearModel}`: `FTest`.
2. `TableRegressionModel{<: GeneralizedLinearModel}`: based on distribution function, see `canonicalgoodnessoffit`.
## Other keyword arguments
# Other keyword arguments
* When one model is provided:
1. `type` specifies type of anova (1, 2 or 3). Default value is 1.
* When multiple models are provided:
1. `check`: allows to check if models are nested. Defalut value is true. Some checkers are not implemented now.
2. `isnested`: true when models are checked as nested (manually or automatically). Defalut value is false.
!!! note
For fitting new models and conducting anova at the same time, see [`anova_lm`](@ref) for `LinearModel`, [`anova_glm`](@ref) for `GeneralizedLinearModel`.
Expand All @@ -34,11 +36,31 @@ anova(models::Vararg{TableRegressionModel{<: LinearModel}};
kwargs...) =
anova(test, models...; kwargs...)

anova(models::FullModel{<: TableRegressionModel{<: LinearModel}};
test::Type{<: GoodnessOfFit} = FTest,
kwargs...) =
anova(test, models; kwargs...)

anova(anovamodel::NestedModels{<: TableRegressionModel{<: LinearModel}};
test::Type{<: GoodnessOfFit} = FTest,
kwargs...) =
anova(test, anovamodel; kwargs...)

anova(models::Vararg{TableRegressionModel{<: GeneralizedLinearModel}};
test::Type{<: GoodnessOfFit} = canonicalgoodnessoffit(models[1].model.rr.d),
kwargs...) =
anova(test, models...; kwargs...)

anova(anovamodel::FullModel{<: TableRegressionModel{<: GeneralizedLinearModel}};
test::Type{<: GoodnessOfFit} = canonicalgoodnessoffit(anovamodel.model.model.rr.d),
kwargs...) =
anova(test, anovamodel; kwargs...)

anova(anovamodel::NestedModels{<: TableRegressionModel{<: GeneralizedLinearModel}};
test::Type{<: GoodnessOfFit} = canonicalgoodnessoffit(anovamodel.model[1].model.rr.d),
kwargs...) =
anova(test, anovamodel; kwargs...)

# ==================================================================================================================
# ANOVA by F test
# LinearModels
Expand All @@ -49,9 +71,9 @@ anova(::Type{FTest},
type::Int = 1, kwargs...) = anova(FTest, FullModel(trm, type, isnullable(trm.model), true); kwargs...)

function anova(::Type{FTest}, aovm::FullModel{<: TRM_LM})
f = formula(aovm.model)
assign = asgn(predictors(aovm))
fullasgn = asgn(f)
fullpred = predictors(aovm.model)
fullasgn = asgn(fullpred)
df = dof_asgn(assign)
varβ = vcov(aovm.model.model)
β = aovm.model.model.pp.beta0
Expand All @@ -63,17 +85,17 @@ function anova(::Type{FTest}, aovm::FullModel{<: TRM_LM})
end
elseif aovm.type == 2
fstat = ntuple(last(fullasgn) - offset) do fix
select1 = sort!(collect(select_super_interaction(f.rhs, fix + offset)))
select1 = sort!(collect(select_super_interaction(fullpred, fix + offset)))
select2 = setdiff(select1, fix + offset)
select1 = findall(in(select1), fullasgn)
select2 = findall(in(select2), fullasgn)
(β[select1]' * inv(varβ[select1, select1]) * β[select1] - β[select2]' * inv(varβ[select2, select2]) * β[select2]) / df[fix]
(β[select1]' * (varβ[select1, select1] \ β[select1]) - β[select2]' * (varβ[select2, select2] \ β[select2])) / df[fix]
end
else
# calculate block by block
fstat = ntuple(last(fullasgn) - offset) do fix
select = findall(==(fix + offset), fullasgn)
β[select]' * inv(varβ[select, select]) * β[select] / df[fix]
β[select]' * (varβ[select, select] \ β[select]) / df[fix]
end
end
σ² = dispersion(aovm.model.model, true)
Expand Down Expand Up @@ -129,8 +151,7 @@ end

function anova(::Type{FTest},
trms::Vararg{M};
check::Bool = true,
isnested::Bool = false) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}
check::Bool = true) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}
df = dof.(trms)
ord = sortperm(collect(df))
df = df[ord]
Expand All @@ -144,8 +165,7 @@ end

function anova(::Type{LRT},
trms::Vararg{M};
check::Bool = true,
isnested::Bool = false) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}
check::Bool = true) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}
df = dof.(trms)
ord = sortperm(collect(df))
trms = trms[ord]
Expand All @@ -155,7 +175,11 @@ function anova(::Type{LRT},
lrt_nested(NestedModels{M}(trms), df, deviance.(trms), dispersion(last(trms).model, true))
end

anova(::Type{FTest}, anovamodel::NestedModels{M}) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}} =
ftest_nested(anovamodel, dof.(anovamodel.model), round.(Int, dof_residual.(anovamodel.model)), deviance.(anovamodel.model), dispersion(last(anovamodel.model).model, true))

anova(::Type{LRT}, anovamodel::NestedModels{M}) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}} =
lrt_nested(anovamodel, dof.(anovamodel.model), deviance.(anovamodel.model), dispersion(last(anovamodel.model).model, true))
# =================================================================================================================================
# Fit new models

Expand All @@ -170,7 +194,12 @@ end
ANOVA for simple linear regression.
The arguments `X` and `y` can be a `Matrix` and a `Vector` or a `Formula` and a `DataFrame`.
# Arguments
* `X` and `y` can be a `Matrix` and a `Vector` or a `Formula` and a `Tables.jl` compatible data.
* `test`: test statistics for goodness of fit.
# Keyword arguments
* `test`: test statistics for goodness of fit.
* `type` specifies type of anova (1, 2 or 3). Default value is 1.
* `dropcollinear` controls whether or not `lm` accepts a model matrix which is less-than-full rank. If true (the default), only the first of each set of linearly-dependent columns is used. The coefficient for redundant linearly dependent columns is 0.0 and all associated statistics are set to NaN.
Expand All @@ -188,7 +217,7 @@ function anova(test::Type{<: GoodnessOfFit}, ::Type{LinearModel}, X, y;
type::Int = 1,
kwargs...)
model = lm(X, y; kwargs...)
anova(test, model; type = type)
anova(test, model; type)
end

"""
Expand All @@ -201,9 +230,11 @@ end
ANOVA for genaralized linear models.
The arguments `X` and `y` can be a `Matrix` and a `Vector` or a `Formula` and a `DataFrame`.
# Arguments
* `X` and `y` can be a `Matrix` and a `Vector` or a `Formula` and a `Tables.jl` compatible data.
* `d`: a `GLM.UnivariateDistribution`.
* `l`: a `GLM.Link`
* `test`: test statistics for goodness of fit based on distribution function. See `canonicalgoodnessoffit`.
For other keyword arguments, see `fit`.
"""
Expand Down
15 changes: 8 additions & 7 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,13 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64}
@testset "AnovaGLM.jl" begin
@testset "LinearModel" begin
@testset "Simple linear regression" begin
lm0, lm1, lm2, lm3, lm4 = nestedmodels(LinearModel, @formula(SepalLength ~ SepalWidth * Species), iris, dropcollinear = false).model
lms = nestedmodels(LinearModel, @formula(SepalLength ~ SepalWidth * Species), iris, dropcollinear = false)
lm0, lm1, lm2, lm3, lm4 = lms.model
global aov3 = anova(lm4, type = 3)
global aov2 = anova_lm(@formula(SepalLength ~ SepalWidth * Species), iris, type = 2)
global aov1 = anova(lm4)
global aov1 = anova(FullModel(lm4, 1, true, true))
global aovf = anova(lm0, lm1, lm2, lm3, lm4)
global aovlr = anova(LRT, lm0, lm1, lm2, lm3, lm4)
global aovlr = anova(LRT, lms)
global aov1lr = anova(LRT, lm4)
global aovlf = anova_lm(FTest, @formula(wdi_lifexp ~ log(gle_rgdpc) * ti_cpi), qog18, type = 2)
ft = ftest(lm1.model, lm2.model, lm3.model, lm4.model)
Expand Down Expand Up @@ -111,9 +112,9 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64}
end

@testset "Poisson regression" begin
gmp = nestedmodels(GeneralizedLinearModel, @formula(num_awards ~ prog * math), sim, Poisson()).model
global aov = anova(gmp...)
lr = lrtest(gmp...)
gmp = nestedmodels(GeneralizedLinearModel, @formula(num_awards ~ prog * math), sim, Poisson())
global aov = anova(gmp)
lr = lrtest(gmp.model...)
@test !(@test_error test_show(aov))
@test first(nobs(aov)) == lr.nobs
@test dof(aov) == lr.dof
Expand Down Expand Up @@ -149,7 +150,7 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64}

@testset "InverseGaussian regression" begin
gmi = glm(@formula(SepalLength ~ SepalWidth * Species), iris, InverseGaussian())
global aov = anova(gmi)
global aov = anova(FullModel(gmi, 1, false, false))
@test !(@test_error test_show(aov))
@test nobs(aov) == round(Int, nobs(gmi))
@test dof(aov) == (1, 2, 2)
Expand Down

2 comments on commit 95b71cd

@yufongpeng
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/79776

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.1 -m "<description of version>" 95b71cd3dd9701dfcce832632808b38b44425c58
git push origin v0.2.1

Please sign in to comment.