Skip to content

Release 0.7.4 #64

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 22, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -9,6 +9,6 @@ jobs:
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
run: julia -e 'using CompatHelper; CompatHelper.main(master = "dev")'
run: julia -e 'using CompatHelper; CompatHelper.main(master_branch = "dev")'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "AnovaBase"
uuid = "946dddda-6a23-4b48-8e70-8e60d9b8d680"
authors = ["Yu-Fong Peng <sciphypar@gmail.com>"]
version = "0.7.3"
version = "0.7.4"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -29,6 +29,4 @@ Use the following packages for different models:
|[FixedEffectModels.jl](https://github.com/FixedEffects/FixedEffectModels.jl)|[AnovaFixedEffectModels.jl](https://github.com/yufongpeng/AnovaFixedEffectModels.jl) |`FixedEffectModel`|`AnovaFixedEffectModels.lfe` or `FixedEffectModels.reg`|

## TO DO
1. Likelihood ratio test for `FixedEffectModels`.
2. `nestedmodels` for `FixedEffectModels`.
3. Implementation of Rao and Mallow's Cp.
1. Implementation of Rao and Mallow's Cp.
20 changes: 10 additions & 10 deletions docs/Manifest.toml
Original file line number Diff line number Diff line change
@@ -11,27 +11,27 @@ version = "0.0.1"

[[deps.AnovaBase]]
deps = ["Distributions", "Printf", "Reexport", "Statistics", "StatsBase", "StatsModels"]
git-tree-sha1 = "8a60d8875f4328232e818c84a387e6d3768990d1"
git-tree-sha1 = "0f861eef5069dee24649295cbc2aaeaa1ffe2af3"
uuid = "946dddda-6a23-4b48-8e70-8e60d9b8d680"
version = "0.7.2"
version = "0.7.3"

[[deps.AnovaFixedEffectModels]]
deps = ["AnovaBase", "Distributions", "FixedEffectModels", "LinearAlgebra", "Printf", "Reexport", "Statistics", "StatsBase", "StatsModels", "Tables"]
git-tree-sha1 = "af61b10af5d1006b4673263d149e67d197b00d3d"
git-tree-sha1 = "74a15787b79d84b2a209ada922614434820c6be0"
uuid = "e4965305-65d6-464d-9c03-ae3e5cffadab"
version = "0.2.2"
version = "0.2.3"

[[deps.AnovaGLM]]
deps = ["AnovaBase", "Distributions", "GLM", "LinearAlgebra", "Printf", "Reexport", "Statistics", "StatsBase", "StatsModels"]
git-tree-sha1 = "dac85d26439ffc76a18200239fbace27c78b1e71"
git-tree-sha1 = "d4d62c676b0078e7c174226959eaddb43a8edafd"
uuid = "0a47a8e3-ec57-451e-bddb-e0be9d22772b"
version = "0.2.1"
version = "0.2.2"

[[deps.AnovaMixedModels]]
deps = ["AnovaBase", "Distributions", "GLM", "LinearAlgebra", "MixedModels", "Printf", "Reexport", "Statistics", "StatsBase", "StatsModels"]
git-tree-sha1 = "3f4d2b74a55cc39e1cfa58e5678c3ee4d44972e3"
git-tree-sha1 = "97d664c8ec2872909b3d033ef02edea27b26bd97"
uuid = "6832ca0c-c2dd-4d0c-851a-521ca5db42f5"
version = "0.2.1"
version = "0.2.2"

[[deps.ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
@@ -258,9 +258,9 @@ uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"

[[deps.FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
git-tree-sha1 = "9dec0199898d4d5c1d1b257cbf2cc498afe03a2a"
git-tree-sha1 = "3b245d1e50466ca0c9529e2033a3c92387c59c2f"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.13.8"
version = "0.13.9"

[[deps.FixedEffectModels]]
deps = ["DataFrames", "FixedEffects", "LinearAlgebra", "Printf", "Reexport", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "StatsModels", "Tables", "Vcov"]
47 changes: 3 additions & 44 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,50 +1,6 @@
push!(LOAD_PATH,"../src/")
using Documenter
using AnovaBase, GLM, AnovaGLM, AnovaMixedModels, AnovaFixedEffectModels, DataFrames
import AnovaBase: _diff, teststat, AnovaTable, pval
import AnovaMixedModels: GLM_MODEL, anovatable, AnovaResult, LRT, NestedModels
#DocMeta.setdocmeta!(AnovaBase, :DocTestSetup, :(using AnovaBase); recursive = true)
teststat(aov::AnovaResult) = aov.teststat
pval(aov::AnovaResult) = aov.pval
function anovatable(aov::AnovaResult{NestedModels{M, N}, LRT};
rownames = string.(1:N)) where {M <: Union{GLM_MODEL, MixedModel}, N}
if last(aov.anovamodel.model) isa GLM_MODEL
AnovaTable([
dof(aov),
[NaN, _diff(dof(aov))...],
dof_residual(aov),
deviance(aov),
teststat(aov),
pval(aov)
],
["DOF", "ΔDOF", "Res.DOF", "Deviance", "χ²", "Pr(>|χ²|)"],
rownames, 6, 5)
elseif last(aov.anovamodel.model).optsum.REML
AnovaTable([
dof(aov),
[NaN, _diff(dof(aov))...],
dof_residual(aov),
deviance(aov),
teststat(aov),
pval(aov)
],
["DOF", "ΔDOF", "Res.DOF", "-2 logLik", "χ²", "Pr(>|χ²|)"],
rownames, 6, 5)
else
AnovaTable([
dof(aov),
[NaN, _diff(dof(aov))...],
dof_residual(aov),
aic.(aov.anovamodel.model),
bic.(aov.anovamodel.model),
deviance(aov),
teststat(aov),
pval(aov)
],
["DOF", "ΔDOF", "Res.DOF", "AIC", "BIC", "-2 logLik", "χ²", "Pr(>|χ²|)"],
rownames, 8, 7)
end
end

makedocs(
sitename = "AnovaBase",
@@ -55,6 +11,9 @@ makedocs(
"Examples_MixedModels.md",
"Examples_FixedEffectModels.md"
],
"Interfacing AnovaBase.jl" => [
"Interface.md"
],
"Algorithm" => [
"Algorithm_AnovaGLM.md",
"Algorithm_AnovaMixedModels.md",
4 changes: 2 additions & 2 deletions docs/src/AnovaBase.md
Original file line number Diff line number Diff line change
@@ -40,15 +40,15 @@ AnovaBase.canonicalgoodnessoffit

## Other interface
```@docs
AnovaBase.ftest_nested
AnovaBase.lrt_nested
AnovaBase.dof_residual(aov::AnovaResult)
AnovaBase.predictors(::RegressionModel)
AnovaBase.anovatable(::AnovaResult{<: FullModel})
```

## Developer utility
```@docs
AnovaBase.ftest_nested
AnovaBase.lrt_nested
AnovaBase.dof_asgn
AnovaBase.prednames
AnovaBase.has_intercept
1 change: 1 addition & 0 deletions docs/src/AnovaFixedEffectModels.md
Original file line number Diff line number Diff line change
@@ -5,5 +5,6 @@ CurrentModule = AnovaFixedEffectModels
```@docs
AnovaFixedEffectModels.anova(::Type{<: GoodnessOfFit}, ::Vararg{FixedEffectModel})
anova_lfe
AnovaFixedEffectModels.nestedmodels(::FixedEffectModel)
lfe
```
7 changes: 5 additions & 2 deletions docs/src/Examples_FixedEffectModels.md
Original file line number Diff line number Diff line change
@@ -18,5 +18,8 @@ using AnovaFixedEffectModels
fem1 = lfe(@formula(gpa ~ fe(student) + occasion + job), gpa)
aovf = anova(fem1)
```
!!! note
Only F-test is available for `FixedEffectModel`.
Likelihood-ratio test is available for nested models.
```@example fem
fems = nestedmodels(FixedEffectModel, @formula(gpa ~ fe(student) + occasion + job), gpa)
anova(LRT, fems)
```
65 changes: 65 additions & 0 deletions docs/src/Interface.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Interface
Given model types `SomeModel` and `OtherModel`, the following functions have to be defined or used.

## [anova](./AnovaBase.md#AnovaBase.anova-Tuple{Type{<:GoodnessOfFit},%20AnovaModel})
`anova` can be overloaded in the following arguments signature.
1. `anova(::SomeModel; test, type, kwargs...)`
2. `anova(::GoodnessOfFit, ::SomeModel; type, kwargs...)`
3. `anova(::Vararg{SomeModel}; test, kwargs...)`
4. `anova(::GoodnessOfFit, ::Vararg{SomeModel}; kwargs...)`

Other arguments signatures are acceptable, but there will be `Type piracy` warnings.

Models should be wrapped in the following types before storing in the returned `AnovaResult`.

|Model|Wrapper|
|:---:|:-----:|
|Single model|`FullModel`|
|Nested models|`NestedModels`|
|Nested models with different types|`MixedAovModels`|

It is recommended that `anova` additionally dispatched on wrapper types and the main algorithm is implemented in it.

## [predictors](./AnovaBase.md#AnovaBase.predictors-Tuple{RegressionModel})
This function returns a tuple of terms which can be used in ANOVA (some terms may not be used because of ANOVA type or model itself).

There is a default method for `RegressionModel`, i.e., `formula(model).rhs.terms`. If the formula for `SomeModel` has special structure like `MixedModel`, this function should be overloaded.

## AnovaModels
### [NestedModels](./AnovaBase.md#AnovaBase.NestedModels)
Wrap nested models in the following way
1. `NestedModels{SomeModel}((model1, model2, ...))`
2. `NestedModels{SomeModel}(model1, model2, ...)`

### [MixedAovModels](./AnovaBase.md#AnovaBase.MixedAovModels)
This is for comparing models with different types.
1. `MixedAovModels{Union{SomeModel, OtherModel}}((model1, model2, ...))`
2. `MixedAovModels{Union{SomeModel, OtherModel}}(model1, model2, ...)`

Both `NestedModels` and `MixedAovModels` do not check if the order is correct, so user should be careful to put simpler models former and more complex models later.
ANOVA can be computed along with some basic statistics. See [`ftest_nested`](./AnovaBase.md#AnovaBase.ftest_nested) and [`lrt_nested`](./AnovaBase.md#AnovaBase.lrt_nested).

### [FullModel](./AnovaBase.md#AnovaBase.FullModel)
`FullModel` wraps the model along with the index of predictors(`pred_id`) that is actually used in ANOVA and ANOVA type.

`AnovaBase` provides a method which automatically determines `pred_id` based on ANOVA type, whether empty model is allowed, and whether intercept is tested. See [`FullModel`](./AnovaBase.md#AnovaBase.FullModel).

To customize `FullModel`, there are many helper functions for manipulating terms. See [Developer utility](./AnovaBase.md#Developer-utility)

## [anovatable](./AnovaBase.md#AnovaBase.anovatable-Tuple{AnovaResult{<:FullModel}})
This function returns a `AnovaTable` for showing `AnovaResult`. For `NestedModels` and `MixedAovModels`, `AnovaBase` provides a default interface. When dealing with `FullModel` or customizing methods on `SomeModel`, the following methods should be defined.
1. `anovatable(::AnovaResult{<: FullModel{SomeModel}}; rownames)`
2. `anovatable(::AnovaResult{<: NestedModels{SomeModel}}; rownames)`
3. `anovatable(::AnovaResult{<: MixedAovModels{SomeModel}}; rownames)`

See [`AnovaTable`](./AnovaBase.md#AnovaBase.AnovaTable) for the argument signature.

## [nestedmodels](./AnovaBase.md#AnovaBase.nestedmodels-Tuple{RegressionModel})
This function is not essential for ANOVA; it is just for convenience to create nested models(`NestedModels`). It should be defined as follow
1. `nestedmodels(SomeModel, ::FormulaTerm, data; kwargs)`
2. `nestedmodels(::SomeModel; kwargs)`

`AnovaBase` provides a lot of functions to work on formula, terms and contrasts. See [Developer utility](./AnovaBase.md#Developer-utility)

## Other function
* [`dof_residual`](./AnovaBase.md#AnovaBase.dof_residual-Tuple{AnovaResult}) applies `dof_residual` to all models by default. If `dof_residual(::SomeModel)` is not valid for ANOVA, customize `dof_residual(::AnovaResult{<: AnovaModel{SomeModel}})` alternatively.
18 changes: 16 additions & 2 deletions docs/src/index.md
Original file line number Diff line number Diff line change
@@ -4,19 +4,28 @@
It is similar to function [anova in R](https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/anova).

## Functionality overview
### anova
### ANOVA
```
anova(<model>; <type>, <test>)
anova(<test>, <model>; <type>)
anova(<models>; <test>)
anova(<test>, <models>)

```
### Model-specific ANOVA
#### AnovaGLM
```
anova_lm(<formula>, <data>; <type>, <test>)
anova_lm(<test>, <formula>, <data>; <type>)
anova_glm(<formula>, <data>, <distr>, <link>; <type>, <test>)
anova_glm(<test>, <formula>, <data>, <distr>, <link>; <type>)
```
#### AnovaMixedModels
```
anova_lme(<formula>, <data>; <type>, <test>)
anova_lme(<test>, <formula>, <data>; <type>)
```
#### AnovaFixedEffecModels
```
anova_lfe(<formula>, <data>, <vcov>; <type>, <test>)
anova_lfe(<test>, <formula>, <data>, <vcov>; <type>)
```
@@ -63,6 +72,11 @@ Pages = [
]
Depth = 2
```
### Interfacing AnovaBase.jl
```@contents
Pages = ["Interface.md"]
Depth = 2
```
### API
```@contents
Pages = [
36 changes: 24 additions & 12 deletions src/AnovaBase.jl
Original file line number Diff line number Diff line change
@@ -65,13 +65,16 @@ A wrapper of nested models for conducting ANOVA.

# Fields
* `model`: a tuple of models.

# Constructors
NestedModels{M}(model...) where M
NestedModels{M}(model::T) where {M, N, T <: NTuple{N, M}}
"""
struct NestedModels{M, N} <: AnovaModel{M, N}
model::Tuple

NestedModels{M}(model::T) where {M, N, T <: NTuple{N, M}} = new{M, N}(model)
end

NestedModels{M}(model...) where M = NestedModels{M}(model)
NestedModels{M}(model::T...) where {M, T <: Tuple} = throw(ArgumentError("Some models in $T are not subtype of $M"))
"""
@@ -83,12 +86,16 @@ A wrapper of nested models with multiple types for conducting ANOVA.

# Fields
* `model`: a tuple of models.

# Constructors
MixedAovModels{M}(model...) where M
MixedAovModels{M}(model::T) where {M, T <: Tuple}
"""
struct MixedAovModels{M, N} <: AnovaModel{M, N}
model::Tuple
end
MixedAovModels{M}(model::T) where {M, T <: Tuple} = all(m -> isa(m, M), model) ? MixedAovModels{M, length(model)}(model) : throw(ArgumentError("Some models in are not subtype of $M"))
MixedAovModels{M}(model...) where M = MixedAovModels{M}(model)
MixedAovModels{M}(model::T) where {M, T <: Tuple} = all(m -> isa(m, M), model) ? MixedAovModels{M, length(model)}(model) : throw(ArgumentError("Some models in are not subtype of $M"))
"""
const MultiAovModels{M, N} = Union{NestedModels{M, N}, MixedAovModels{M, N}} where {M, N}

@@ -106,23 +113,20 @@ A wrapper of full model for conducting ANOVA.
* `model`: a regression model.
* `pred_id`: the index of terms included in ANOVA. The source iterable can be obtained by `predictors(model)`. This value may depend on `type` for certain model, e.g. type 1 ANOVA for a gamma regression model with inverse link.
* `type`: type of ANOVA, either 1, 2 or 3.
"""
struct FullModel{M, N} <: AnovaModel{M, N}
model::M
pred_id::NTuple{N, Int}
type::Int
end

"""
# Constructor
FullModel(model::RegressionModel, type::Int, null::Bool, test_intercept::Bool)

Create a `FullModel` with several model-specific parameters.

* `model`: a regression model.
* `type`: type of ANOVA, either 1, 2 or 3.
* `null`: whether `y ~ 0` is allowed.
* `test_intercept`: whether intercept is going to be tested.
"""
struct FullModel{M, N} <: AnovaModel{M, N}
model::M
pred_id::NTuple{N, Int}
type::Int
end
function FullModel(model::RegressionModel, type::Int, null::Bool, test_intercept::Bool)
err1 = ArgumentError("Invalid set of model specification for ANOVA; not enough variables provided.")
#err2 = ArgumentError("Invalid set of model specification for ANOVA; all coefficents are aliased with 1.")
@@ -162,12 +166,20 @@ Returned object of `anova`.
* `N` is the length of parameters.

# Fields
* `anovamodel`: a [`NestedModels`](@ref) or a [`FullModel`](@ref).
* `anovamodel`: [`NestedModels`](@ref), [`MixedAovModels`](@ref), or [`FullModel`](@ref).
* `dof`: degrees of freedom of models or predictors.
* `deviance`: deviance(s) for calculating test statistics. See [`deviance`](@ref) for more details.
* `teststat`: value(s) of test statiscics.
* `pval`: p-value(s) of test statiscics.
* `otherstat`: `NamedTuple` contained extra statistics.

# Constructor
AnovaResult{T}(anovamodel::M,
dof::NTuple{N, Int},
deviance::NTuple{N, Float64},
teststat::NTuple{N, Float64},
pval::NTuple{N, Float64},
otherstat::NamedTuple) where {N, M <: AnovaModel{<: RegressionModel, N}, T <: GoodnessOfFit}
"""
struct AnovaResult{M, T, N}
anovamodel::M
20 changes: 19 additions & 1 deletion src/fit.jl
Original file line number Diff line number Diff line change
@@ -52,7 +52,25 @@ end
"""
dof_asgn(v::Vector{Int})

Calculate degrees of freedom of each predictors. 'v' can be obtained by `StatsModels.asgn(f::FormulaTerm)`. For a given `trm::RegressionModel`, it is as same as `trm.mm.assign`.
Calculate degrees of freedom of each predictors. 'assign' can be obtained by `StatsModels.asgn(f::FormulaTerm)`. For a given `trm::RegressionModel`, it is as same as `trm.mm.assign`.

The index of the output matches values in the orinal `assign`. If any index value is not in `assign`, the default is 0.

# Examples
```julia
julia> dof_asgn([1, 2, 2, 3, 3, 3])
3-element Vector{Int64}:
1
2
3

julia> dof_asgn([2, 2, 3, 3, 3])
3-element Vector{Int64}:
0
2
3

```
"""
function dof_asgn(v::Vector{Int})
dofv = zeros(Int, maximum(v))
2 changes: 1 addition & 1 deletion src/interface.jl
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
nestedmodels(<model>; <keyword arguments>)
nestedmodels(<model type>, formula, data; <keyword arguments>)
Generate nested models [`NestedModels`](@ref) from a model or modeltype, formula and data.
Create nested models [`NestedModels`](@ref) from a model or modeltype, formula and data.
"""
function nestedmodels(::T; kwargs...) where {T <: RegressionModel}
throw(function_arg_error(nestedmodels, T))
13 changes: 7 additions & 6 deletions src/term.jl
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
"""
has_intercept(<terms>)
Return true if 'InterceptTerm{true}` is in the terms.
Return `true` if `InterceptTerm{true}` is in the terms.
"""
has_intercept(f::FormulaTerm) = has_intercept(f.rhs)
has_intercept(f::MatrixTerm) = has_intercept(f.terms)
@@ -13,7 +13,7 @@ has_intercept(::AbstractTerm) = false
"""
any_not_aliased_with_1(<terms>)
Return true if there are any terms not aliased with the intercept, e.g. `ContinuousTerm` or `FunctionTerm`.
Return `true` if there are any terms not aliased with the intercept, e.g. `ContinuousTerm` or `FunctionTerm`.
Terms without schema are considered aliased with the intercept.
"""
@@ -53,6 +53,7 @@ julia> prednames(f)
julia> prednames(InterceptTerm{false}())
```
"""
prednames(t::FormulaTerm) = filter!(!isnothing, vectorize(prednames(t.rhs)))
@@ -152,10 +153,10 @@ const doc_select_interaction = """
Return a set of index of `f`, which
1. return terms are interaction terms of `f[id]` and other terms.\n
2. `f[id]` is an interaction term of return terms and other terms.\n
3. return terms not interaction terms of `f[id]` and other terms.\n
4. `f[id]` is not interaction term of return terms and other terms.
1. returned terms are interaction terms of `f[id]` and other terms.\n
2. `f[id]` is an interaction term of returned terms and other terms.\n
3. returned terms not interaction terms of `f[id]` and other terms.\n
4. `f[id]` is not interaction term of returned terms and other terms.
# Examples
```julia