diff --git a/src/MLJLinearModels.jl b/src/MLJLinearModels.jl
index 76aff71..f567466 100644
--- a/src/MLJLinearModels.jl
+++ b/src/MLJLinearModels.jl
@@ -1,4 +1,4 @@
-module MLJLinearModels 
+module MLJLinearModels
 
 using Parameters, DocStringExtensions
 using LinearAlgebra, IterativeSolvers
@@ -22,6 +22,9 @@
 include("loss-penalty/standard.jl")
 include("loss-penalty/robust.jl")
 include("loss-penalty/utils.jl")
+include("loss-penalty/scad_mcp.jl")
+
+
 # > Constructors for regression models <
 include("glr/constructors.jl")
 include("glr/d_l2loss.jl")
diff --git a/src/fit/newton.jl b/src/fit/newton.jl
index 752ddfb..20f61b1 100644
--- a/src/fit/newton.jl
+++ b/src/fit/newton.jl
@@ -67,6 +67,19 @@ function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
 end
 
 
+# least squares + folded concave (SCAD/MCP) penalty, solved with LBFGS via Optim
+function _fit(glr::GLR{L2Loss,<:FCPenalty},
+              solver::LBFGS, X, y, scratch)
+    _,p,_ = npc(scratch)
+    θ₀   = zeros(p)
+    _fg! = (f, g, θ) -> fg!(glr, X, y, scratch)(f, g, θ)
+    opt  = Optim.only_fg!(_fg!)
+    res  = Optim.optimize(opt, θ₀, Optim.LBFGS())
+    return Optim.minimizer(res)
+end
+
+
+
 ## MULTINOMIAL + 0/L2 ==============
 
 """
diff --git a/src/glr/constructors.jl b/src/glr/constructors.jl
index 5b005da..f756c04 100644
--- a/src/glr/constructors.jl
+++ b/src/glr/constructors.jl
@@ -282,3 +282,18 @@ function LADRegression(λ::Real=1.0, γ::Real=0.0;
         penalize_intercept=penalize_intercept,
         scale_penalty_with_samples=scale_penalty_with_samples)
 end
+
+
+# see https://cloud.r-project.org/web/packages/ncvreg/ncvreg.pdf
+# (ncvreg defaults: γ = 3.7 for SCAD, γ = 3 for MCP)
+# should also allow for L2 penalty on top (composite penalty)
+
+# function SCADRegression(λ::Real=1.0, γ::Real=3.7; lambda::Real=λ, gamma::Real=γ,
+#                         fit_intercept::Bool=true, penalize_intercept::Bool=false,
+#                         scale_penalty_with_samples::Bool=true)
+#     check_pos(lambda); check_pos(gamma)
+#     GLR(penalty=ScadPenalty{lambda,gamma}(),
+#         fit_intercept=fit_intercept,
+#         penalize_intercept=penalize_intercept,
+#         scale_penalty_with_samples=scale_penalty_with_samples)
+# end
diff --git a/src/glr/d_l2loss.jl b/src/glr/d_l2loss.jl
index 3160a9a..f384d2e 100644
--- a/src/glr/d_l2loss.jl
+++ b/src/glr/d_l2loss.jl
@@ -72,3 +72,32 @@ function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y, scratch)
         return glr.loss(r) + get_l2(glr.penalty)(view_θ(glr, θ))
     end
 end
+
+# --------------------------- #
+# --  SCAD/MCP Regression  -- #
+# --------------------------- #
+# ->  J(θ)  = f(θ) + r(θ; λ, γ)
+# ->  f(θ)  = |Xθ - y|₂²/2
+# ->  r(θ; λ, γ) = SCAD or MCP penalty (see loss-penalty/scad_mcp.jl)
+# -> ∇J(θ)  = X'(Xθ - y) + ∇r(θ; λ, γ)
+# --------------------------- #
+function fg!(glr::GLR{L2Loss,<:FCPenalty}, X, y, scratch)
+    grad = ∇(glr.penalty)
+    if glr.fit_intercept
+        (f, g, θ) -> begin
+            r = scratch.n
+            get_residuals!(r, X, θ, y)        # -- r = Xθ - y
+            g === nothing || begin
+                apply_Xt!(g, X, r)            # -- g = X'r
+                # XXX could be applied directly to a scratch to avoid allocs
+                # also could capture last elem to remove it if not penalize intercept
+                g .+= grad(θ)
+            end
+            f === nothing || return glr.loss(r) + glr.penalty(view_θ(glr, θ))
+        end
+    else
+        # XXX
+    end
+end
+
+# --> need to test compared to https://cloud.r-project.org/web/packages/ncvreg/ncvreg.pdf
diff --git a/src/loss-penalty/scad_mcp.jl b/src/loss-penalty/scad_mcp.jl
new file mode 100644
index 0000000..8dfa29d
--- /dev/null
+++ b/src/loss-penalty/scad_mcp.jl
@@ -0,0 +1,57 @@
+export ScadPenalty, MCPPenalty
+
+# FoldedConcavePenalty: λ is the penalty strength, γ the concavity parameter
+abstract type FCPenalty{λ,γ} <: AtomicPenalty end
+
+struct ScadPenalty{λ,γ} <: FCPenalty{λ,γ} end
+struct MCPPenalty{λ,γ}  <: FCPenalty{λ,γ} end
+
+getlambda(p::FCPenalty{λ,γ}) where {λ,γ} = λ
+getgamma(p::FCPenalty{λ,γ}) where {λ,γ} = γ
+
+# not efficient but doesn't matter, the derivative matters more
+(p::ScadPenalty{λ,γ})(x::Real) where {λ,γ} = begin
+    abs_x = abs(x)
+    if abs_x <= λ
+        λ * abs_x
+    elseif abs_x >= γ * λ
+        λ^2 * (γ + 1) / 2
+    else
+        (2γ * λ * abs_x - x^2 - λ^2) / (2 * (γ - 1))
+    end
+end
+
+(p::MCPPenalty{λ,γ})(x::Real) where {λ,γ} = begin
+    abs_x = abs(x)
+    if abs_x <= γ * λ
+        λ * abs_x - x^2 / (2γ)
+    else
+        γ * λ^2 / 2
+    end
+end
+
+# penalty of a vector = sum of the elementwise penalties (a scalar, so it can be added to the loss)
+(p::FCPenalty{λ,γ})(θ::AbstractVector) where {λ,γ} = sum(p, θ)
+
+∇(p::ScadPenalty{λ,γ}) where {λ,γ} = θ -> begin
+    T = promote_type(eltype(θ), typeof(λ), typeof(γ))
+    abs_θ = abs.(T.(θ))
+    λ̂, γ̂ = T(λ), T(γ)
+    λγ = λ̂ * γ̂
+
+    # derivative: λ on [0, λ], (γλ - |θ|)/(γ - 1) on (λ, γλ), 0 beyond γλ
+    left   = λ̂
+    middle = @. (λγ - abs_θ) / (γ̂ - T(1))
+
+    return @. ((abs_θ <= λ̂) * left + ((λ̂ < abs_θ) & (abs_θ < λγ)) * middle) * sign(θ)
+end
+
+∇(p::MCPPenalty{λ,γ}) where {λ,γ} = θ -> begin
+    T = promote_type(eltype(θ), typeof(λ), typeof(γ))
+    abs_θ = abs.(T.(θ))
+    λ̂, γ̂ = T(λ), T(γ)
+    λγ = λ̂ * γ̂
+
+    # derivative: λ - |θ|/γ on [0, γλ], 0 beyond γλ
+    return @. (abs_θ <= λγ) * (λ̂ - abs_θ / γ̂) * sign(θ)
+end
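
A quick sanity check on the penalty and derivative definitions above, before wiring everything through fg!: the analytic gradients should match central finite differences of the scalar penalty values. This is only a sketch, not part of the patch; it assumes the ScadPenalty, MCPPenalty and ∇ definitions from src/loss-penalty/scad_mcp.jl are in scope (e.g. run from the package's test environment), and the λ/γ values are arbitrary illustrative choices (the test points deliberately avoid the non-differentiable thresholds |θ| = λ and |θ| = γλ):

    using Test

    # finite-difference check of ∇(ScadPenalty) / ∇(MCPPenalty) against the scalar penalty values
    for p in (ScadPenalty{0.5, 3.7}(), MCPPenalty{0.5, 3.0}())
        θ = [-2.0, -0.6, -0.1, 0.0, 0.1, 0.6, 2.0]
        g = ∇(p)(θ)                                      # analytic gradient
        h = 1e-6
        gd = [(p(θᵢ + h) - p(θᵢ - h)) / (2h) for θᵢ in θ]  # central differences, elementwise
        @test g ≈ gd atol=1e-5
    end

The same penalty values and fitted coefficients could then be compared against ncvreg (cf. the TODO above) once a SCADRegression-style constructor is in place.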