diff --git a/src/MLJLinearModels.jl b/src/MLJLinearModels.jl
index 76aff71..f567466 100644
--- a/src/MLJLinearModels.jl
+++ b/src/MLJLinearModels.jl
@@ -1,4 +1,4 @@
-module MLJLinearModels 
+module MLJLinearModels
 
 using Parameters, DocStringExtensions
 using LinearAlgebra, IterativeSolvers
@@ -22,6 +22,9 @@
 include("loss-penalty/standard.jl")
 include("loss-penalty/robust.jl")
 include("loss-penalty/utils.jl")
+include("loss-penalty/scad_mcp.jl")
+
+
 # > Constructors for regression models <
 include("glr/constructors.jl")
 include("glr/d_l2loss.jl")
diff --git a/src/fit/newton.jl b/src/fit/newton.jl
index 752ddfb..20f61b1 100644
--- a/src/fit/newton.jl
+++ b/src/fit/newton.jl
@@ -67,6 +67,19 @@ function _fit(glr::GLR{<:Union{LogisticLoss,RobustLoss},<:L2R},
 end
 
 
+# least squares + folded concave (SCAD/MCP) penalty, solved with LBFGS via Optim
+function _fit(glr::GLR{L2Loss,<:FCPenalty},
+              solver::LBFGS, X, y, scratch)
+    _,p,_ = npc(scratch)
+    θ₀   = zeros(p)
+    _fg! = (f, g, θ) -> fg!(glr, X, y, scratch)(f, g, θ)
+    opt  = Optim.only_fg!(_fg!)
+    res  = Optim.optimize(opt, θ₀, Optim.LBFGS())
+    return Optim.minimizer(res)
+end
+
+
+
 ## MULTINOMIAL + 0/L2 ==============
 
 """
diff --git a/src/glr/constructors.jl b/src/glr/constructors.jl
index 5b005da..f756c04 100644
--- a/src/glr/constructors.jl
+++ b/src/glr/constructors.jl
@@ -282,3 +282,18 @@ function LADRegression(λ::Real=1.0, γ::Real=0.0;
         penalize_intercept=penalize_intercept,
         scale_penalty_with_samples=scale_penalty_with_samples)
 end
+
+
+# see https://cloud.r-project.org/web/packages/ncvreg/ncvreg.pdf
+# (ncvreg defaults: γ = 3.7 for SCAD, γ = 3 for MCP)
+# should also allow for L2 penalty on top (composite penalty)
+
+# function SCADRegression(λ::Real=1.0, γ::Real=3.7; lambda::Real=λ, gamma::Real=γ,
+#                         fit_intercept::Bool=true, penalize_intercept::Bool=false,
+#                         scale_penalty_with_samples::Bool=true)
+#     check_pos(lambda); check_pos(gamma)
+#     GLR(penalty=ScadPenalty{lambda,gamma}(),
+#         fit_intercept=fit_intercept,
+#         penalize_intercept=penalize_intercept,
+#         scale_penalty_with_samples=scale_penalty_with_samples)
+# end
diff --git a/src/glr/d_l2loss.jl b/src/glr/d_l2loss.jl
index 3160a9a..f384d2e 100644
--- a/src/glr/d_l2loss.jl
+++ b/src/glr/d_l2loss.jl
@@ -72,3 +72,32 @@ function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y, scratch)
         return glr.loss(r) + get_l2(glr.penalty)(view_θ(glr, θ))
     end
 end
+
+# --------------------------- #
+# --  SCAD/MCP Regression  -- #
+# --------------------------- #
+# ->  J(θ)  = f(θ) + r(θ; λ, γ)
+# ->  f(θ)  = |Xθ - y|₂²/2
+# ->  r(θ; λ, γ) = SCAD or MCP penalty (see loss-penalty/scad_mcp.jl)
+# -> ∇J(θ)  = X'(Xθ - y) + ∇r(θ; λ, γ)
+# --------------------------- #
+function fg!(glr::GLR{L2Loss,<:FCPenalty}, X, y, scratch)
+    grad = ∇(glr.penalty)
+    if glr.fit_intercept
+        (f, g, θ) -> begin
+            r = scratch.n
+            get_residuals!(r, X, θ, y)        # -- r = Xθ - y
+            g === nothing || begin
+                apply_Xt!(g, X, r)            # -- g = X'r
+                # XXX could be applied directly to a scratch to avoid allocs
+                # also could capture last elem to remove it if not penalize intercept
+                g .+= grad(θ)
+            end
+            f === nothing || return glr.loss(r) + glr.penalty(view_θ(glr, θ))
+        end
+    else
+        # XXX
+    end
+end
+
+# --> need to test compared to https://cloud.r-project.org/web/packages/ncvreg/ncvreg.pdf
diff --git a/src/loss-penalty/scad_mcp.jl b/src/loss-penalty/scad_mcp.jl
new file mode 100644
index 0000000..8dfa29d
--- /dev/null
+++ b/src/loss-penalty/scad_mcp.jl
@@ -0,0 +1,57 @@
+export ScadPenalty, MCPPenalty
+
+# FoldedConcavePenalty: λ is the penalty strength, γ the concavity parameter
+abstract type FCPenalty{λ,γ} <: AtomicPenalty end
+
+struct ScadPenalty{λ,γ} <: FCPenalty{λ,γ} end
+struct MCPPenalty{λ,γ}  <: FCPenalty{λ,γ} end
+
+getlambda(p::FCPenalty{λ,γ}) where {λ,γ} = λ
+getgamma(p::FCPenalty{λ,γ}) where {λ,γ} = γ
+
+# not efficient but doesn't matter, the derivative matters more
+(p::ScadPenalty{λ,γ})(x::Real) where {λ,γ} = begin
+    abs_x = abs(x)
+    if abs_x <= λ
+        λ * abs_x
+    elseif abs_x >= γ * λ
+        λ^2 * (γ + 1) / 2
+    else
+        (2γ * λ * abs_x - x^2 - λ^2) / (2 * (γ - 1))
+    end
+end
+
+(p::MCPPenalty{λ,γ})(x::Real) where {λ,γ} = begin
+    abs_x = abs(x)
+    if abs_x <= γ * λ
+        λ * abs_x - x^2 / (2γ)
+    else
+        γ * λ^2 / 2
+    end
+end
+
+# penalty of a vector = sum of the elementwise penalties (a scalar, so it can be added to the loss)
+(p::FCPenalty{λ,γ})(θ::AbstractVector) where {λ,γ} = sum(p, θ)
+
+∇(p::ScadPenalty{λ,γ}) where {λ,γ} = θ -> begin
+    T = promote_type(eltype(θ), typeof(λ), typeof(γ))
+    abs_θ = abs.(T.(θ))
+    λ̂, γ̂ = T(λ), T(γ)
+    λγ = λ̂ * γ̂
+
+    # derivative: λ on [0, λ], (γλ - |θ|)/(γ - 1) on (λ, γλ), 0 beyond γλ
+    left   = λ̂
+    middle = @. (λγ - abs_θ) / (γ̂ - T(1))
+
+    return @. ((abs_θ <= λ̂) * left + ((λ̂ < abs_θ) & (abs_θ < λγ)) * middle) * sign(θ)
+end
+
+∇(p::MCPPenalty{λ,γ}) where {λ,γ} = θ -> begin
+    T = promote_type(eltype(θ), typeof(λ), typeof(γ))
+    abs_θ = abs.(T.(θ))
+    λ̂, γ̂ = T(λ), T(γ)
+    λγ = λ̂ * γ̂
+
+    # derivative: λ - |θ|/γ on [0, γλ], 0 beyond γλ
+    return @. (abs_θ <= λγ) * (λ̂ - abs_θ / γ̂) * sign(θ)
+end
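
A quick sanity check on the penalty and derivative definitions above, before wiring everything through fg!: the analytic gradients should match central finite differences of the scalar penalty values. This is only a sketch, not part of the patch; it assumes the ScadPenalty, MCPPenalty and ∇ definitions from src/loss-penalty/scad_mcp.jl are in scope (e.g. run from the package's test environment), and the λ/γ values are arbitrary illustrative choices (the test points deliberately avoid the non-differentiable thresholds |θ| = λ and |θ| = γλ):

    using Test

    # finite-difference check of ∇(ScadPenalty) / ∇(MCPPenalty) against the scalar penalty values
    for p in (ScadPenalty{0.5, 3.7}(), MCPPenalty{0.5, 3.0}())
        θ = [-2.0, -0.6, -0.1, 0.0, 0.1, 0.6, 2.0]
        g = ∇(p)(θ)                                      # analytic gradient
        h = 1e-6
        gd = [(p(θᵢ + h) - p(θᵢ - h)) / (2h) for θᵢ in θ]  # central differences, elementwise
        @test g ≈ gd atol=1e-5
    end

The same penalty values and fitted coefficients could then be compared against ncvreg (cf. the TODO above) once a SCADRegression-style constructor is in place.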