Skip to content

Commit f313cb0

Browse files
authored
Fix allocations by dropping CategoricalPool type parameter (#418)
Self-referential types generate allocations since Julia 1.11 (JuliaLang/julia#58169). The third type parameter of `CategoricalPool` (`V`, the categorical value type) seems to have been unnecessary ever since `NominalValue` and `OrdinalValue` were merged into a single `CategoricalValue` type.
1 parent d0f7081 commit f313cb0

File tree

4 files changed

+28
-50
lines changed

4 files changed

+28
-50
lines changed

src/array.jl

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -160,9 +160,8 @@ function CategoricalArray{T, N, R}(::UndefInitializer, dims::NTuple{N,Int};
160160
U = leveltype(nonmissingtype(T))
161161
S = T >: Missing ? Union{U, Missing} : U
162162
check_supported_eltype(S, T)
163-
V = CategoricalValue{U, R}
164163
levs = levels === nothing ? U[] : collect(U, levels)
165-
CategoricalArray{S, N}(zeros(R, dims), CategoricalPool{U, R, V}(levs, ordered))
164+
CategoricalArray{S, N}(zeros(R, dims), CategoricalPool{U, R}(levs, ordered))
166165
end
167166

168167
CategoricalArray{T, N}(::UndefInitializer, dims::NTuple{N,Int};

src/pool.jl

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,20 +2,18 @@ const catpool_seed = UInt === UInt32 ? 0xe3cf1386 : 0x356f2c715023f1a5
22

33
hashlevels(levs::AbstractVector) = foldl((h, x) -> hash(x, h), levs, init=catpool_seed)
44

5-
CategoricalPool{T, R, V}(ordered::Bool=false) where {T, R, V} =
6-
CategoricalPool{T, R, V}(T[], ordered)
75
CategoricalPool{T, R}(ordered::Bool=false) where {T, R} =
86
CategoricalPool{T, R}(T[], ordered)
97
CategoricalPool{T}(ordered::Bool=false) where {T} =
108
CategoricalPool{T, DefaultRefType}(T[], ordered)
119

1210
CategoricalPool{T, R}(levels::AbstractVector, ordered::Bool=false) where {T, R} =
13-
CategoricalPool{T, R, CategoricalValue{T, R}}(convert(Vector{T}, levels), ordered)
11+
CategoricalPool{T, R}(convert(Vector{T}, levels), ordered)
1412
CategoricalPool(levels::AbstractVector{T}, ordered::Bool=false) where {T} =
1513
CategoricalPool{T, DefaultRefType}(convert(Vector{T}, levels), ordered)
1614

1715
CategoricalPool(invindex::Dict{T, R}, ordered::Bool=false) where {T, R <: Integer} =
18-
CategoricalPool{T, R, CategoricalValue{T, R}}(invindex, ordered)
16+
CategoricalPool{T, R}(invindex, ordered)
1917

2018
Base.convert(::Type{T}, pool::T) where {T <: CategoricalPool} = pool
2119

@@ -29,12 +27,12 @@ function Base.convert(::Type{CategoricalPool{T, R}}, pool::CategoricalPool) wher
2927

3028
levelsT = convert(Vector{T}, pool.levels)
3129
invindexT = convert(Dict{T, R}, pool.invindex)
32-
return CategoricalPool{T, R, CategoricalValue{T, R}}(levelsT, invindexT, pool.ordered)
30+
return CategoricalPool{T, R}(levelsT, invindexT, pool.ordered)
3331
end
3432

35-
Base.copy(pool::CategoricalPool{T, R, V}) where {T, R, V} =
36-
CategoricalPool{T, R, V}(copy(pool.levels), copy(pool.invindex),
37-
pool.ordered, pool.hash)
33+
Base.copy(pool::CategoricalPool{T, R}) where {T, R} =
34+
CategoricalPool{T, R}(copy(pool.levels), copy(pool.invindex),
35+
pool.ordered, pool.hash)
3836

3937
function Base.show(io::IO, pool::CategoricalPool{T, R}) where {T, R}
4038
@static if VERSION >= v"1.6.0"

src/typedefs.jl

Lines changed: 16 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -6,28 +6,27 @@ const SupportedTypes = Union{AbstractString, AbstractChar, Number}
66
# Type params:
77
# * `T` type of categorized values
88
# * `R` integer type for referencing category levels
9-
# * `V` categorical value type
10-
mutable struct CategoricalPool{T <: SupportedTypes, R <: Integer, V}
9+
mutable struct CategoricalPool{T <: SupportedTypes, R <: Integer}
1110
levels::Vector{T} # category levels ordered by their reference codes
1211
invindex::Dict{T, R} # map from category levels to their reference codes
1312
ordered::Bool # whether levels can be compared using <
1413
hash::Union{UInt, Nothing} # hash of levels
1514
subsetof::Ptr{Nothing} # last seen strict superset pool
1615
equalto::Ptr{Nothing} # last seen equal pool
1716

18-
function CategoricalPool{T, R, V}(levels::Vector{T},
19-
ordered::Bool) where {T, R, V}
17+
function CategoricalPool{T, R}(levels::Vector{T},
18+
ordered::Bool) where {T, R}
2019
if length(levels) > typemax(R)
2120
throw(LevelsException{T, R}(levels[Int(typemax(R))+1:end]))
2221
end
2322
invindex = Dict{T, R}(v => i for (i, v) in enumerate(levels))
2423
if length(invindex) != length(levels)
2524
throw(ArgumentError("Duplicate entries are not allowed in levels"))
2625
end
27-
CategoricalPool{T, R, V}(levels, invindex, ordered)
26+
CategoricalPool{T, R}(levels, invindex, ordered)
2827
end
29-
function CategoricalPool{T, R, V}(invindex::Dict{T, R},
30-
ordered::Bool) where {T, R, V}
28+
function CategoricalPool{T, R}(invindex::Dict{T, R},
29+
ordered::Bool) where {T, R}
3130
levels = Vector{T}(undef, length(invindex))
3231
# If invindex contains non consecutive values, a BoundsError will be thrown
3332
try
@@ -40,18 +39,12 @@ mutable struct CategoricalPool{T <: SupportedTypes, R <: Integer, V}
4039
if length(invindex) > typemax(R)
4140
throw(LevelsException{T, R}(levels[typemax(R)+1:end]))
4241
end
43-
CategoricalPool{T, R, V}(levels, invindex, ordered)
42+
CategoricalPool{T, R}(levels, invindex, ordered)
4443
end
45-
function CategoricalPool{T, R, V}(levels::Vector{T},
46-
invindex::Dict{T, R},
47-
ordered::Bool,
48-
hash::Union{UInt, Nothing}=nothing) where {T, R, V}
49-
if !(V <: CategoricalValue)
50-
throw(ArgumentError("Type $V is not a categorical value type"))
51-
end
52-
if V !== CategoricalValue{T, R}
53-
throw(ArgumentError("V must be CategoricalValue{T, R}"))
54-
end
44+
function CategoricalPool{T, R}(levels::Vector{T},
45+
invindex::Dict{T, R},
46+
ordered::Bool,
47+
hash::Union{UInt, Nothing}=nothing) where {T, R}
5548
pool = new(levels, invindex, ordered, hash, C_NULL, C_NULL)
5649
return pool
5750
end
@@ -77,7 +70,7 @@ the order of the pool's [`levels`](@ref DataAPI.levels) is used rather than the
7770
ordering of values of type `T`.
7871
"""
7972
struct CategoricalValue{T <: SupportedTypes, R <: Integer}
80-
pool::CategoricalPool{T, R, CategoricalValue{T, R}}
73+
pool::CategoricalPool{T, R}
8174
ref::R
8275
end
8376

@@ -98,14 +91,14 @@ const AbstractCategoricalMatrix{T, R, V, C, U} = AbstractCategoricalArray{T, 2,
9891

9992
mutable struct CategoricalArray{T, N, R <: Integer, V, C, U} <: AbstractCategoricalArray{T, N, R, V, C, U}
10093
refs::Array{R, N}
101-
pool::CategoricalPool{V, R, C}
94+
pool::CategoricalPool{V, R}
10295

10396
function CategoricalArray{T, N}(refs::Array{R, N},
104-
pool::CategoricalPool{V, R, C}) where
105-
{T, N, R <: Integer, V, C}
97+
pool::CategoricalPool{V, R}) where
98+
{T, N, R <: Integer, V}
10699
T === V || T == Union{V, Missing} || throw(ArgumentError("T ($T) must be equal to $V or Union{$V, Missing}"))
107100
U = T >: Missing ? Missing : Union{}
108-
new{T, N, R, V, C, U}(refs, pool)
101+
new{T, N, R, V, CategoricalValue{V, R}, U}(refs, pool)
109102
end
110103
end
111104
const CategoricalVector{T, R <: Integer, V, C, U} = CategoricalArray{T, 1, R, V, C, U}

test/04_constructors.jl

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,10 @@ using CategoricalArrays: DefaultRefType
55

66
@testset "Type parameter constraints" begin
77
# cannot use categorical value as level type
8-
@test_throws TypeError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8, CategoricalValue{CategoricalValue{Int,UInt8},UInt8}}(
8+
@test_throws TypeError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8}(
99
Dict{CategoricalValue{Int,UInt8}, UInt8}(), false)
10-
@test_throws TypeError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8, CategoricalValue{CategoricalValue{Int,UInt8},UInt8}}(
10+
@test_throws TypeError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8}(
1111
CategoricalValue{Int,UInt8}[], false)
12-
# cannot use non-categorical value as categorical value type
13-
@test_throws ArgumentError CategoricalPool{Int, UInt8, Int}(Int[], false)
14-
@test_throws ArgumentError CategoricalPool{Int, UInt8, Int}(Dict{Int, UInt8}(), false)
15-
# level type of the pool and categorical value must match
16-
@test_throws ArgumentError CategoricalPool{Int, UInt8, CategoricalValue{String, UInt8}}(Int[], false)
17-
@test_throws ArgumentError CategoricalPool{Int, UInt8, CategoricalValue{String, UInt8}}(Dict{Int, UInt8}(), false)
18-
# reference type of the pool and categorical value must match
19-
@test_throws ArgumentError CategoricalPool{Int, UInt8, CategoricalValue{Int, UInt16}}(Int[], false)
20-
@test_throws ArgumentError CategoricalPool{Int, UInt8, CategoricalValue{Int, UInt16}}(Dict{Int, UInt8}(), false)
21-
# correct types combination
22-
@test CategoricalPool{Int, UInt8, CategoricalValue{Int, UInt8}}(Int[], false) isa CategoricalPool
23-
@test CategoricalPool{Int, UInt8, CategoricalValue{Int, UInt8}}(Dict{Int, UInt8}(), false) isa CategoricalPool
2412
end
2513

2614
@testset "empty CategoricalPool{String}" begin
@@ -38,7 +26,7 @@ end
3826
@testset "empty CategoricalPool{Int}" begin
3927
pool = CategoricalPool{Int, UInt8}()
4028

41-
@test isa(pool, CategoricalPool{Int, UInt8, CategoricalValue{Int, UInt8}})
29+
@test isa(pool, CategoricalPool{Int, UInt8})
4230

4331
@test isa(pool.levels, Vector{Int})
4432
@test length(pool.levels) == 0
@@ -50,7 +38,7 @@ end
5038
@testset "CategoricalPool{String, DefaultRefType}(a b c)" begin
5139
pool = CategoricalPool(["a", "b", "c"])
5240

53-
@test isa(pool, CategoricalPool{String, UInt32, CategoricalValue{String, UInt32}})
41+
@test isa(pool, CategoricalPool{String, UInt32})
5442

5543
@test isa(pool.levels, Vector{String})
5644
@test pool.levels == ["a", "b", "c"]
@@ -156,7 +144,7 @@ end
156144
@testset "CategoricalPool{Float64, UInt8}()" begin
157145
pool = CategoricalPool{Float64, UInt8}([1.0, 2.0, 3.0])
158146

159-
@test isa(pool, CategoricalPool{Float64, UInt8, CategoricalValue{Float64, UInt8}})
147+
@test isa(pool, CategoricalPool{Float64, UInt8})
160148
@test CategoricalValue(pool, 1) isa CategoricalValue{Float64, UInt8}
161149
end
162150

0 commit comments

Comments
 (0)