diff --git a/NEWS.md b/NEWS.md index 4831de4a33064..ffb30b81496d4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,7 @@ New library features -------------------- * The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value` pairs to specify multiple attributes. ([#39381]) +* `bytes2hex` and `hex2bytes` are no longer limited to arguments of type `Union{String,AbstractVector{UInt8}}` and now only require that they're iterable and have a length. ([#39710]) Standard library changes ------------------------ diff --git a/base/strings/util.jl b/base/strings/util.jl index 140c5a31194f1..5cea8d280f07c 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -595,15 +595,20 @@ replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) = # hex <-> bytes conversion """ - hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}}) + hex2bytes(itr) -Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a +Given an iterable `itr` of ASCII codes for a sequence of hexadecimal digits, returns a `Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair -of hexadecimal digits in `s` gives the value of one byte in the return vector. +of hexadecimal digits in `itr` gives the value of one byte in the return vector. -The length of `s` must be even, and the returned array has half of the length of `s`. +The length of `itr` must be even, and the returned array has half of the length of `itr`. See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse. +!!! compat "Julia 1.7" + Calling hex2bytes with iterables producing UInt8 requires + version 1.7. In earlier versions, you can collect the iterable + before calling instead. + # Examples ```jldoctest julia> s = string(12345, base = 16) @@ -632,46 +637,64 @@ julia> hex2bytes(a) """ function hex2bytes end -hex2bytes(s::AbstractString) = hex2bytes(String(s)) -hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) +hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) + +# special case - valid bytes are checked in the generic implementation +function hex2bytes!(dest::AbstractArray{UInt8}, s::String) + sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only")) -_firstbyteidx(s::String) = 1 -_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s)) -_lastbyteidx(s::String) = sizeof(s) -_lastbyteidx(s::AbstractVector{UInt8}) = lastindex(s) + hex2bytes!(dest, transcode(UInt8, s)) +end """ - hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}}) + hex2bytes!(dest::AbstractVector{UInt8}, itr) -Convert an array `s` of bytes representing a hexadecimal string to its binary +Convert an iterable `itr` of bytes representing a hexadecimal string to its binary representation, similar to [`hex2bytes`](@ref) except that the output is written in-place -in `d`. The length of `s` must be exactly twice the length of `d`. -""" -function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}}) - if 2length(d) != sizeof(s) - isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length")) - throw(ArgumentError("output array must be half length of input array")) +to `dest`. The length of `dest` must be half the length of `itr`. + +!!! compat "Julia 1.7" + Calling hex2bytes! with iterators producing UInt8 requires + version 1.7. In earlier versions, you can collect the iterable + before calling instead. +""" +function hex2bytes!(dest::AbstractArray{UInt8}, itr) + isodd(length(itr)) && throw(ArgumentError("length of iterable must be even")) + @boundscheck 2*length(dest) != length(itr) && throw(ArgumentError("length of output array must be half of the length of input iterable")) + iszero(length(itr)) && return dest + + next = iterate(itr) + @inbounds for i in eachindex(dest) + x,state = next + y,state = iterate(itr, state) + next = iterate(itr, state) + dest[i] = number_from_hex(x) << 4 + number_from_hex(y) end - j = first(eachindex(d)) - 1 - for i = _firstbyteidx(s):2:_lastbyteidx(s) - @inbounds d[j += 1] = number_from_hex(_nthbyte(s,i)) << 4 + number_from_hex(_nthbyte(s,i+1)) - end - return d + + return dest end -@inline number_from_hex(c) = - (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') : - (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) : - (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) : +@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c)) +@inline number_from_hex(c::Char) = number_from_hex(UInt8(c)) +@inline function number_from_hex(c::UInt8) + UInt8('0') <= c <= UInt8('9') && return c - UInt8('0') + c |= 0b0100000 + UInt8('a') <= c <= UInt8('f') && return c - UInt8('a') + 0x0a throw(ArgumentError("byte is not an ASCII hexadecimal digit")) +end """ - bytes2hex(a::AbstractArray{UInt8}) -> String - bytes2hex(io::IO, a::AbstractArray{UInt8}) + bytes2hex(itr) -> String + bytes2hex(io::IO, itr) + +Convert an iterator `itr` of bytes to its hexadecimal string representation, either +returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream +via `bytes2hex(io, itr)`. The hexadecimal characters are all lowercase. -Convert an array `a` of bytes to its hexadecimal string representation, either -returning a `String` via `bytes2hex(a)` or writing the string to an `io` stream -via `bytes2hex(io, a)`. The hexadecimal characters are all lowercase. +!!! compat "Julia 1.7" + Calling bytes2hex with iterators producing UInt8 requires + version 1.7. In earlier versions, you can collect the iterable + before calling instead. # Examples ```jldoctest @@ -689,17 +712,19 @@ julia> bytes2hex(b) """ function bytes2hex end -function bytes2hex(a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}}) - b = Base.StringVector(2*length(a)) - @inbounds for (i, x) in enumerate(a) +function bytes2hex(itr) + eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8")) + b = Base.StringVector(2*length(itr)) + @inbounds for (i, x) in enumerate(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end return String(b) end -function bytes2hex(io::IO, a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}}) - for x in a +function bytes2hex(io::IO, itr) + eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8")) + for x in itr print(io, Char(hex_chars[1 + x >> 4]), Char(hex_chars[1 + x & 0xf])) end end diff --git a/test/strings/util.jl b/test/strings/util.jl index 617ff31106634..2ca6df529ba51 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -376,6 +376,11 @@ end #non-hex characters @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH") end + + @testset "Issue 39284" begin + @test "efcdabefcdab8967452301" == bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF"))) + @test hex2bytes(Iterators.reverse(b"CE1A85EECc")) == UInt8[0xcc, 0xee, 0x58, 0xa1, 0xec] + end end # b"" should be immutable