Skip to content

Commit 8bf4c40

Browse files
committed
Change scalar(%hash) to be the same as 0+keys(%hash)
This subject has a long history see [perl #114576] for more discussion. https://rt.perl.org/Public/Bug/Display.html?id=114576 There are a variety of reasons we want to change the return signature of scalar(%hash). One is that it leaks implementation details about our associative array structure. Another is that it requires us to keep track of the used buckets in the hash, which we use for no other purpose but for scalar(%hash). Another is that it is just odd. Almost nothing needs to know these values. Perhaps debugging, but we have several much better functions for introspecting the internals of a hash. By changing the return signature we can remove all the logic related to maintaining and updating xhv_fill_lazy. This should make hot code paths a little faster, and maybe save some memory for traversed hashes. In order to provide some form of backwards compatibility we adds three new functions to the Hash::Util namespace: bucket_ratio(), num_buckets() and used_buckets(). These functions are actually implemented in universal.c, and thus always available even if Hash::Util is not loaded. This simplifies testing. At the same time Hash::Util contains backwards compatible code so that the new functions are available from it should they be needed in older perls. There are many tests in t/op/hash.t that are more or less obsolete after this patch as they test that xhv_fill_lazy is correctly set in various situations. However since we have a backwards compat layer we can just switch them to use bucket_ratio(%hash) instead of scalar(%hash) and keep the tests, just in case they are actually testing something not tested elsewhere.
1 parent 6c50b67 commit 8bf4c40

File tree

22 files changed

+325
-128
lines changed

22 files changed

+325
-128
lines changed

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3858,6 +3858,7 @@ ext/Hash-Util-FieldHash/t/11_hashassign.t Adapted from t/op/hashassign.t
38583858
ext/Hash-Util-FieldHash/t/12_hashwarn.t Adapted from t/op/hashwarn.t
38593859
ext/Hash-Util/lib/Hash/Util.pm Hash::Util
38603860
ext/Hash-Util/Makefile.PL Makefile for Hash::Util
3861+
ext/Hash-Util/t/builtin.t See if Hash::Util builtin exports work as expected
38613862
ext/Hash-Util/t/Util.t See if Hash::Util works
38623863
ext/Hash-Util/Util.xs XS bits of Hash::Util
38633864
ext/I18N-Langinfo/Langinfo.pm I18N::Langinfo

dump.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,15 +1761,8 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
17611761
} while (++ents <= last);
17621762
}
17631763

1764-
if (SvOOK(sv)) {
1765-
struct xpvhv_aux *const aux = HvAUX(sv);
1766-
Perl_dump_indent(aTHX_ level, file, " FILL = %"UVuf
1767-
" (cached = %"UVuf")\n",
1768-
(UV)count, (UV)aux->xhv_fill_lazy);
1769-
} else {
1770-
Perl_dump_indent(aTHX_ level, file, " FILL = %"UVuf"\n",
1771-
(UV)count);
1772-
}
1764+
Perl_dump_indent(aTHX_ level, file, " FILL = %"UVuf"\n",
1765+
(UV)count);
17731766
}
17741767
Perl_dump_indent(aTHX_ level, file, " MAX = %"IVdf"\n", (IV)HvMAX(sv));
17751768
if (SvOOK(sv)) {

embed.fnc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2774,6 +2774,7 @@ Apod |void |hv_assert |NN HV *hv
27742774
#endif
27752775

27762776
ApdR |SV* |hv_scalar |NN HV *hv
2777+
ApdRMD |SV* |hv_bucket_ratio|NN HV *hv
27772778
ApoR |I32* |hv_riter_p |NN HV *hv
27782779
ApoR |HE** |hv_eiter_p |NN HV *hv
27792780
Apo |void |hv_riter_set |NN HV *hv|I32 riter

embed.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@
217217
#define gv_stashpv(a,b) Perl_gv_stashpv(aTHX_ a,b)
218218
#define gv_stashpvn(a,b,c) Perl_gv_stashpvn(aTHX_ a,b,c)
219219
#define gv_stashsv(a,b) Perl_gv_stashsv(aTHX_ a,b)
220+
#define hv_bucket_ratio(a) Perl_hv_bucket_ratio(aTHX_ a)
220221
#define hv_clear(a) Perl_hv_clear(aTHX_ a)
221222
#define hv_clear_placeholders(a) Perl_hv_clear_placeholders(aTHX_ a)
222223
#define hv_common(a,b,c,d,e,f,g,h) Perl_hv_common(aTHX_ a,b,c,d,e,f,g,h)

ext/Devel-Peek/t/Peek.t

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,7 @@ do_test('ENAME on a stash',
783783
AUX_FLAGS = 0 # $] > 5.019008
784784
ARRAY = $ADDR
785785
KEYS = 0
786-
FILL = 0 \(cached = 0\)
786+
FILL = 0
787787
MAX = 7
788788
RITER = -1
789789
EITER = 0x0
@@ -806,7 +806,7 @@ do_test('ENAMEs on a stash',
806806
AUX_FLAGS = 0 # $] > 5.019008
807807
ARRAY = $ADDR
808808
KEYS = 0
809-
FILL = 0 \(cached = 0\)
809+
FILL = 0
810810
MAX = 7
811811
RITER = -1
812812
EITER = 0x0
@@ -832,7 +832,7 @@ do_test('ENAMEs on a stash with no NAME',
832832
AUX_FLAGS = 0 # $] > 5.019008
833833
ARRAY = $ADDR
834834
KEYS = 0
835-
FILL = 0 \(cached = 0\)
835+
FILL = 0
836836
MAX = 7
837837
RITER = -1
838838
EITER = 0x0
@@ -882,7 +882,7 @@ do_test('small hash after keys',
882882
ARRAY = $ADDR \\(0:[67],.*\\)
883883
hash quality = [0-9.]+%
884884
KEYS = 2
885-
FILL = [12] \\(cached = 0\\)
885+
FILL = [12]
886886
MAX = 7
887887
RITER = -1
888888
EITER = 0x0
@@ -912,7 +912,7 @@ do_test('small hash after keys and scalar',
912912
ARRAY = $ADDR \\(0:[67],.*\\)
913913
hash quality = [0-9.]+%
914914
KEYS = 2
915-
FILL = ([12]) \\(cached = \1\\)
915+
FILL = ([12])
916916
MAX = 7
917917
RITER = -1
918918
EITER = 0x0
@@ -927,30 +927,6 @@ do_test('small hash after keys and scalar',
927927
COW_REFCNT = 1
928928
){2}');
929929

930-
# This should immediately start with the FILL cached correctly.
931-
my %large = (0..1999);
932-
$b = %large;
933-
do_test('large hash',
934-
\%large,
935-
'SV = $RV\\($ADDR\\) at $ADDR
936-
REFCNT = 1
937-
FLAGS = \\(ROK\\)
938-
RV = $ADDR
939-
SV = PVHV\\($ADDR\\) at $ADDR
940-
REFCNT = 2
941-
FLAGS = \\($PADMY,OOK,SHAREKEYS\\)
942-
AUX_FLAGS = 0 # $] > 5.019008
943-
ARRAY = $ADDR \\(0:\d+,.*\\)
944-
hash quality = \d+\\.\d+%
945-
KEYS = 1000
946-
FILL = (\d+) \\(cached = \1\\)
947-
MAX = 1023
948-
RITER = -1
949-
EITER = 0x0
950-
RAND = $ADDR
951-
Elt .*
952-
');
953-
954930
# Dump with arrays, hashes, and operator return values
955931
@array = 1..3;
956932
do_test('Dump @array', '@array', <<'ARRAY', '', '', 1);

ext/Hash-Util-FieldHash/t/05_perlhook.t

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ sub numbers_first { # Sort helper: All digit entries sort in front of others
103103
is( $counter, 1, "list each doesn't trigger");
104104
is( "@x", "abc 123", "the return is correct");
105105

106-
$x = %h;
106+
$x = scalar %h;
107107
is( $counter, 1, "hash in scalar context doesn't trigger");
108-
like( $x, qr!^\d+/\d+$!, "correct result");
108+
is( $x, 1, "correct result");
109109

110110
(@x) = %h;
111111
is( $counter, 1, "hash in list context doesn't trigger");

ext/Hash-Util/Changes

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
Revision history for Perl extension Hash::Util.
22

3+
0.20
4+
Add bucket_ratio, num_buckets, used_buckets as a back-compat
5+
shin for 5.25 where we remove the bucket data from scalar(%hash)
6+
by making it return the count of keys by default.
7+
38
0.17
49
Add bucket_stats_formatted() as utility method to Hash::Util
510
Bug fixes to hash_stats()

ext/Hash-Util/Util.xs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,53 @@ bucket_array(rhv)
263263
}
264264
XSRETURN(0);
265265
}
266+
267+
#if PERL_VERSION < 25
268+
SV*
269+
bucket_ratio(rhv)
270+
SV* rhv
271+
PROTOTYPE: \%
272+
PPCODE:
273+
{
274+
if (SvROK(rhv)) {
275+
rhv= SvRV(rhv);
276+
if ( SvTYPE(rhv)==SVt_PVHV ) {
277+
SV *ret= Perl_hv_scalar(aTHX_ (HV*)rhv);
278+
ST(0)= ret;
279+
XSRETURN(1);
280+
}
281+
}
282+
XSRETURN_UNDEF;
283+
}
284+
285+
SV*
286+
num_buckets(rhv)
287+
SV* rhv
288+
PROTOTYPE: \%
289+
PPCODE:
290+
{
291+
if (SvROK(rhv)) {
292+
rhv= SvRV(rhv);
293+
if ( SvTYPE(rhv)==SVt_PVHV ) {
294+
XSRETURN_UV(HvMAX((HV*)rhv)+1);
295+
}
296+
}
297+
XSRETURN_UNDEF;
298+
}
299+
300+
SV*
301+
used_buckets(rhv)
302+
SV* rhv
303+
PROTOTYPE: \%
304+
PPCODE:
305+
{
306+
if (SvROK(rhv)) {
307+
rhv= SvRV(rhv);
308+
if ( SvTYPE(rhv)==SVt_PVHV ) {
309+
XSRETURN_UV(HvFILL((HV*)rhv));
310+
}
311+
}
312+
XSRETURN_UNDEF;
313+
}
314+
315+
#endif

ext/Hash-Util/lib/Hash/Util.pm

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,12 @@ our @EXPORT_OK = qw(
3434
lock_hashref_recurse unlock_hashref_recurse
3535
3636
hash_traversal_mask
37+
38+
bucket_ratio
39+
used_buckets
40+
num_buckets
3741
);
38-
our $VERSION = '0.19';
42+
our $VERSION = '0.20';
3943
require XSLoader;
4044
XSLoader::load();
4145

@@ -727,6 +731,29 @@ order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
727731
the same key order for the same hash seed and traversal mask, items that
728732
collide into one bucket may have different orders regardless of this setting.
729733
734+
=item B<bucket_ratio>
735+
736+
This function behaves the same way that scalar(%hash) behaved prior to
737+
Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied
738+
hash method, if untied then if the hash is empty it return 0, otherwise it
739+
returns a string containing the number of used buckets in the hash,
740+
followed by a slash, followed by the total number of buckets in the hash.
741+
742+
my %hash=("foo"=>1);
743+
print Hash::Util::bucket_ratio(%hash); # prints "1/8"
744+
745+
=item B<used_buckets>
746+
747+
This function returns the count of used buckets in the hash. It is expensive
748+
to calculate and the value is NOT cached, so avoid use of this function
749+
in production code.
750+
751+
=item B<num_buckets>
752+
753+
This function returns the total number of buckets the hash holds, or would
754+
hold if the array were created. (When a hash is freshly created the array
755+
may not be allocated even though this value will be non-zero.)
756+
730757
=back
731758
732759
=head2 Operating on references to hashes.

ext/Hash-Util/t/builtin.t

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/perl -Tw
2+
3+
use strict;
4+
use Test::More;
5+
6+
my @Exported_Funcs;
7+
BEGIN {
8+
@Exported_Funcs = qw( bucket_ratio num_buckets used_buckets );
9+
plan tests => 13 + @Exported_Funcs;
10+
use_ok 'Hash::Util', @Exported_Funcs;
11+
}
12+
foreach my $func (@Exported_Funcs) {
13+
can_ok __PACKAGE__, $func;
14+
}
15+
16+
my %hash;
17+
18+
is(bucket_ratio(%hash), 0, "Empty hash has no bucket_ratio");
19+
is(num_buckets(%hash), 8, "Empty hash should have eight buckets");
20+
is(used_buckets(%hash), 0, "Empty hash should have no used buckets");
21+
22+
$hash{1}= 1;
23+
is(bucket_ratio(%hash), "1/8", "hash has expected bucket_ratio");
24+
is(num_buckets(%hash), 8, "hash should have eight buckets");
25+
is(used_buckets(%hash), 1, "hash should have one used buckets");
26+
27+
$hash{$_}= $_ for 2..7;
28+
29+
like(bucket_ratio(%hash), qr!/8!, "hash has expected number of buckets in bucket_ratio");
30+
is(num_buckets(%hash), 8, "hash should have eight buckets");
31+
cmp_ok(used_buckets(%hash), "<", 8, "hash should have one used buckets");
32+
33+
$hash{8}= 8;
34+
like(bucket_ratio(%hash), qr!/16!, "hash has expected number of buckets in bucket_ratio");
35+
is(num_buckets(%hash), 16, "hash should have sixteen buckets");
36+
cmp_ok(used_buckets(%hash), "<=", 8, "hash should have at most 8 used buckets");
37+
38+

0 commit comments

Comments
 (0)