|
13 | 13 | import tempfile
|
14 | 14 | import uuid
|
15 | 15 | import warnings
|
| 16 | +from collections import Counter |
16 | 17 | from collections.abc import Generator, Iterator, Mapping
|
17 | 18 | from contextlib import ExitStack
|
18 | 19 | from io import BytesIO
|
@@ -2990,6 +2991,184 @@ def test_chunked_cftime_datetime(self) -> None:
|
2990 | 2991 | assert original.chunks == actual.chunks
|
2991 | 2992 |
|
2992 | 2993 |
|
def attach_counter(f):
    """Decorate method ``f`` so that every call is tallied in a ``Counter``.

    The returned wrapper exposes the tally as ``wrapper.counter``. Calls are
    keyed on their first positional argument (after ``self``); calls without
    any positional argument are pooled under the placeholder key ``"foo"``.
    ``__contains__``/``__getitem__`` lookups of ``"zarr.json"`` are forwarded
    to ``f`` but neither printed nor counted.

    Note that one counter is created per decorated function, so when this is
    used inside a class body the tally is shared by all instances of that
    class.

    Parameters
    ----------
    f : callable
        Method taking ``self`` followed by arbitrary arguments.

    Returns
    -------
    callable
        Wrapper with the same call signature as ``f`` and a ``counter``
        attribute.
    """
    import functools

    counter = Counter()
    # Methods whose "zarr.json" lookups are excluded from the tally.
    ignored_probes = ("__contains__", "__getitem__")

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        # ``bool(args)`` guards the index so keyword-only calls cannot raise here.
        is_ignored = (
            f.__name__ in ignored_probes and bool(args) and args[0] == "zarr.json"
        )
        if not is_ignored:
            print(f"Calling {f.__name__} with {args}")
            # Counter.update consumes an iterable of keys: a 1-tuple holding
            # the first positional argument, or the placeholder when absent.
            counter.update(args[:1] if args else ("foo",))
        # Keyword arguments must be forwarded: several decorated methods
        # accept ``**kwargs``.
        return f(self, *args, **kwargs)

    wrapper.counter = counter
    return wrapper
| 3008 | + |
| 3009 | + |
class CountingStore(KVStoreV3):
    """``KVStoreV3`` backed by an empty dict whose access methods are counted.

    Each method named in ``instrumented_methods`` is wrapped with
    ``attach_counter`` and simply delegates to the parent implementation.
    ``summarize`` reports the number of counted calls per method and ``reset``
    clears the tallies.

    NOTE(review): ``attach_counter`` runs once per method at class-definition
    time, so the counters appear to be shared by every instance of this class
    rather than being per-store — confirm against ``attach_counter``.
    """

    def __init__(self):
        super().__init__({})
        self.instrumented_methods = [
            "__iter__",
            "__contains__",
            "__setitem__",
            "__getitem__",
            "listdir",
            "list_prefix",
        ]
        # Start from a clean slate: tallies left behind by an earlier store
        # (e.g. reads made after its final check) must not leak into this one.
        self.reset()

    @attach_counter
    def __iter__(self):
        return super().__iter__()

    @attach_counter
    def listdir(self, *args, **kwargs):
        return super().listdir(*args, **kwargs)

    @attach_counter
    def list_prefix(self, *args, **kwargs):
        return super().list_prefix(*args, **kwargs)

    @attach_counter
    def __contains__(self, key) -> bool:
        return super().__contains__(key)

    @attach_counter
    def __getitem__(self, key):
        return super().__getitem__(key)

    @attach_counter
    def __setitem__(self, *args, **kwargs):
        return super().__setitem__(*args, **kwargs)

    def summarize(self):
        """Return ``{method name without surrounding underscores: call count}``.

        Methods that were never called are reported with a count of 0.
        """
        # ``str.strip`` takes a set of characters, so "_" removes every
        # leading/trailing underscore ("__iter__" -> "iter") and leaves
        # interior ones alone ("list_prefix" stays as is).
        return {
            method.strip("_"): sum(getattr(self, method).counter.values())
            for method in self.instrumented_methods
        }

    def reset(self):
        """Clear the tally of every instrumented method."""
        for method in self.instrumented_methods:
            getattr(self, method).counter.clear()
| 3059 | + |
| 3060 | + |
@requires_zarr
@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3")
class TestInstrumentedZarrStore:
    """Check that zarr writes and reads stay within a budget of store requests.

    Each test performs an operation against a ``CountingStore`` and then
    asserts, via ``check_requests``, that no instrumented store method was
    called more often than the listed upper bound.
    """

    @contextlib.contextmanager
    def create_zarr_target(self):
        """Yield a new ``CountingStore`` to write into."""
        yield CountingStore()

    def check_requests(self, expected, store):
        """Assert that every request count in ``store`` is within ``expected``.

        Parameters
        ----------
        expected : mapping of str to int
            Upper bound per method name, keyed like ``store.summarize()``.
        store : CountingStore
            Store whose tallies are checked and then cleared.

        Raises
        ------
        AssertionError
            If any method was called more often than allowed. The message
            lists every offending method as ``(actual, allowed)``.
        """
        summary = store.summarize()
        try:
            over_budget = {
                name: (count, expected[name])
                for name, count in summary.items()
                if count > expected[name]
            }
            assert not over_budget, (
                f"store requests exceeded budget (actual, allowed): {over_budget}; "
                f"full summary: {summary}"
            )
        finally:
            # Always clear the tallies, even when the check fails, so that a
            # failure here cannot contaminate subsequent checks.
            store.reset()

    def test_append(self) -> None:
        original = Dataset({"foo": ("x", [1])}, coords={"x": [0]})
        modified = Dataset({"foo": ("x", [2])}, coords={"x": [1]})
        with self.create_zarr_target() as store:
            # Initial write.
            original.to_zarr(store)
            expected = {
                "iter": 2,
                "contains": 9,
                "setitem": 9,
                "getitem": 6,
                "listdir": 2,
                "list_prefix": 2,
            }
            self.check_requests(expected, store)

            # Append along "x" with mode="a".
            modified.to_zarr(store, mode="a", append_dim="x")
            # Previously recorded counts, kept for comparison:
            # v2024.03.0: {'iter': 6, 'contains': 2, 'setitem': 5, 'getitem': 10, 'listdir': 6, 'list_prefix': 0}
            # 6057128b: {'iter': 5, 'contains': 2, 'setitem': 5, 'getitem': 10, "listdir": 5, "list_prefix": 0}
            expected = {
                "iter": 2,
                "contains": 2,
                "setitem": 5,
                "getitem": 6,
                "listdir": 2,
                "list_prefix": 0,
            }
            self.check_requests(expected, store)

            # Append along "x" again, this time with mode="a-".
            modified.to_zarr(store, mode="a-", append_dim="x")
            expected = {
                "iter": 2,
                "contains": 2,
                "setitem": 5,
                "getitem": 6,
                "listdir": 2,
                "list_prefix": 2,
            }
            self.check_requests(expected, store)

            # The store must now hold the original plus both appended copies.
            with open_dataset(store, engine="zarr") as actual:
                assert_identical(
                    actual, xr.concat([original, modified, modified], dim="x")
                )

    @requires_dask
    def test_region_write(self) -> None:
        ds = Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]}).chunk()
        with self.create_zarr_target() as store:
            # Write with compute=False.
            ds.to_zarr(store, mode="w", compute=False)
            expected = {
                "iter": 2,
                "contains": 7,
                "setitem": 9,
                "getitem": 6,
                "listdir": 2,
                "list_prefix": 4,
            }
            self.check_requests(expected, store)

            # Region write with an explicit slice.
            ds.to_zarr(store, region={"x": slice(None)})
            # Previously recorded counts, kept for comparison:
            # v2024.03.0: {'iter': 5, 'contains': 2, 'setitem': 1, 'getitem': 6, 'listdir': 5, 'list_prefix': 0}
            # 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 5, 'listdir': 4, 'list_prefix': 0}
            expected = {
                "iter": 2,
                "contains": 2,
                "setitem": 1,
                "getitem": 3,
                "listdir": 2,
                "list_prefix": 0,
            }
            self.check_requests(expected, store)

            # Region write with region="auto".
            ds.to_zarr(store, region="auto")
            # Previously recorded counts, kept for comparison:
            # v2024.03.0: {'iter': 6, 'contains': 4, 'setitem': 1, 'getitem': 11, 'listdir': 6, 'list_prefix': 0}
            # 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 7, 'listdir': 4, 'list_prefix': 0}
            expected = {
                "iter": 2,
                "contains": 2,
                "setitem": 1,
                "getitem": 5,
                "listdir": 2,
                "list_prefix": 0,
            }
            self.check_requests(expected, store)

            # Reading the dataset back has its own request budget.
            expected = {
                "iter": 1,
                "contains": 2,
                "setitem": 0,
                "getitem": 5,
                "listdir": 1,
                "list_prefix": 0,
            }
            with open_dataset(store, engine="zarr") as actual:
                assert_identical(actual, ds)
            self.check_requests(expected, store)
| 3170 | + |
| 3171 | + |
2993 | 3172 | @requires_zarr
|
2994 | 3173 | class TestZarrDictStore(ZarrBase):
|
2995 | 3174 | @contextlib.contextmanager
|
|
0 commit comments