Skip to content

Commit 5eaa074

Browse files
committed
Add fuzzed private data for testing .iea.web 2024 edition
1 parent 9cc2154 commit 5eaa074

File tree

3 files changed

+26
-8
lines changed

3 files changed

+26
-8
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:fee0dbac438e255d0faddb7bc43705678180f06bd738ba8434d24017c2449047
3+
size 8334294
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:14ff34fbbde1b4105df603571da15cb04be073ece885ee9db5cb9fcb9990e749
3+
size 9897875

message_ix_models/testing/cli.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ def cli():
1616
"iea/372f7e29-en.zip",
1717
"iea/8624f431-en.zip",
1818
"iea/cac5fa90-en.zip",
19+
"iea/web/2024-07-25/WBIG1.zip",
20+
"iea/web/2024-07-25/WBIG2.zip",
1921
"shape/gdp_v1p0.mif",
2022
"shape/gdp_v1p1.mif",
2123
"shape/gdp_v1p2.mif",
@@ -38,8 +40,9 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
3840
"""Create random data for testing.
3941
4042
This command creates data files in message_ix_models/data/test/… based on
41-
corresponding private files in message_data/data/…. This supports testing of code in
42-
message_ix_models that handles these files.
43+
corresponding private files in either message_data/data/… or the local data
44+
directory. This supports testing of code in message_ix_models that handles these
45+
files.
4346
4447
The files are identical in structure and layout, except the values are "fuzzed", or
4548
replaced with random values.
@@ -55,11 +58,11 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
5558
from numpy import char, random
5659

5760
from message_ix_models.project.advance.data import NAME
58-
from message_ix_models.util import package_data_path, private_data_path
61+
from message_ix_models.util import package_data_path, path_fallback
5962

6063
# Paths
6164
p = Path(filename)
62-
path_in = private_data_path(p)
65+
path_in = path_fallback(p, where="private local")
6366
path_out = package_data_path("test", p)
6467

6568
# Shared arguments for read_csv() and to_csv()
@@ -70,21 +73,28 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
7073
sep = ";"
7174

7275
# Read the data
76+
zf_member_name = None
7377
with TemporaryDirectory() as td:
7478
td_path = Path(td)
7579
if "advance" in filename:
7680
# Manually unpack one member of the multi-member archive `path_in`
81+
zf_member_name = NAME
7782
target: Union[IO, Path, str] = zipfile.ZipFile(path_in).extract(
78-
NAME, path=td_path
83+
zf_member_name, path=td_path
7984
)
8085
elif "iea" in filename:
8186
# Manually unpack so that dask.dataframe.read_csv() can be used
82-
from message_ix_models.tools.iea.web import unpack_zip
87+
from message_ix_models.tools.iea.web import fwf_to_csv, unpack_zip
8388

8489
target = unpack_zip(path_in)
90+
zf_member_name = target.name
91+
if target.suffix == ".TXT":
92+
target = fwf_to_csv(target, progress=True)
8593
else:
8694
target = path_in
8795

96+
print(f"Read {target}")
97+
8898
# - Read the data
8999
# - Use dask & pyarrow.
90100
# - Prevent values like "NA" being auto-transformed to np.nan.
@@ -127,10 +137,12 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
127137
# Write to file, keeping only a few decimal points
128138
path_out.parent.mkdir(parents=True, exist_ok=True)
129139

130-
if "advance" in filename:
140+
if path_out.suffix.lower() == ".zip":
131141
zf = zipfile.ZipFile(path_out, "w", compression=zipfile.ZIP_BZIP2)
132-
target = zf.open(NAME)
142+
target = zf.open(zf_member_name, "w")
143+
print(f"Write to member {zf_member_name} in {path_out}")
133144
else:
134145
target = path_out
146+
print(f"Write to {path_out}")
135147

136148
df.to_csv(target, float_format="%.2f", index=False, sep=sep)

0 commit comments

Comments
 (0)