@@ -16,6 +16,8 @@ def cli():
16
16
"iea/372f7e29-en.zip" ,
17
17
"iea/8624f431-en.zip" ,
18
18
"iea/cac5fa90-en.zip" ,
19
+ "iea/web/2024-07-25/WBIG1.zip" ,
20
+ "iea/web/2024-07-25/WBIG2.zip" ,
19
21
"shape/gdp_v1p0.mif" ,
20
22
"shape/gdp_v1p1.mif" ,
21
23
"shape/gdp_v1p2.mif" ,
@@ -38,8 +40,9 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
38
40
"""Create random data for testing.
39
41
40
42
This command creates data files in message_ix_models/data/test/… based on
41
- corresponding private files in message_data/data/…. This supports testing of code in
42
- message_ix_models that handles these files.
43
+ corresponding private files in either message_data/data/… or the local data
44
+ directory. This supports testing of code in message_ix_models that handles these
45
+ files.
43
46
44
47
The files are identical in structure and layout, except the values are "fuzzed", or
45
48
replaced with random values.
@@ -55,11 +58,11 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
55
58
from numpy import char , random
56
59
57
60
from message_ix_models .project .advance .data import NAME
58
- from message_ix_models .util import package_data_path , private_data_path
61
+ from message_ix_models .util import package_data_path , path_fallback
59
62
60
63
# Paths
61
64
p = Path (filename )
62
- path_in = private_data_path ( p )
65
+ path_in = path_fallback ( p , where = "private local" )
63
66
path_out = package_data_path ("test" , p )
64
67
65
68
# Shared arguments for read_csv() and to_csv()
@@ -70,21 +73,28 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
70
73
sep = ";"
71
74
72
75
# Read the data
76
+ zf_member_name = None
73
77
with TemporaryDirectory () as td :
74
78
td_path = Path (td )
75
79
if "advance" in filename :
76
80
# Manually unpack one member of the multi-member archive `path_in`
81
+ zf_member_name = NAME
77
82
target : Union [IO , Path , str ] = zipfile .ZipFile (path_in ).extract (
78
- NAME , path = td_path
83
+ zf_member_name , path = td_path
79
84
)
80
85
elif "iea" in filename :
81
86
# Manually unpack so that dask.dataframe.read_csv() can be used
82
- from message_ix_models .tools .iea .web import unpack_zip
87
+ from message_ix_models .tools .iea .web import fwf_to_csv , unpack_zip
83
88
84
89
target = unpack_zip (path_in )
90
+ zf_member_name = target .name
91
+ if target .suffix == ".TXT" :
92
+ target = fwf_to_csv (target , progress = True )
85
93
else :
86
94
target = path_in
87
95
96
+ print (f"Read { target } " )
97
+
88
98
# - Read the data
89
99
# - Use dask & pyarrow.
90
100
# - Prevent values like "NA" being auto-transformed to np.nan.
@@ -127,10 +137,12 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover
127
137
# Write to file, keeping only a few decimal points
128
138
path_out .parent .mkdir (parents = True , exist_ok = True )
129
139
130
- if "advance" in filename :
140
+ if path_out . suffix . lower () == ".zip" :
131
141
zf = zipfile .ZipFile (path_out , "w" , compression = zipfile .ZIP_BZIP2 )
132
- target = zf .open (NAME )
142
+ target = zf .open (zf_member_name , "w" )
143
+ print (f"Write to member { zf_member_name } in { path_out } " )
133
144
else :
134
145
target = path_out
146
+ print (f"Write to { path_out } " )
135
147
136
148
df .to_csv (target , float_format = "%.2f" , index = False , sep = sep )
0 commit comments