Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 83b5773

Browse files
committed
Failed attempt to fix #42 using retries. Next, going to try replacing `.sel()` with `np.searchsorted`.
1 parent a34aa99 commit 83b5773

File tree

4 files changed

+216
-846
lines changed

4 files changed

+216
-846
lines changed

notebooks/benchmark_loading_speed.ipynb

Lines changed: 18 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@
5252
"name": "stdout",
5353
"output_type": "stream",
5454
"text": [
55-
"CPU times: user 65 µs, sys: 0 ns, total: 65 µs\n",
56-
"Wall time: 67.9 µs\n"
55+
"CPU times: user 56 µs, sys: 18 µs, total: 74 µs\n",
56+
"Wall time: 76.3 µs\n"
5757
]
5858
}
5959
],
@@ -69,7 +69,7 @@
6969
" # sat_channels =('HRV', 'WV_062', 'WV_073'),\n",
7070
" nwp_base_path = f'gs://{NWP_BASE_PATH}',\n",
7171
" pin_memory = True, #: Passed to DataLoader.\n",
72-
" num_workers = 1, #: Passed to DataLoader.\n",
72+
" num_workers = 0, #: Passed to DataLoader.\n",
7373
" prefetch_factor = 256, #: Passed to DataLoader.\n",
7474
" n_samples_per_timestep = 8, #: Passed to NowcastingDataset\n",
7575
")"
@@ -94,8 +94,8 @@
9494
"text": [
9595
"15 bad PV systems found and removed!\n",
9696
"pv_power = 400.0 MB\n",
97-
"CPU times: user 56.2 s, sys: 3.56 s, total: 59.8 s\n",
98-
"Wall time: 59.9 s\n"
97+
"CPU times: user 55.6 s, sys: 3.45 s, total: 59 s\n",
98+
"Wall time: 59.3 s\n"
9999
]
100100
}
101101
],
@@ -124,23 +124,21 @@
124124
"/home/jack/miniconda3/envs/nowcasting_dataset/lib/python3.9/site-packages/pvlib/solarposition.py:368: FutureWarning: casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.\n",
125125
" unixtime = np.array(time.astype(np.int64)/10**9)\n",
126126
"/home/jack/miniconda3/envs/nowcasting_dataset/lib/python3.9/site-packages/pvlib/solarposition.py:368: FutureWarning: casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.\n",
127-
" unixtime = np.array(time.astype(np.int64)/10**9)\n"
127+
" unixtime = np.array(time.astype(np.int64)/10**9)\n",
128+
"/home/jack/dev/ocf/nowcasting_dataset/nowcasting_dataset/utils.py:20: FutureWarning: casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.\n",
129+
" a = a.astype(int)\n",
130+
"DEBUG:nowcasting_dataset:Opening satellite data: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr\n",
131+
"DEBUG:nowcasting_dataset:Opening NWP data: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_zarr\n",
132+
"DEBUG:nowcasting_dataset:Opening satellite data: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr\n",
133+
"DEBUG:nowcasting_dataset:Opening NWP data: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_zarr\n"
128134
]
129135
},
130136
{
131137
"name": "stdout",
132138
"output_type": "stream",
133139
"text": [
134-
"CPU times: user 12.9 s, sys: 347 ms, total: 13.2 s\n",
135-
"Wall time: 15.4 s\n"
136-
]
137-
},
138-
{
139-
"name": "stderr",
140-
"output_type": "stream",
141-
"text": [
142-
"/home/jack/dev/ocf/nowcasting_dataset/nowcasting_dataset/utils.py:20: FutureWarning: casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.\n",
143-
" a = a.astype(int)\n"
140+
"CPU times: user 26.3 s, sys: 622 ms, total: 26.9 s\n",
141+
"Wall time: 33.8 s\n"
144142
]
145143
}
146144
],
@@ -354,9 +352,7 @@
354352
"output_type": "stream",
355353
"text": [
356354
"/home/jack/miniconda3/envs/nowcasting_dataset/lib/python3.9/site-packages/pytorch_lightning/trainer/data_loading.py:102: UserWarning: The dataloader, val dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n",
357-
" rank_zero_warn(\n",
358-
"DEBUG:nowcasting_dataset:Opening satellite data: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr\n",
359-
"DEBUG:nowcasting_dataset:Opening NWP data: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_zarr\n"
355+
" rank_zero_warn(\n"
360356
]
361357
},
362358
{
@@ -371,17 +367,14 @@
371367
"output_type": "stream",
372368
"text": [
373369
"/home/jack/miniconda3/envs/nowcasting_dataset/lib/python3.9/site-packages/pytorch_lightning/trainer/data_loading.py:102: UserWarning: The dataloader, train dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n",
374-
" rank_zero_warn(\n",
375-
"DEBUG:nowcasting_dataset:Opening satellite data: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr\n",
376-
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n",
377-
"DEBUG:nowcasting_dataset:Opening NWP data: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_zarr\n"
370+
" rank_zero_warn(\n"
378371
]
379372
},
380373
{
381374
"name": "stdout",
382375
"output_type": "stream",
383376
"text": [
384-
"Epoch 0: : 183it [13:31, 4.43s/it, loss=0.105, v_num=178] "
377+
"Epoch 0: : 209it [14:28, 4.16s/it, loss=0.0968, v_num=181]"
385378
]
386379
}
387380
],
@@ -392,37 +385,7 @@
392385
{
393386
"cell_type": "code",
394387
"execution_count": null,
395-
"id": "f2c4c66f-cdc5-4319-b7c8-0b7dddf438d5",
396-
"metadata": {},
397-
"outputs": [],
398-
"source": [
399-
"# torch.save(model.state_dict(), 'model_state_dict.pt')"
400-
]
401-
},
402-
{
403-
"cell_type": "code",
404-
"execution_count": null,
405-
"id": "54d607e2-f10e-4c07-88bf-492c0215c21e",
406-
"metadata": {},
407-
"outputs": [],
408-
"source": [
409-
"import pandas as pd"
410-
]
411-
},
412-
{
413-
"cell_type": "code",
414-
"execution_count": null,
415-
"id": "10119390-bdc1-4724-b5f8-1aa3877d5293",
416-
"metadata": {},
417-
"outputs": [],
418-
"source": [
419-
"pd.__version__"
420-
]
421-
},
422-
{
423-
"cell_type": "code",
424-
"execution_count": null,
425-
"id": "3e025240-b406-4add-9564-e27358f65d17",
388+
"id": "6813e0ec-0e7f-4cbb-87f3-51357f80362e",
426389
"metadata": {},
427390
"outputs": [],
428391
"source": []

0 commit comments

Comments
 (0)