-
-
Notifications
You must be signed in to change notification settings - Fork 6
Get nowcasting_dataset
running on OCF's on-premises server
#152
Changes from all commits
5f1ca67
a4984c6
c2d04f3
2945094
a70e737
f983669
ae2ae15
4322255
78887d8
d7d7c0f
ff08180
eb671ae
caa7459
b300a13
4db102f
e89f85e
2bfee06
2a195c9
32420dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ channels: | |
- pvlib | ||
- conda-forge | ||
dependencies: | ||
- python>=3.8 | ||
- python>=3.9 | ||
- pip | ||
|
||
# Scientific Python | ||
|
@@ -14,17 +14,20 @@ dependencies: | |
- zarr | ||
- xarray | ||
- ipykernel | ||
- h5netcdf # For opening NetCDF files from cloud buckets. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
- h5netcdf # For opening NetCDF files from cloud buckets. | ||
|
||
# Cloud & distributed compute | ||
- gcsfs | ||
- s3fs | ||
- fsspec | ||
- pathy | ||
|
||
# Images & optical flow | ||
- conda-forge::opencv # also run `apt install libgl1-mesa-glx` | ||
- conda-forge::opencv # also run `apt install libgl1-mesa-glx` | ||
- scikit-image | ||
|
||
# Machine learning | ||
- pytorch::pytorch # explicitly specify pytorch channel to prevent conda from using conda-forge for pytorch, and hence installing the CPU-only version. | ||
- pytorch::pytorch # explicitly specify pytorch channel to prevent conda from using conda-forge for pytorch, and hence installing the CPU-only version. | ||
- pytorch-lightning | ||
|
||
# PV & Geospatial | ||
|
@@ -40,5 +43,5 @@ dependencies: | |
- pre-commit | ||
|
||
- pip: | ||
- neptune-client[pytorch-lightning] | ||
- tilemapbase | ||
- neptune-client[pytorch-lightning] | ||
- tilemapbase |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
import glob | ||
import os | ||
import shutil | ||
|
||
from typing import Union | ||
|
||
import logging | ||
from pathlib import Path | ||
|
||
_LOG = logging.getLogger("nowcasting_dataset") | ||
_LOG = logging.getLogger(__name__) | ||
|
||
|
||
def delete_all_files_and_folder_in_temp_path(path: str): | ||
|
@@ -32,3 +32,10 @@ def delete_all_files_in_temp_path(path: Path): | |
_LOG.info(f"Deleting {len(files)} files from {path}.") | ||
for f in files: | ||
os.remove(f) | ||
|
||
|
||
def check_path_exists(path: Union[str, Path]): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a docstring? |
||
"""Raises a RuntimeError if `path` does not exist in the local filesystem.""" | ||
path = Path(path) | ||
if not path.exists(): | ||
raise RuntimeError(f"{path} does not exist!") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
from pathlib import Path | ||
import gcsfs | ||
import tempfile | ||
import fsspec | ||
|
||
from nowcasting_dataset.cloud.aws import aws_upload_and_delete_local_files, upload_one_file | ||
from nowcasting_dataset.cloud.gcp import gcp_upload_and_delete_local_files, gcp_download_to_local | ||
|
@@ -43,3 +44,38 @@ def gcp_to_aws(gcp_filename: str, gcs: gcsfs.GCSFileSystem, aws_filename: str, a | |
upload_one_file( | ||
remote_filename=aws_filename, bucket=aws_bucket, local_filename=local_filename | ||
) | ||
|
||
|
||
def get_maximum_batch_id(path: str): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved from |
||
""" | ||
Get the last batch ID. Works with GCS, AWS, and local. | ||
|
||
Args: | ||
path: the path folder to look in. Begin with 'gs://' for GCS. | ||
|
||
Returns: the maximum batch id of data in `path`. | ||
""" | ||
_LOG.debug(f"Looking for maximum batch id in {path}") | ||
|
||
filesystem = fsspec.open(path).fs | ||
filenames = filesystem.ls(path) | ||
|
||
# just take filename | ||
filenames = [filename.split("/")[-1] for filename in filenames] | ||
|
||
# remove suffix | ||
filenames = [filename.split(".")[0] for filename in filenames] | ||
|
||
# change to integer | ||
batch_indexes = [int(filename) for filename in filenames if len(filename) > 0] | ||
|
||
# if there are no files, return None | ||
if len(batch_indexes) == 0: | ||
_LOG.debug(f"Did not find any files in {path}") | ||
return None | ||
|
||
# get the maximum batch id | ||
maximum_batch_id = max(batch_indexes) | ||
_LOG.debug(f"Found maximum of batch it of {maximum_batch_id} in {path}") | ||
|
||
return maximum_batch_id |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,19 @@ | ||
general: | ||
description: example configuration | ||
name: example | ||
name: gcp | ||
description: Configuration for Google Cloud | ||
input_data: | ||
bucket: solar-pv-nowcasting-data | ||
npw_base_path: NWP/UK_Met_Office/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr | ||
satelite_filename: satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr | ||
solar_pv_data_filename: UK_PV_timeseries_batch.nc | ||
solar_pv_metadata_filename: UK_PV_metadata.csv | ||
solar_pv_path: PV/PVOutput.org | ||
nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr | ||
satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr | ||
solar_pv_data_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc | ||
solar_pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_metadata.csv | ||
gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/PVOutput.org/PV/GSP/v0/pv_gsp.zarr | ||
output_data: | ||
filepath: solar-pv-nowcasting-data/prepared_ML_training_data/v6/ | ||
filepath: gs://solar-pv-nowcasting-data/prepared_ML_training_data/v6/ | ||
process: | ||
local_temp_path: ~/temp/ | ||
seed: 1234 | ||
batch_size: 32 | ||
upload_every_n_batches: 16 | ||
forecast_minutes: 60 | ||
history_minutes: 30 | ||
satellite_image_size_pixels: 64 | ||
|
@@ -28,7 +29,6 @@ process: | |
- lcc | ||
- mcc | ||
- hcc | ||
prcesion: 16 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
sat_channels: | ||
- HRV | ||
- IR_016 | ||
|
@@ -42,4 +42,3 @@ process: | |
- VIS008 | ||
- WV_062 | ||
- WV_073 | ||
val_check_interval: 1000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,19 +4,23 @@ | |
import io | ||
import yaml | ||
from nowcasting_dataset.config.model import Configuration | ||
from pathy import Pathy | ||
from typing import Union | ||
import fsspec | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def load_yaml_configuration(filename: str) -> Configuration: | ||
def load_yaml_configuration(filename: Union[str, Pathy]) -> Configuration: | ||
""" | ||
Load a yaml file which has a configuration in it | ||
filename: the file name that you want to load | ||
filename: the file name that you want to load. Will load from local, AWS, or GCP | ||
depending on the protocol prefix (e.g. 's3://bucket/config.yaml'). | ||
Returns: pydantic class | ||
""" | ||
|
||
# load the file to a dictionary | ||
with open(filename, "r") as stream: | ||
with fsspec.open(filename, mode="r") as stream: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we get rid of https://github.com/openclimatefix/nowcasting_dataset/blob/main/nowcasting_dataset/config/load.py#L28 then? |
||
configuration = yaml.safe_load(stream) | ||
|
||
# turn into pydantic class | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR deletes
example.yaml
(see the 'conversation' tab!)