From 5abc087e802f83ed5f4e5632d0b1f6eca22f22ac Mon Sep 17 00:00:00 2001
From: Eva Woodbridge
Date: Fri, 24 Mar 2023 15:41:45 +0000
Subject: [PATCH 1/6] Initial script

---
 .gradient/health_check.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .gradient/health_check.py

diff --git a/.gradient/health_check.py b/.gradient/health_check.py
new file mode 100644
index 0000000..09663f7
--- /dev/null
+++ b/.gradient/health_check.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+import subprocess
+import json
+import os
+
+# Check that the datasets have mounted as expected
+
+# Gather the datasets expected from the settings.yaml
+# Using script from examples-utils check that the metadata files are correct
+# Do not need to run full hash checks
+
+# Check that files are symlinked correctly
+expected_files = ["tmp/exe_cache/fine-tuning-bert"]
+dirname = Path("/tmp")
+tmp_sub_directories = [str(f) for f in dirname.iterdir() if f.is_dir()]
+print(tmp_sub_directories)
+for file_name in expected_files:
+    if file_name not in tmp_sub_directories:
+        print(file_name + " not mounted")
+
+#Check that the number of detected IPUs is correct
+pod_type = os.getenv("GRAPHCORE_POD_TYPE")
+expected_ipu_num = pod_type.replace("pod","")
+
+num_ipus = os.getenv("NUM_AVAILABLE_IPU")
+
+if expected_ipu_num != num_ipus:
+    print("Incorrect number of IPUs found "+ num_ipus+" expected "+ expected_ipu_num)
+else:
+    print("Correct number IPUs found")
\ No newline at end of file

From c7a891ebb204a56d66df97f8471faa95e3b5894b Mon Sep 17 00:00:00 2001
From: Eva Woodbridge
Date: Fri, 24 Mar 2023 16:20:00 +0000
Subject: [PATCH 2/6] Parse settings.yaml file

---
 .gradient/health_check.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.gradient/health_check.py b/.gradient/health_check.py
index 09663f7..793b333 100644
--- a/.gradient/health_check.py
+++ b/.gradient/health_check.py
@@ -2,10 +2,24 @@
 import subprocess
 import json
 import os
+import yaml
 
 # Check that the datasets have mounted as expected
 
 # Gather the datasets expected from the settings.yaml
+with open('settings.yaml') as f:
+    my_dict = yaml.safe_load(f)
+    datasets = my_dict["integrations"].keys()
+
+# Check that dataset exists and
+dirname = Path("/datasets")
+datasets_sub_directories = [str(f) for f in dirname.iterdir() if f.is_dir()]
+print(datasets_sub_directories)
+for dataset in datasets:
+    full_path = str(dirname/dataset)
+    if full_path not in datasets_sub_directories:
+        print(dataset + " not found")
+
 # Using script from examples-utils check that the metadata files are correct
 # Do not need to run full hash checks
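Patch 2 above assumes a settings.yaml next to the script whose `integrations` mapping is keyed by dataset name. A minimal sketch of that parse step, using invented dataset names and fields:

import yaml

# Hypothetical settings.yaml content; the real file ships with the
# runtime and its dataset names and fields will differ.
EXAMPLE_SETTINGS = """
integrations:
  fine-tuning-bert:
    type: dataset
  kge_training:
    type: dataset
"""

settings = yaml.safe_load(EXAMPLE_SETTINGS)
datasets = settings["integrations"].keys()
print(list(datasets))  # ['fine-tuning-bert', 'kge_training']
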
From 23b26e0a766921b4036d2d141eb5ce037199d562 Mon Sep 17 00:00:00 2001
From: Eva Woodbridge
Date: Mon, 27 Mar 2023 15:54:58 +0100
Subject: [PATCH 3/6] Extract check files exist function

---
 .gradient/health_check.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/.gradient/health_check.py b/.gradient/health_check.py
index 793b333..2fc3ccb 100644
--- a/.gradient/health_check.py
+++ b/.gradient/health_check.py
@@ -11,26 +11,24 @@
     my_dict = yaml.safe_load(f)
     datasets = my_dict["integrations"].keys()
 
+def check_files_exist(files: [str], dirname: str):
+    dirpath = Path(dirname)
+    sub_directories = [str(f) for f in dirpath.iterdir() if f.is_dir()]
+    print(sub_directories)
+    for filename in files:
+        full_path = str(dirpath/filename)
+        if full_path not in sub_directories:
+            print(filename + " not found")
+
 # Check that dataset exists and
-dirname = Path("/datasets")
-datasets_sub_directories = [str(f) for f in dirname.iterdir() if f.is_dir()]
-print(datasets_sub_directories)
-for dataset in datasets:
-    full_path = str(dirname/dataset)
-    if full_path not in datasets_sub_directories:
-        print(dataset + " not found")
+check_files_exist(datasets, "/datasets")
 
 # Using script from examples-utils check that the metadata files are correct
 # Do not need to run full hash checks
 
 # Check that files are symlinked correctly
-expected_files = ["tmp/exe_cache/fine-tuning-bert"]
-dirname = Path("/tmp")
-tmp_sub_directories = [str(f) for f in dirname.iterdir() if f.is_dir()]
-print(tmp_sub_directories)
-for file_name in expected_files:
-    if file_name not in tmp_sub_directories:
-        print(file_name + " not mounted")
+expected_exe_cache = ["fine-tuning-bert", "kge_training"]
+check_files_exist(expected_files, "/tmp/exe_cache")
 
 #Check that the number of detected IPUs is correct
 pod_type = os.getenv("GRAPHCORE_POD_TYPE")

From ddd37109f87ef9f53964e3111db1ae20d0d5a77d Mon Sep 17 00:00:00 2001
From: Eva Woodbridge
Date: Mon, 27 Mar 2023 16:45:15 +0100
Subject: [PATCH 4/6] Use logger

---
 .gradient/health_check.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/.gradient/health_check.py b/.gradient/health_check.py
index 2fc3ccb..18f4079 100644
--- a/.gradient/health_check.py
+++ b/.gradient/health_check.py
@@ -3,11 +3,14 @@
 import json
 import os
 import yaml
+import logging
 
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
 # Check that the datasets have mounted as expected
 
 # Gather the datasets expected from the settings.yaml
-with open('settings.yaml') as f:
+with open("settings.yaml") as f:
     my_dict = yaml.safe_load(f)
     datasets = my_dict["integrations"].keys()
 
@@ -18,7 +21,7 @@ def check_files_exist(files: [str], dirname: str):
     for filename in files:
         full_path = str(dirpath/filename)
         if full_path not in sub_directories:
-            print(filename + " not found")
+            logging.warning(filename + " not found in " + dirname)
 
 # Check that dataset exists and
 check_files_exist(datasets, "/datasets")
 
@@ -27,16 +30,16 @@ def check_files_exist(files: [str], dirname: str):
 # Do not need to run full hash checks
 
 # Check that files are symlinked correctly
-expected_exe_cache = ["fine-tuning-bert", "kge_training"]
-check_files_exist(expected_files, "/tmp/exe_cache")
+expected_exe_cache = ["fine-tuning-bert", "kge_training", "not_here"]
+check_files_exist(expected_exe_cache, "/tmp/exe_cache")
 
 #Check that the number of detected IPUs is correct
 pod_type = os.getenv("GRAPHCORE_POD_TYPE")
 expected_ipu_num = pod_type.replace("pod","")
 
-num_ipus = os.getenv("NUM_AVAILABLE_IPU")
+num_ipus = os.getenv("NUM_AVAILABLE_IPU", "0")
 
 if expected_ipu_num != num_ipus:
-    print("Incorrect number of IPUs found "+ num_ipus+" expected "+ expected_ipu_num)
+    logger.warning("Incorrect number of IPUs found "+ num_ipus+" expected "+ expected_ipu_num)
 else:
-    print("Correct number IPUs found")
+    logger.info("Correct number IPUs found")
\ No newline at end of file
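A note on the logging set-up in patch 4 above: it configures the root logger, and `check_files_exist` then logs through the module-level `logging.warning` while the IPU check uses `logger.warning`. Both reach the same root logger, so the output matches; the more conventional pattern is a named per-module logger, sketched here rather than what the patch does:

import logging

# Conventional alternative to configuring the root logger directly:
# basicConfig installs a handler, and the named logger keeps records
# attributable to this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.warning("fine-tuning-bert not found in /tmp/exe_cache")
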
From 2d588d09e6fe2d2d04dd37f905ce9101af4a7c40 Mon Sep 17 00:00:00 2001
From: Eva Woodbridge
Date: Mon, 3 Apr 2023 16:48:40 +0100
Subject: [PATCH 5/6] Add metadata file checking

---
 .gradient/health_check.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/.gradient/health_check.py b/.gradient/health_check.py
index 18f4079..95cc3c5 100644
--- a/.gradient/health_check.py
+++ b/.gradient/health_check.py
@@ -4,6 +4,8 @@
 import os
 import yaml
 import logging
+from examples_utils.paperspace_utils.dataset_upload_checker import check_files_match_metadata
+
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
@@ -14,22 +16,25 @@
     my_dict = yaml.safe_load(f)
     datasets = my_dict["integrations"].keys()
 
+
 def check_files_exist(files: [str], dirname: str):
     dirpath = Path(dirname)
     sub_directories = [str(f) for f in dirpath.iterdir() if f.is_dir()]
-    print(sub_directories)
     for filename in files:
         full_path = str(dirpath/filename)
         if full_path not in sub_directories:
             logging.warning(filename + " not found in " + dirname)
+        else:
+            dataset_sub_directories = [str(f) for f in Path(full_path).iterdir()]
+            if full_path+"/gradient_dataset_metadata.json" in dataset_sub_directories:
+                check_files_match_metadata(full_path,False)
+            else:
+                logging.warning("Metadata file not found in "+ full_path)
 
-# Check that dataset exists and
-check_files_exist(datasets, "/datasets")
-
-# Using script from examples-utils check that the metadata files are correct
-# Do not need to run full hash checks
+# Check that dataset exists and if a metadata file is found check that all files in the metadata file exist
+check_files_exist(datasets, "./datasets")
 
-# Check that files are symlinked correctly
+# Check that files are symlinked correctly - this needs to be manually edited for each runtime
 expected_exe_cache = ["fine-tuning-bert", "kge_training", "not_here"]
 check_files_exist(expected_exe_cache, "/tmp/exe_cache")
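The metadata lookup patch 5 adds builds a full directory listing and tests string membership against it. For reference, an equivalent test sketched with pathlib (behaviour assumed identical for the paths involved; `check_files_match_metadata` itself stays as imported from examples_utils):

from pathlib import Path

def has_metadata_file(dataset_dir: str) -> bool:
    # Same check as full_path+"/gradient_dataset_metadata.json" in
    # dataset_sub_directories, without materialising the listing.
    return (Path(dataset_dir) / "gradient_dataset_metadata.json").exists()
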
From 058f0250e38b41a5c795869c7bf9d6afb5192d32 Mon Sep 17 00:00:00 2001
From: Eva Woodbridge
Date: Tue, 4 Apr 2023 14:55:37 +0100
Subject: [PATCH 6/6] Add main function

---
 .gradient/health_check.py | 50 ++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/.gradient/health_check.py b/.gradient/health_check.py
index 95cc3c5..943a8ef 100644
--- a/.gradient/health_check.py
+++ b/.gradient/health_check.py
@@ -6,17 +6,6 @@
 import logging
 from examples_utils.paperspace_utils.dataset_upload_checker import check_files_match_metadata
 
-
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-# Check that the datasets have mounted as expected
-
-# Gather the datasets expected from the settings.yaml
-with open("settings.yaml") as f:
-    my_dict = yaml.safe_load(f)
-    datasets = my_dict["integrations"].keys()
-
-
 def check_files_exist(files: [str], dirname: str):
     dirpath = Path(dirname)
     sub_directories = [str(f) for f in dirpath.iterdir() if f.is_dir()]
@@ -31,20 +20,33 @@ def check_files_exist(files: [str], dirname: str):
         else:
             logging.warning("Metadata file not found in "+ full_path)
 
-# Check that dataset exists and if a metadata file is found check that all files in the metadata file exist
-check_files_exist(datasets, "./datasets")
+#Check that the number of detected IPUs is correct
+def check_num_pod_expected(logger:logging.Logger):
+    pod_type = os.getenv("GRAPHCORE_POD_TYPE")
+    expected_ipu_num = pod_type.replace("pod","")
 
-# Check that files are symlinked correctly - this needs to be manually edited for each runtime
-expected_exe_cache = ["fine-tuning-bert", "kge_training", "not_here"]
-check_files_exist(expected_exe_cache, "/tmp/exe_cache")
+    num_ipus = os.getenv("NUM_AVAILABLE_IPU", "0")
 
-#Check that the number of detected IPUs is correct
-pod_type = os.getenv("GRAPHCORE_POD_TYPE")
-expected_ipu_num = pod_type.replace("pod","")
+    if expected_ipu_num != num_ipus:
+        logger.warning("Incorrect number of IPUs found "+ num_ipus+" expected "+ expected_ipu_num)
+    else:
+        logger.info("Correct number IPUs found")
+
+def main():
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    # Check that the datasets have mounted as expected
+
+    # Gather the datasets expected from the settings.yaml
+    with open("settings.yaml") as f:
+        my_dict = yaml.safe_load(f)
+        datasets = my_dict["integrations"].keys()
+
+    # Check that dataset exists and if a metadata file is found check that all files in the metadata file exist
+    check_files_exist(datasets, "./datasets")
 
-num_ipus = os.getenv("NUM_AVAILABLE_IPU", "0")
+    # Check that files are symlinked correctly - this needs to be manually edited for each runtime
+    expected_exe_cache = ["fine-tuning-bert", "kge_training"]
+    check_files_exist(expected_exe_cache, "/tmp/exe_cache")
 
-if expected_ipu_num != num_ipus:
-    logger.warning("Incorrect number of IPUs found "+ num_ipus+" expected "+ expected_ipu_num)
-else:
-    logger.info("Correct number IPUs found")
\ No newline at end of file
+main()
\ No newline at end of file
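As committed, patch 6 leaves three loose ends: `main()` never calls the newly extracted `check_num_pod_expected`, the bare `main()` call runs on import because there is no `__main__` guard, and `pod_type.replace` still raises `AttributeError` when GRAPHCORE_POD_TYPE is unset. A sketch of an entry point with those gaps closed, reusing `check_files_exist` and `check_num_pod_expected` exactly as defined in the patches above, and assuming the absolute /datasets mount path from patches 1 to 4 rather than the relative ./datasets that patch 5 introduced:

import logging
import os
import yaml

def main():
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Gather the datasets expected from the settings.yaml
    with open("settings.yaml") as f:
        datasets = yaml.safe_load(f)["integrations"].keys()

    # Dataset mounts plus their metadata files (absolute mount path assumed)
    check_files_exist(datasets, "/datasets")

    # Executable-cache symlinks; edit this list per runtime
    check_files_exist(["fine-tuning-bert", "kge_training"], "/tmp/exe_cache")

    # Wire the IPU-count check back in; skipping avoids the None.replace crash
    if os.getenv("GRAPHCORE_POD_TYPE") is not None:
        check_num_pod_expected(logger)
    else:
        logger.warning("GRAPHCORE_POD_TYPE not set; skipping IPU count check")

if __name__ == "__main__":
    main()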