diff --git a/fedml_experiments/distributed/fed_launch/README.md b/fedml_experiments/distributed/fed_launch/README.md index 59e59af55e..0573984e6c 100644 --- a/fedml_experiments/distributed/fed_launch/README.md +++ b/fedml_experiments/distributed/fed_launch/README.md @@ -47,6 +47,17 @@ config_11: This example is also used for 11 process. But the mapping is different: Server process -- host1:GPU:0, client 1 -- host1:GPU:0, client 2 -- host1:GPU:1, client 3 -- host1:GPU:1, client 4 -- host2:GPU:0, client 5 -- host2:GPU:1, client 6 -- host2:GPU:2, client 7 -- host3:GPU:0, client 8 -- host3:GPU:1, client 9 -- host3:GPU:2, client 10 -- host3:GPU:3 +Sometimes one may want to use only some of the GPUs on one machine, instead of all of them. Then you can use this: +``` +config_11: + host1: [0, 2] + host2: [1, 0, 1] + host3: [1, 1, 0, 1] + host4: [0, 1, 0, 0, 0, 1, 0, 2] +``` +Now the mapping becomes: Server process -- host1:GPU:1, client 1 -- host1:GPU:1, client 2 -- host2:GPU:0, client 3 -- host2:GPU:2, client 4 -- host3:GPU:0, client 5 -- host3:GPU:1, client 6 -- host3:GPU:3, client 7 -- host4:GPU:1, client 8 -- host4:GPU:5, client 9 -- host4:GPU:7, client 10 -- host4:GPU:7. 
+ + And you also can add many mappings in one yaml file like this: ``` config_11: diff --git a/fedml_experiments/distributed/fed_launch/main.py b/fedml_experiments/distributed/fed_launch/main.py index 7e151aa6d0..d3b6cc300e 100644 --- a/fedml_experiments/distributed/fed_launch/main.py +++ b/fedml_experiments/distributed/fed_launch/main.py @@ -467,31 +467,6 @@ def init_training_device_from_gpu_util_file(process_id, worker_number, gpu_util_ FedML_FedAvg_distributed(process_id, worker_number, device, comm, model, train_data_num, train_data_global, test_data_global, train_data_local_num_dict, train_data_local_dict, test_data_local_dict, args) - elif args.algorithm == 'PSGD': - from fedml_api.distributed.PSGD.PSGD_API import FedML_init, FedML_PSGD_distributed - FedML_PSGD_distributed(process_id, worker_number, device, comm, - model, train_data_num, train_data_global, test_data_global, - train_data_local_num_dict, train_data_local_dict, test_data_local_dict, args) - elif args.algorithm == 'DPSGD': - from fedml_api.distributed.DPSGD.DPSGD_API import FedML_init, FedML_DPSGD - FedML_DPSGD(process_id, worker_number, device, comm, - model, train_data_num, train_data_global, test_data_global, - train_data_local_num_dict, train_data_local_dict, test_data_local_dict, args) - elif args.algorithm == 'DCD_PSGD': - from fedml_api.distributed.DCD_PSGD.DCD_PSGD_API import FedML_init, FedML_DCD_PSGD - FedML_DCD_PSGD(process_id, worker_number, device, comm, - model, train_data_num, train_data_global, test_data_global, - train_data_local_num_dict, train_data_local_dict, test_data_local_dict, args) - elif args.algorithm == 'CHOCO_SGD': - from fedml_api.distributed.CHOCO_SGD.CHOCO_SGD_API import FedML_init, FedML_CHOCO_SGD - FedML_CHOCO_SGD(process_id, worker_number, device, comm, - model, train_data_num, train_data_global, test_data_global, - train_data_local_num_dict, train_data_local_dict, test_data_local_dict, args) - elif args.algorithm == 'CHOCO_SGD': - from 
fedml_api.distributed.SAPS_FL.SAPS_FL_API import FedML_init, FedML_SAPS_FL - FedML_SAPS_FL(process_id, worker_number, device, comm, - model, train_data_num, train_data_global, test_data_global, - train_data_local_num_dict, train_data_local_dict, test_data_local_dict, args) else: raise NotImplementedError