From e9a62c285f12b7acdf6f61df5a68dc6f62567504 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Mon, 17 Apr 2023 03:22:16 -0700 Subject: [PATCH 1/6] add ft in plot --- plot/plot.sh | 29 +++++++++++++++++++++++++++++ plot/plot_normalized_latency.py | 33 +++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 plot/plot.sh diff --git a/plot/plot.sh b/plot/plot.sh new file mode 100644 index 000000000000..d22875ccb726 --- /dev/null +++ b/plot/plot.sh @@ -0,0 +1,29 @@ +# alpaca-13b: + python plot_normalized_latency.py paper/exp0/exp/alpaca/opt-13b-tp1/n1 --duration 3600 --ylim 1 --format pdf +# alpaca-66b: + python plot_normalized_latency.py paper/exp4/exp/alpaca/opt-66b-tp4/n1 --duration 3600 --ylim 1 --format pdf +# alpaca-175b: + python plot_normalized_latency.py paper/exp-175b/exp/alpaca/opt-175b-tp8/n1/ --duration 3600 --ylim 1 --format pdf + +# sharegpt-13b: + python plot_normalized_latency.py paper/exp0/exp/sharegpt/opt-13b-tp1/n1/ --duration 3600 --ylim 1 --format pdf +# sharegpt-66b: + python plot_normalized_latency.py paper/exp2/exp/sharegpt/opt-66b-tp4/n1 --duration 3600 --ylim 1 --format pdf +# sharegpt-175b: + python plot_normalized_latency.py paper/exp-175b/exp/sharegpt/opt-175b-tp8/n1 --duration 3600 --ylim 1 --format pdf + + +# alpaca-n2-13b: + python plot_normalized_latency.py paper/exp3/exp/alpaca/opt-13b-tp1/n2 --duration 3600 --ylim 1 --format pdf +# alpaca-n4-13b: + python plot_normalized_latency.py paper/exp3/exp/alpaca/opt-13b-tp1/n4 --duration 3600 --ylim 1 --format pdf +# alpaca-n6-13b: + python plot_normalized_latency.py paper/exp0/exp/alpaca/opt-13b-tp1/n6 --duration 3600 --ylim 1 --format pdf + + +# alpaca-n2-beam-13b: + python plot_normalized_latency.py paper/exp1/exp/alpaca/opt-13b-tp1/n2-beam --duration 3600 --ylim 1 --format pdf +# alpaca-n4-beam-13b: + python plot_normalized_latency.py paper/exp1/exp/alpaca/opt-13b-tp1/n4-beam --duration 3600 --ylim 1 --format pdf +# alapca-n6-beam-13b: + python plot_normalized_latency.py paper/exp1/exp/alpaca/opt-13b-tp1/n6-beam --duration 3600 --ylim 1 --format pdf diff --git a/plot/plot_normalized_latency.py b/plot/plot_normalized_latency.py index 4bccc40fc2b1..3acabf3a53d4 100644 --- a/plot/plot_normalized_latency.py +++ b/plot/plot_normalized_latency.py @@ -8,6 +8,7 @@ SYSTEMS = [ + 'FT', 'orca-constant', 'orca-power2', 'orca-oracle', @@ -15,6 +16,7 @@ ] SYSTEM_TO_LABEL = { + 'FT': 'FT', 'orca-constant': 'Orca (Max)', 'orca-power2': 'Orca (Pow2)', 'orca-oracle': 'Orca (Oracle)', @@ -22,6 +24,7 @@ } SYSTEM_TO_COLOR = { + 'FT': 'gray', 'orca-constant': 'red', 'orca-power2': 'orange', 'orca-oracle': 'green', @@ -29,6 +32,7 @@ } SYSTEM_TO_MARKER = { + 'FT': '.', 'orca-constant': 'x', 'orca-power2': '^', 'orca-oracle': 's', @@ -78,13 +82,15 @@ def get_model(save_dir: str) -> Tuple[str, int]: def get_system(save_dir: str) -> str: save_dir = os.path.abspath(save_dir) - dir_names = save_dir.split('/') + dir_names = save_dir.split('/')[6:] for dir_name in dir_names: if dir_name.startswith('orca-'): return dir_name if dir_name == 'cacheflow': return dir_name + if dir_name == 'ft': + return 'FT' raise ValueError(f'Cannot find system in {save_dir}') @@ -100,6 +106,17 @@ def get_sampling(save_dir: str) -> str: return dir_name raise ValueError(f'Cannot find sampling method in {save_dir}') +def get_dataset(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name == 'alpaca': + return 'alpaca' + if dir_name == 'sharegpt': + return 'sharegpt' + raise ValueError(f'Cannot find dataset in {save_dir}') + def plot_normalized_latency( exp_dir: str, @@ -120,9 +137,12 @@ def plot_normalized_latency( continue if f'seed{seed}' not in root: continue - if f'duration-{duration}' not in root: + if f'duration-{duration}' not in root and 'duration-900' not in root: + continue + if 'unused' in root: continue save_dirs.append(root) + print(save_dirs) # Plot normalized latency. perf_per_system: Dict[str, Tuple[List[float], List[float]]] = {} @@ -147,8 +167,8 @@ def plot_normalized_latency( perf_per_system[system_name][0].append(request_rate) perf_per_system[system_name][1].append(normalized_latency) - print('#seqs', len(per_seq_norm_latencies)) - print(f'{save_dir}: {normalized_latency:.3f} s') + # print('#seqs', len(per_seq_norm_latencies)) + # print(f'{save_dir}: {normalized_latency:.3f} s') # Plot normalized latency. @@ -184,13 +204,14 @@ def plot_normalized_latency( plt.legend( handles, labels, - ncol=4, fontsize=12, loc='upper center', bbox_to_anchor=(0.5, 1.15), + ncol=5, fontsize=12, loc='upper center', bbox_to_anchor=(0.5, 1.15), columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) # Save figure. model, tp = get_model(exp_dir) sampling = get_sampling(exp_dir) - figname = f'{model}-tp{tp}-{sampling}.{format}' + dataset = get_dataset(exp_dir) + figname = f'{dataset}-{model}-tp{tp}-{sampling}.{format}' os.makedirs('./figures', exist_ok=True) plt.savefig(os.path.join('figures', figname), bbox_inches='tight') print(f'Saved figure to ./figures/{figname}') From 58b86f8c39b6c7de60c06e485cae109fbbe4f59c Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Mon, 17 Apr 2023 17:03:53 -0700 Subject: [PATCH 2/6] add aggregated plot for each subset --- plot/plot_normalized_latency.py | 2 +- plot/plot_sec_6_2.py | 297 +++++++++++++++++++++++++++++++ plot/plot_sec_6_3.py | 300 ++++++++++++++++++++++++++++++++ 3 files changed, 598 insertions(+), 1 deletion(-) create mode 100644 plot/plot_sec_6_2.py create mode 100644 plot/plot_sec_6_3.py diff --git a/plot/plot_normalized_latency.py b/plot/plot_normalized_latency.py index 3acabf3a53d4..1984db3b8d16 100644 --- a/plot/plot_normalized_latency.py +++ b/plot/plot_normalized_latency.py @@ -20,7 +20,7 @@ 'orca-constant': 'Orca (Max)', 'orca-power2': 'Orca (Pow2)', 'orca-oracle': 'Orca (Oracle)', - 'cacheflow': 'KVFlow', + 'cacheflow': 'Astra', } SYSTEM_TO_COLOR = { diff --git a/plot/plot_sec_6_2.py b/plot/plot_sec_6_2.py new file mode 100644 index 000000000000..a59c07135787 --- /dev/null +++ b/plot/plot_sec_6_2.py @@ -0,0 +1,297 @@ +import argparse +import os +import pickle +from typing import Any, Dict, List, Optional, Tuple + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np + + +SYSTEMS = [ + 'FasterTransformer', + 'orca-constant', + 'orca-power2', + 'orca-oracle', + 'cacheflow', +] + +SYSTEM_TO_LABEL = { + 'FasterTransformer': 'FasterTransformer', + 'orca-constant': 'Orca (Max)', + 'orca-power2': 'Orca (Pow2)', + 'orca-oracle': 'Orca (Oracle)', + 'cacheflow': 'Astra', +} + +SYSTEM_TO_COLOR = { + 'FasterTransformer': 'gray', + 'orca-constant': 'red', + 'orca-power2': 'orange', + 'orca-oracle': 'green', + 'cacheflow': 'blue', +} + +SYSTEM_TO_MARKER = { + 'FasterTransformer': '.', + 'orca-constant': 'x', + 'orca-power2': '^', + 'orca-oracle': 's', + 'cacheflow': 'o', +} + +MODEL_SHOW_NAME = { + 'opt-13b': 'OPT-13B, 1 GPU', + 'opt-66b': 'OPT-66B, 4 GPUs', + 'opt-175b': 'OPT-175B, 8 GPUs', +} + +DATASET_SHOW_NAME = { + 'sharegpt': 'ShareGPT', + 'alpaca': 'Alpaca', +} + +MODEL_RANK = { + 'opt-13b': 0, + 'opt-66b': 1, + 'opt-175b': 2, +} + + +def get_alpha_enum(i: int): + return '(' + chr(ord('a') + i) + ')' + + +def get_results(save_dir: str) -> List[Dict[str, Any]]: + with open(os.path.join(save_dir, 'sequences.pkl'), 'rb') as f: + results = pickle.load(f) + return results + + +def get_request_rate(save_dir: str) -> float: + """Get request rate from save_dir name.""" + # Directory name format: + # .../req-rate-{req_rate}/seed-{seed}/duration-{duration} + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + request_rate = None + for dir_name in dir_names: + if dir_name.startswith('req-rate-'): + if request_rate is not None: + raise ValueError(f'Found multiple request rates in {save_dir}') + request_rate = float(dir_name.split('-')[-1]) + if request_rate is None: + raise ValueError(f'Cannot find request rate in {save_dir}') + return request_rate + + +def get_model(save_dir: str) -> Tuple[str, int]: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + model = None + for dir_name in dir_names: + if '-tp' in dir_name: + if model is not None: + raise ValueError(f'Found multiple models in {save_dir}') + model = dir_name.split('-tp')[0] + tp = int(dir_name.split('-tp')[-1]) + if model is None: + raise ValueError(f'Cannot find model in {save_dir}') + return model, tp + + +def get_system(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/')[6:] + + for dir_name in dir_names: + if dir_name.startswith('orca-'): + return dir_name + if dir_name == 'cacheflow': + return dir_name + if dir_name == 'ft': + return 'FasterTransformer' + raise ValueError(f'Cannot find system in {save_dir}') + + +def get_sampling(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name.startswith('n'): + if dir_name.endswith('-beam'): + return dir_name + if dir_name[1:].isdigit(): + return dir_name + raise ValueError(f'Cannot find sampling method in {save_dir}') + +def get_dataset(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name == 'alpaca': + return 'alpaca' + if dir_name == 'sharegpt': + return 'sharegpt' + raise ValueError(f'Cannot find dataset in {save_dir}') + + +def in_subset(save_dir: str, subset: str): + if subset == 'n1-alpaca': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "alpaca" + elif subset == 'n1-sharegpt': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "sharegpt" + elif subset == 'parallel': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2' or sampling == 'n4' or sampling == 'n6' + elif subset == 'beam': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2-beam' or sampling == 'n4-beam' or sampling == 'n6-beam' + + +def plot_normalized_latency( + exp_dir: str, + subset: str, + duration: int, + seed: int, + warmup: int, + xlim: Optional[float], + ylim: Optional[float], + label_offset: int, + log_scale: bool, + format: str, +) -> None: + # Get leaf directories. + save_dirs = [] + for root, dirs, files in os.walk(exp_dir): + if dirs: + continue + if 'sequences.pkl' not in files: + continue + if f'seed{seed}' not in root: + continue + if f'duration-{duration}' not in root and 'duration-900' not in root: + continue + if 'unused' in root: + continue + if not in_subset(root, subset): + continue + save_dirs.append(root) + # print(save_dirs) + + # Collect data points + plot_names = [] + # model -> system -> (request_rate, normalized_latency) + perf: Dict[str, Dict[str, Tuple[List[float], List[float]]]] = {} + for save_dir in save_dirs: + per_seq_norm_latencies = [] + results = get_results(save_dir) + for seq in results: + arrival_time = seq['arrival_time'] + finish_time = seq['finish_time'] + output_len = seq['output_len'] + if arrival_time < warmup: + continue + latency = finish_time - arrival_time + norm_latency = latency / output_len + per_seq_norm_latencies.append(norm_latency) + + request_rate = get_request_rate(save_dir) + normalized_latency = np.mean(per_seq_norm_latencies) + + model_name = get_model(save_dir)[0] + if model_name not in perf: + perf[model_name] = {} + plot_names.append(model_name) + system_name = get_system(save_dir) + if system_name not in perf[model_name]: + perf[model_name][system_name] = ([], []) + perf[model_name][system_name][0].append(request_rate) + perf[model_name][system_name][1].append(normalized_latency) + + # print('#seqs', len(per_seq_norm_latencies)) + # print(f'{save_dir}: {normalized_latency:.3f} s') + + + # Plot normalized latency. + plot_names = sorted(plot_names, key=lambda x: MODEL_RANK[x]) + fig, axs = plt.subplots(1, 3) + for i, (model_name, ax) in enumerate(zip(plot_names, axs)): + curves = [] + legends = [] + for system_name in SYSTEMS: + if system_name not in perf[model_name]: + continue + # Sort by request rate. + request_rates, normalized_latencies = perf[model_name][system_name] + request_rates, normalized_latencies = zip(*sorted(zip(request_rates, normalized_latencies))) + label = SYSTEM_TO_LABEL[system_name] + color = SYSTEM_TO_COLOR[system_name] + marker = SYSTEM_TO_MARKER[system_name] + curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=4) + curves.append(curve[0]) + legends.append(label) + + enum = get_alpha_enum(i + label_offset) + model_show_name = MODEL_SHOW_NAME[model_name] + dataset = DATASET_SHOW_NAME[get_dataset(save_dir)] + ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {model_show_name}, {dataset}', fontsize=10) + ax.tick_params(axis='both', which='major', labelsize=10) + ax.tick_params(axis='both', which='minor', labelsize=10) + if log_scale: + ax.set_yscale('log') + if xlim is not None: + ax.set_xlim(left=0, right=xlim) + if ylim is not None: + if log_scale: + ax.set_ylim(top=ylim) + else: + ax.set_ylim(bottom=0, top=ylim) + ax.grid(linestyle='--') + + # handles, labels = plt.gca().get_legend_handles_labels() + # handles = reversed(handles) + # labels = reversed(labels) + + # plt.legend( + # handles, labels, + # ncol=5, fontsize=10, loc='upper center', bbox_to_anchor=(0.5, 1.15), + # columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) + + fig.text(0.08, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=10) + if subset != 'n1-alpaca': + fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.3), fontsize=10, frameon=False) + + # Save figure. + fig.set_size_inches((18, 1.5)) + figname = f'{subset}.{format}' + os.makedirs('./figures', exist_ok=True) + plt.savefig(os.path.join('figures', figname), bbox_inches='tight') + print(f'Saved figure to ./figures/{figname}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('exp_dir', type=str) + parser.add_argument('--subset', choices=['n1-alpaca', 'n1-sharegpt'], required=True) + parser.add_argument('--duration', type=int, required=True) + parser.add_argument('--seed', type=int, default=0) + parser.add_argument('--warmup', type=int, default=60) + parser.add_argument('--xlim', type=float, required=False, default=None) + parser.add_argument('--ylim', type=float, required=False, default=1) + parser.add_argument('--label-offset', type=int, default=0) + parser.add_argument('--log', action='store_true') + parser.add_argument('--format', choices=['png', 'pdf'], default='pdf') + args = parser.parse_args() + + plot_normalized_latency( + args.exp_dir, args.subset, args.duration, args.seed, args.warmup, + args.xlim, args.ylim, args.label_offset, args.log, args.format) diff --git a/plot/plot_sec_6_3.py b/plot/plot_sec_6_3.py new file mode 100644 index 000000000000..5dbbbfde5ad0 --- /dev/null +++ b/plot/plot_sec_6_3.py @@ -0,0 +1,300 @@ +import argparse +import os +import pickle +from typing import Any, Dict, List, Optional, Tuple + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np + + +SYSTEMS = [ + 'FasterTransformer', + 'orca-constant', + 'orca-power2', + 'orca-oracle', + 'cacheflow', +] + +SYSTEM_TO_LABEL = { + 'FasterTransformer': 'FasterTransformer', + 'orca-constant': 'Orca (Max)', + 'orca-power2': 'Orca (Pow2)', + 'orca-oracle': 'Orca (Oracle)', + 'cacheflow': 'Astra', +} + +SYSTEM_TO_COLOR = { + 'FasterTransformer': 'gray', + 'orca-constant': 'red', + 'orca-power2': 'orange', + 'orca-oracle': 'green', + 'cacheflow': 'blue', +} + +SYSTEM_TO_MARKER = { + 'FasterTransformer': '.', + 'orca-constant': 'x', + 'orca-power2': '^', + 'orca-oracle': 's', + 'cacheflow': 'o', +} + +MODEL_SHOW_NAME = { + 'opt-13b': 'OPT-13B, 1 GPU', + 'opt-66b': 'OPT-66B, 4 GPUs', + 'opt-175b': 'OPT-175B, 8 GPUs', +} + +DATASET_SHOW_NAME = { + 'sharegpt': 'ShareGPT', + 'alpaca': 'Alpaca', +} + +MODEL_RANK = { + 'opt-13b': 0, + 'opt-66b': 1, + 'opt-175b': 2, +} + + +def get_alpha_enum(i: int): + return '(' + chr(ord('a') + i) + ')' + + +def get_results(save_dir: str) -> List[Dict[str, Any]]: + with open(os.path.join(save_dir, 'sequences.pkl'), 'rb') as f: + results = pickle.load(f) + return results + + +def get_request_rate(save_dir: str) -> float: + """Get request rate from save_dir name.""" + # Directory name format: + # .../req-rate-{req_rate}/seed-{seed}/duration-{duration} + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + request_rate = None + for dir_name in dir_names: + if dir_name.startswith('req-rate-'): + if request_rate is not None: + raise ValueError(f'Found multiple request rates in {save_dir}') + request_rate = float(dir_name.split('-')[-1]) + if request_rate is None: + raise ValueError(f'Cannot find request rate in {save_dir}') + return request_rate + + +def get_model(save_dir: str) -> Tuple[str, int]: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + model = None + for dir_name in dir_names: + if '-tp' in dir_name: + if model is not None: + raise ValueError(f'Found multiple models in {save_dir}') + model = dir_name.split('-tp')[0] + tp = int(dir_name.split('-tp')[-1]) + if model is None: + raise ValueError(f'Cannot find model in {save_dir}') + return model, tp + + +def get_system(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/')[6:] + + for dir_name in dir_names: + if dir_name.startswith('orca-'): + return dir_name + if dir_name == 'cacheflow': + return dir_name + if dir_name == 'ft': + return 'FasterTransformer' + raise ValueError(f'Cannot find system in {save_dir}') + + +def get_sampling(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name.startswith('n'): + if dir_name.endswith('-beam'): + return dir_name + if dir_name[1:].isdigit(): + return dir_name + raise ValueError(f'Cannot find sampling method in {save_dir}') + +def get_dataset(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name == 'alpaca': + return 'alpaca' + if dir_name == 'sharegpt': + return 'sharegpt' + raise ValueError(f'Cannot find dataset in {save_dir}') + + +def in_subset(save_dir: str, subset: str): + if subset == 'n1-alpaca': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "alpaca" + elif subset == 'n1-sharegpt': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "sharegpt" + elif subset == 'parallel': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2' or sampling == 'n4' or sampling == 'n6' + elif subset == 'beam': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2-beam' or sampling == 'n4-beam' or sampling == 'n6-beam' + + +def plot_normalized_latency( + exp_dir: str, + subset: str, + duration: int, + seed: int, + warmup: int, + xlim: Optional[float], + ylim: Optional[float], + label_offset: int, + log_scale: bool, + format: str, +) -> None: + # Get leaf directories. + save_dirs = [] + for root, dirs, files in os.walk(exp_dir): + if dirs: + continue + if 'sequences.pkl' not in files: + continue + if f'seed{seed}' not in root: + continue + if f'duration-{duration}' not in root and 'duration-900' not in root: + continue + if 'unused' in root: + continue + if not in_subset(root, subset): + continue + save_dirs.append(root) + # print(save_dirs) + + # Collect data points + plot_names = [] + # sampling -> system -> (request_rate, normalized_latency) + perf: Dict[str, Dict[str, Tuple[List[float], List[float]]]] = {} + for save_dir in save_dirs: + per_seq_norm_latencies = [] + results = get_results(save_dir) + for seq in results: + arrival_time = seq['arrival_time'] + finish_time = seq['finish_time'] + output_len = seq['output_len'] + if arrival_time < warmup: + continue + latency = finish_time - arrival_time + norm_latency = latency / output_len + per_seq_norm_latencies.append(norm_latency) + + request_rate = get_request_rate(save_dir) + normalized_latency = np.mean(per_seq_norm_latencies) + + sampling = get_sampling(save_dir) + if sampling not in perf: + perf[sampling] = {} + plot_names.append(sampling) + system_name = get_system(save_dir) + if system_name not in perf[sampling]: + perf[sampling][system_name] = ([], []) + perf[sampling][system_name][0].append(request_rate) + perf[sampling][system_name][1].append(normalized_latency) + + # print('#seqs', len(per_seq_norm_latencies)) + # print(f'{save_dir}: {normalized_latency:.3f} s') + + + # Plot normalized latency. + plot_names = sorted(plot_names) + fig, axs = plt.subplots(1, 3) + for i, (sampling, ax) in enumerate(zip(plot_names, axs)): + curves = [] + legends = [] + for system_name in SYSTEMS: + if system_name not in perf[sampling]: + continue + # Sort by request rate. + request_rates, normalized_latencies = perf[sampling][system_name] + request_rates, normalized_latencies = zip(*sorted(zip(request_rates, normalized_latencies))) + label = SYSTEM_TO_LABEL[system_name] + color = SYSTEM_TO_COLOR[system_name] + marker = SYSTEM_TO_MARKER[system_name] + curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=4) + curves.append(curve[0]) + legends.append(label) + + enum = get_alpha_enum(i + label_offset) + dataset = DATASET_SHOW_NAME[get_dataset(save_dir)] + if subset == 'parallel': + sampling_name = 'parallel generation (parallel size = ' + sampling[1:] + ')' + elif subset == 'beam': + sampling_name = 'beam search (beam width = ' + sampling.split('-')[0][1:] + ')' + ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {sampling_name}', fontsize=10) + ax.tick_params(axis='both', which='major', labelsize=10) + ax.tick_params(axis='both', which='minor', labelsize=10) + if log_scale: + ax.set_yscale('log') + if xlim is not None: + ax.set_xlim(left=0, right=xlim) + if ylim is not None: + if log_scale: + ax.set_ylim(top=ylim) + else: + ax.set_ylim(bottom=0, top=ylim) + ax.grid(linestyle='--') + + # handles, labels = plt.gca().get_legend_handles_labels() + # handles = reversed(handles) + # labels = reversed(labels) + + # plt.legend( + # handles, labels, + # ncol=5, fontsize=10, loc='upper center', bbox_to_anchor=(0.5, 1.15), + # columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) + + fig.text(0.08, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=10) + if subset == 'parallel': + fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.3), fontsize=10, frameon=False) + + # Save figure. + fig.set_size_inches((18, 1.5)) + figname = f'{subset}.{format}' + os.makedirs('./figures', exist_ok=True) + plt.savefig(os.path.join('figures', figname), bbox_inches='tight') + print(f'Saved figure to ./figures/{figname}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('exp_dir', type=str) + parser.add_argument('--subset', choices=['parallel', 'beam'], required=True) + parser.add_argument('--duration', type=int, required=True) + parser.add_argument('--seed', type=int, default=0) + parser.add_argument('--warmup', type=int, default=60) + parser.add_argument('--xlim', type=float, required=False, default=None) + parser.add_argument('--ylim', type=float, required=False, default=1) + parser.add_argument('--label-offset', type=int, default=0) + parser.add_argument('--log', action='store_true') + parser.add_argument('--format', choices=['png', 'pdf'], default='pdf') + args = parser.parse_args() + + plot_normalized_latency( + args.exp_dir, args.subset, args.duration, args.seed, args.warmup, + args.xlim, args.ylim, args.label_offset, args.log, args.format) From 801753398415368b6caa11ac340fbc375bae8359 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Mon, 17 Apr 2023 18:45:16 -0700 Subject: [PATCH 3/6] update plot xlim --- plot/plot_sec_6_2.py | 9 +++++++++ plot/plot_sec_6_3.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/plot/plot_sec_6_2.py b/plot/plot_sec_6_2.py index a59c07135787..4e0649f4b019 100644 --- a/plot/plot_sec_6_2.py +++ b/plot/plot_sec_6_2.py @@ -189,6 +189,8 @@ def plot_normalized_latency( # Collect data points plot_names = [] + # model -> x_cut + x_top: Dict[str, float] = {} # model -> system -> (request_rate, normalized_latency) perf: Dict[str, Dict[str, Tuple[List[float], List[float]]]] = {} for save_dir in save_dirs: @@ -217,6 +219,11 @@ def plot_normalized_latency( perf[model_name][system_name][0].append(request_rate) perf[model_name][system_name][1].append(normalized_latency) + if model_name not in x_top: + x_top[model_name] = 0 + if normalized_latency < 1.1: + x_top[model_name] = max(x_top[model_name], request_rate) + # print('#seqs', len(per_seq_norm_latencies)) # print(f'{save_dir}: {normalized_latency:.3f} s') @@ -250,6 +257,8 @@ def plot_normalized_latency( ax.set_yscale('log') if xlim is not None: ax.set_xlim(left=0, right=xlim) + else: + ax.set_xlim(left=0, right=x_top[model_name] * 1.1) if ylim is not None: if log_scale: ax.set_ylim(top=ylim) diff --git a/plot/plot_sec_6_3.py b/plot/plot_sec_6_3.py index 5dbbbfde5ad0..a33a871e90fa 100644 --- a/plot/plot_sec_6_3.py +++ b/plot/plot_sec_6_3.py @@ -189,6 +189,8 @@ def plot_normalized_latency( # Collect data points plot_names = [] + # sampling -> x_cut + x_top: Dict[str, float] = {} # sampling -> system -> (request_rate, normalized_latency) perf: Dict[str, Dict[str, Tuple[List[float], List[float]]]] = {} for save_dir in save_dirs: @@ -217,6 +219,11 @@ def plot_normalized_latency( perf[sampling][system_name][0].append(request_rate) perf[sampling][system_name][1].append(normalized_latency) + if sampling not in x_top: + x_top[sampling] = 0 + if normalized_latency < 1.1: + x_top[sampling] = max(x_top[sampling], request_rate) + # print('#seqs', len(per_seq_norm_latencies)) # print(f'{save_dir}: {normalized_latency:.3f} s') @@ -253,6 +260,8 @@ def plot_normalized_latency( ax.set_yscale('log') if xlim is not None: ax.set_xlim(left=0, right=xlim) + else: + ax.set_xlim(left=0, right=x_top[sampling] * 1.1) if ylim is not None: if log_scale: ax.set_ylim(top=ylim) From 5b2b7ae83c3fa8a17df398035de227cbedb835ab Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Mon, 17 Apr 2023 20:10:45 -0700 Subject: [PATCH 4/6] update plot scripts --- plot/plot_sec_6_2.py | 18 ++- plot/plot_sec_6_3.py | 18 ++- plot/plot_sec_6_4.py | 319 ++++++++++++++++++++++++++++++++++++++++++ plot/plot_sec_6_5.py | 324 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 665 insertions(+), 14 deletions(-) create mode 100644 plot/plot_sec_6_4.py create mode 100644 plot/plot_sec_6_5.py diff --git a/plot/plot_sec_6_2.py b/plot/plot_sec_6_2.py index 4e0649f4b019..beda34137fcc 100644 --- a/plot/plot_sec_6_2.py +++ b/plot/plot_sec_6_2.py @@ -137,6 +137,10 @@ def get_dataset(save_dir: str) -> str: return 'alpaca' if dir_name == 'sharegpt': return 'sharegpt' + if dir_name == 'prefix': + return 'prefix' + if dir_name == 'sharegpt_chat': + return 'sharegpt_chat' raise ValueError(f'Cannot find dataset in {save_dir}') @@ -243,16 +247,16 @@ def plot_normalized_latency( label = SYSTEM_TO_LABEL[system_name] color = SYSTEM_TO_COLOR[system_name] marker = SYSTEM_TO_MARKER[system_name] - curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=4) + curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=6) curves.append(curve[0]) legends.append(label) enum = get_alpha_enum(i + label_offset) model_show_name = MODEL_SHOW_NAME[model_name] dataset = DATASET_SHOW_NAME[get_dataset(save_dir)] - ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {model_show_name}, {dataset}', fontsize=10) - ax.tick_params(axis='both', which='major', labelsize=10) - ax.tick_params(axis='both', which='minor', labelsize=10) + ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {model_show_name}, {dataset}', fontsize=15) + ax.tick_params(axis='both', which='major', labelsize=15) + ax.tick_params(axis='both', which='minor', labelsize=15) if log_scale: ax.set_yscale('log') if xlim is not None: @@ -272,12 +276,12 @@ def plot_normalized_latency( # plt.legend( # handles, labels, - # ncol=5, fontsize=10, loc='upper center', bbox_to_anchor=(0.5, 1.15), + # ncol=5, fontsize=15, loc='upper center', bbox_to_anchor=(0.5, 1.15), # columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) - fig.text(0.08, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=10) + fig.text(0.07, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=15) if subset != 'n1-alpaca': - fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.3), fontsize=10, frameon=False) + fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.35), fontsize=15, frameon=False) # Save figure. fig.set_size_inches((18, 1.5)) diff --git a/plot/plot_sec_6_3.py b/plot/plot_sec_6_3.py index a33a871e90fa..bbf91d704c1a 100644 --- a/plot/plot_sec_6_3.py +++ b/plot/plot_sec_6_3.py @@ -137,6 +137,10 @@ def get_dataset(save_dir: str) -> str: return 'alpaca' if dir_name == 'sharegpt': return 'sharegpt' + if dir_name == 'prefix': + return 'prefix' + if dir_name == 'sharegpt_chat': + return 'sharegpt_chat' raise ValueError(f'Cannot find dataset in {save_dir}') @@ -243,7 +247,7 @@ def plot_normalized_latency( label = SYSTEM_TO_LABEL[system_name] color = SYSTEM_TO_COLOR[system_name] marker = SYSTEM_TO_MARKER[system_name] - curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=4) + curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=6) curves.append(curve[0]) legends.append(label) @@ -253,9 +257,9 @@ def plot_normalized_latency( sampling_name = 'parallel generation (parallel size = ' + sampling[1:] + ')' elif subset == 'beam': sampling_name = 'beam search (beam width = ' + sampling.split('-')[0][1:] + ')' - ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {sampling_name}', fontsize=10) - ax.tick_params(axis='both', which='major', labelsize=10) - ax.tick_params(axis='both', which='minor', labelsize=10) + ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {sampling_name}', fontsize=15) + ax.tick_params(axis='both', which='major', labelsize=15) + ax.tick_params(axis='both', which='minor', labelsize=15) if log_scale: ax.set_yscale('log') if xlim is not None: @@ -275,12 +279,12 @@ def plot_normalized_latency( # plt.legend( # handles, labels, - # ncol=5, fontsize=10, loc='upper center', bbox_to_anchor=(0.5, 1.15), + # ncol=5, fontsize=15, loc='upper center', bbox_to_anchor=(0.5, 1.15), # columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) - fig.text(0.08, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=10) + fig.text(0.07, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=15) if subset == 'parallel': - fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.3), fontsize=10, frameon=False) + fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.35), fontsize=15, frameon=False) # Save figure. fig.set_size_inches((18, 1.5)) diff --git a/plot/plot_sec_6_4.py b/plot/plot_sec_6_4.py new file mode 100644 index 000000000000..c8599545a45e --- /dev/null +++ b/plot/plot_sec_6_4.py @@ -0,0 +1,319 @@ +import argparse +import os +import pickle +from typing import Any, Dict, List, Optional, Tuple + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np + + +SYSTEMS = [ + 'FasterTransformer', + 'orca-constant', + 'orca-power2', + 'orca-oracle', + 'cacheflow', +] + +SYSTEM_TO_LABEL = { + 'FasterTransformer': 'FasterTransformer', + 'orca-constant': 'Orca (Max)', + 'orca-power2': 'Orca (Pow2)', + 'orca-oracle': 'Orca (Oracle)', + 'cacheflow': 'Astra', +} + +SYSTEM_TO_COLOR = { + 'FasterTransformer': 'gray', + 'orca-constant': 'red', + 'orca-power2': 'orange', + 'orca-oracle': 'green', + 'cacheflow': 'blue', +} + +SYSTEM_TO_MARKER = { + 'FasterTransformer': '.', + 'orca-constant': 'x', + 'orca-power2': '^', + 'orca-oracle': 's', + 'cacheflow': 'o', +} + +MODEL_SHOW_NAME = { + 'opt-13b': 'OPT-13B, 1 GPU', + 'opt-66b': 'OPT-66B, 4 GPUs', + 'opt-175b': 'OPT-175B, 8 GPUs', +} + +DATASET_SHOW_NAME = { + 'sharegpt': 'ShareGPT', + 'alpaca': 'Alpaca', +} + +MODEL_RANK = { + 'opt-13b': 0, + 'opt-66b': 1, + 'opt-175b': 2, +} + + +def get_alpha_enum(i: int): + return '(' + chr(ord('a') + i) + ')' + + +def get_results(save_dir: str) -> List[Dict[str, Any]]: + with open(os.path.join(save_dir, 'sequences.pkl'), 'rb') as f: + results = pickle.load(f) + return results + + +def get_request_rate(save_dir: str) -> float: + """Get request rate from save_dir name.""" + # Directory name format: + # .../req-rate-{req_rate}/seed-{seed}/duration-{duration} + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + request_rate = None + for dir_name in dir_names: + if dir_name.startswith('req-rate-'): + if request_rate is not None: + raise ValueError(f'Found multiple request rates in {save_dir}') + request_rate = float(dir_name.split('-')[-1]) + if request_rate is None: + raise ValueError(f'Cannot find request rate in {save_dir}') + return request_rate + + +def get_model(save_dir: str) -> Tuple[str, int]: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + model = None + for dir_name in dir_names: + if '-tp' in dir_name: + if model is not None: + raise ValueError(f'Found multiple models in {save_dir}') + model = dir_name.split('-tp')[0] + tp = int(dir_name.split('-tp')[-1]) + if model is None: + raise ValueError(f'Cannot find model in {save_dir}') + return model, tp + + +def get_system(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/')[6:] + + for dir_name in dir_names: + if dir_name.startswith('orca-'): + return dir_name + if dir_name == 'cacheflow': + return dir_name + if dir_name == 'ft': + return 'FasterTransformer' + raise ValueError(f'Cannot find system in {save_dir}') + + +def get_sampling(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name.startswith('n'): + if dir_name.endswith('-beam'): + return dir_name + if dir_name[1:].isdigit(): + return dir_name + raise ValueError(f'Cannot find sampling method in {save_dir}') + +def get_dataset(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name == 'alpaca': + return 'alpaca' + if dir_name == 'sharegpt': + return 'sharegpt' + raise ValueError(f'Cannot find dataset in {save_dir}') + + +def get_num_shot(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name.startswith('wmt'): + return dir_name.split('-')[1][:-4] + raise ValueError(f'Cannot find shot number in {save_dir}') + + +def in_subset(save_dir: str, subset: str): + if subset == 'n1-alpaca': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "alpaca" + elif subset == 'n1-sharegpt': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "sharegpt" + elif subset == 'parallel': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2' or sampling == 'n4' or sampling == 'n6' + elif subset == 'beam': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2-beam' or sampling == 'n4-beam' or sampling == 'n6-beam' + elif subset == 'prefix': + return 'wmt' in save_dir + + +def plot_normalized_latency( + exp_dir: str, + subset: str, + duration: int, + seed: int, + warmup: int, + xlim: Optional[float], + ylim: Optional[float], + label_offset: int, + log_scale: bool, + format: str, +) -> None: + # Get leaf directories. + save_dirs = [] + for root, dirs, files in os.walk(exp_dir): + if dirs: + continue + if 'sequences.pkl' not in files: + continue + if f'seed{seed}' not in root: + continue + if f'duration-{duration}' not in root and 'duration-900' not in root: + continue + if 'unused' in root: + continue + if not in_subset(root, subset): + continue + save_dirs.append(root) + # print(save_dirs) + + # Collect data points + plot_names = [] + # num_shot -> x_cut + x_top: Dict[str, float] = {} + # num_shot -> system -> (request_rate, normalized_latency) + perf: Dict[str, Dict[str, Tuple[List[float], List[float]]]] = {} + for save_dir in save_dirs: + per_seq_norm_latencies = [] + results = get_results(save_dir) + for seq in results: + arrival_time = seq['arrival_time'] + finish_time = seq['finish_time'] + output_len = seq['output_len'] + if arrival_time < warmup: + continue + latency = finish_time - arrival_time + norm_latency = latency / output_len + per_seq_norm_latencies.append(norm_latency) + + request_rate = get_request_rate(save_dir) + normalized_latency = np.mean(per_seq_norm_latencies) + + num_shot = get_num_shot(save_dir) + if num_shot not in perf: + perf[num_shot] = {} + plot_names.append(num_shot) + system_name = get_system(save_dir) + if system_name not in perf[num_shot]: + perf[num_shot][system_name] = ([], []) + perf[num_shot][system_name][0].append(request_rate) + perf[num_shot][system_name][1].append(normalized_latency) + + if num_shot not in x_top: + x_top[num_shot] = 0 + if normalized_latency < 1.1: + x_top[num_shot] = max(x_top[num_shot], request_rate) + + # print('#seqs', len(per_seq_norm_latencies)) + # print(f'{save_dir}: {normalized_latency:.3f} s') + + + # Plot normalized latency. + plot_names = sorted(plot_names) + fig, axs = plt.subplots(1, 2) + for i, (num_shot, ax) in enumerate(zip(plot_names, axs)): + curves = [] + legends = [] + for system_name in SYSTEMS: + if system_name not in perf[num_shot]: + continue + # Sort by request rate. + request_rates, normalized_latencies = perf[num_shot][system_name] + request_rates, normalized_latencies = zip(*sorted(zip(request_rates, normalized_latencies))) + label = SYSTEM_TO_LABEL[system_name] + color = SYSTEM_TO_COLOR[system_name] + marker = SYSTEM_TO_MARKER[system_name] + curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=6) + curves.append(curve[0]) + legends.append(label) + + enum = get_alpha_enum(i + label_offset) + if subset == 'prefix': + sub_name = num_shot + '-shot prefix prompt' + ax.set_xlabel(f'Request rate (req/s)\n\n{enum} {sub_name}', fontsize=14) + ax.tick_params(axis='both', which='major', labelsize=14) + ax.tick_params(axis='both', which='minor', labelsize=14) + if log_scale: + ax.set_yscale('log') + if xlim is not None: + ax.set_xlim(left=0, right=xlim) + else: + ax.set_xlim(left=0, right=x_top[num_shot] * 1.1) + if ylim is not None: + if log_scale: + ax.set_ylim(top=ylim) + else: + ax.set_ylim(bottom=0, top=ylim) + ax.grid(linestyle='--') + + # handles, labels = plt.gca().get_legend_handles_labels() + # handles = reversed(handles) + # labels = reversed(labels) + + # plt.legend( + # handles, labels, + # ncol=5, fontsize=14, loc='upper center', bbox_to_anchor=(0.5, 1.15), + # columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) + + fig.text(-0.03, 0.5, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=14) + if subset == 'prefix': + fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.18), fontsize=14, frameon=False) + # fig.subplots_adjust(hspace=0.6) + + # Save figure. + fig.set_size_inches((6, 2)) + figname = f'{subset}.{format}' + os.makedirs('./figures', exist_ok=True) + plt.savefig(os.path.join('figures', figname), bbox_inches='tight') + print(f'Saved figure to ./figures/{figname}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('exp_dir', type=str) + parser.add_argument('--subset', choices=['prefix'], default='prefix') + parser.add_argument('--duration', type=int, default=1200) + parser.add_argument('--seed', type=int, default=0) + parser.add_argument('--warmup', type=int, default=60) + parser.add_argument('--xlim', type=float, required=False, default=None) + parser.add_argument('--ylim', type=float, required=False, default=1) + parser.add_argument('--label-offset', type=int, default=0) + parser.add_argument('--log', action='store_true') + parser.add_argument('--format', choices=['png', 'pdf'], default='pdf') + args = parser.parse_args() + + plot_normalized_latency( + args.exp_dir, args.subset, args.duration, args.seed, args.warmup, + args.xlim, args.ylim, args.label_offset, args.log, args.format) diff --git a/plot/plot_sec_6_5.py b/plot/plot_sec_6_5.py new file mode 100644 index 000000000000..d426ca0dfef3 --- /dev/null +++ b/plot/plot_sec_6_5.py @@ -0,0 +1,324 @@ +import argparse +import os +import pickle +from typing import Any, Dict, List, Optional, Tuple + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np + + +SYSTEMS = [ + 'FasterTransformer', + 'orca-constant', + 'orca-power2', + 'orca-oracle', + 'cacheflow', +] + +SYSTEM_TO_LABEL = { + 'FasterTransformer': 'FasterTransformer', + 'orca-constant': 'Orca (Max)', + 'orca-power2': 'Orca (Pow2)', + 'orca-oracle': 'Orca (Oracle)', + 'cacheflow': 'Astra', +} + +SYSTEM_TO_COLOR = { + 'FasterTransformer': 'gray', + 'orca-constant': 'red', + 'orca-power2': 'orange', + 'orca-oracle': 'green', + 'cacheflow': 'blue', +} + +SYSTEM_TO_MARKER = { + 'FasterTransformer': '.', + 'orca-constant': 'x', + 'orca-power2': '^', + 'orca-oracle': 's', + 'cacheflow': 'o', +} + +MODEL_SHOW_NAME = { + 'opt-13b': 'OPT-13B, 1 GPU', + 'opt-66b': 'OPT-66B, 4 GPUs', + 'opt-175b': 'OPT-175B, 8 GPUs', +} + +DATASET_SHOW_NAME = { + 'sharegpt': 'ShareGPT', + 'alpaca': 'Alpaca', +} + +MODEL_RANK = { + 'opt-13b': 0, + 'opt-66b': 1, + 'opt-175b': 2, +} + + +def get_alpha_enum(i: int): + return '(' + chr(ord('a') + i) + ')' + + +def get_results(save_dir: str) -> List[Dict[str, Any]]: + with open(os.path.join(save_dir, 'sequences.pkl'), 'rb') as f: + results = pickle.load(f) + return results + + +def get_request_rate(save_dir: str) -> float: + """Get request rate from save_dir name.""" + # Directory name format: + # .../req-rate-{req_rate}/seed-{seed}/duration-{duration} + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + request_rate = None + for dir_name in dir_names: + if dir_name.startswith('req-rate-'): + if request_rate is not None: + raise ValueError(f'Found multiple request rates in {save_dir}') + request_rate = float(dir_name.split('-')[-1]) + if request_rate is None: + raise ValueError(f'Cannot find request rate in {save_dir}') + return request_rate + + +def get_model(save_dir: str) -> Tuple[str, int]: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + model = None + for dir_name in dir_names: + if '-tp' in dir_name: + if model is not None: + raise ValueError(f'Found multiple models in {save_dir}') + model = dir_name.split('-tp')[0] + tp = int(dir_name.split('-tp')[-1]) + if model is None: + raise ValueError(f'Cannot find model in {save_dir}') + return model, tp + + +def get_system(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/')[6:] + + for dir_name in dir_names: + if dir_name.startswith('orca-'): + return dir_name + if dir_name == 'cacheflow': + return dir_name + if dir_name == 'ft': + return 'FasterTransformer' + raise ValueError(f'Cannot find system in {save_dir}') + + +def get_sampling(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name.startswith('n'): + if dir_name.endswith('-beam'): + return dir_name + if dir_name[1:].isdigit(): + return dir_name + raise ValueError(f'Cannot find sampling method in {save_dir}') + +def get_dataset(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name == 'alpaca': + return 'alpaca' + if dir_name == 'sharegpt': + return 'sharegpt' + raise ValueError(f'Cannot find dataset in {save_dir}') + + +def get_num_shot(save_dir: str) -> str: + save_dir = os.path.abspath(save_dir) + dir_names = save_dir.split('/') + + for dir_name in dir_names: + if dir_name.startswith('wmt'): + return dir_name.split('-')[1][:-4] + raise ValueError(f'Cannot find shot number in {save_dir}') + + +def in_subset(save_dir: str, subset: str): + if subset == 'n1-alpaca': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "alpaca" + elif subset == 'n1-sharegpt': + return get_sampling(save_dir) == 'n1' and get_dataset(save_dir) == "sharegpt" + elif subset == 'parallel': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2' or sampling == 'n4' or sampling == 'n6' + elif subset == 'beam': + if get_dataset(save_dir) != "alpaca": + return False + sampling = get_sampling(save_dir) + return sampling == 'n2-beam' or sampling == 'n4-beam' or sampling == 'n6-beam' + elif subset == 'prefix': + return 'wmt' in save_dir + elif subset == 'chat-sharegpt': + return 'sharegpt_chat' in save_dir + + +def plot_normalized_latency( + exp_dir: str, + subset: str, + duration: int, + seed: int, + warmup: int, + xlim: Optional[float], + ylim: Optional[float], + label_offset: int, + log_scale: bool, + format: str, +) -> None: + # Get leaf directories. + save_dirs = [] + for root, dirs, files in os.walk(exp_dir): + if dirs: + continue + if 'sequences.pkl' not in files: + continue + if f'seed{seed}' not in root: + continue + if f'duration-{duration}' not in root and 'duration-900' not in root: + continue + if 'unused' in root: + continue + if not in_subset(root, subset): + continue + save_dirs.append(root) + # print(save_dirs) + + # Collect data points + plot_names = [] + # model_name -> x_cut + x_top: Dict[str, float] = {} + # model_name -> system -> (request_rate, normalized_latency) + perf: Dict[str, Dict[str, Tuple[List[float], List[float]]]] = {} + for save_dir in save_dirs: + per_seq_norm_latencies = [] + results = get_results(save_dir) + for seq in results: + arrival_time = seq['arrival_time'] + finish_time = seq['finish_time'] + output_len = seq['output_len'] + if arrival_time < warmup: + continue + latency = finish_time - arrival_time + norm_latency = latency / output_len + per_seq_norm_latencies.append(norm_latency) + + request_rate = get_request_rate(save_dir) + normalized_latency = np.mean(per_seq_norm_latencies) + + model_name = get_model(save_dir) + if model_name not in perf: + perf[model_name] = {} + plot_names.append(model_name) + system_name = get_system(save_dir) + if system_name not in perf[model_name]: + perf[model_name][system_name] = ([], []) + perf[model_name][system_name][0].append(request_rate) + perf[model_name][system_name][1].append(normalized_latency) + + if model_name not in x_top: + x_top[model_name] = 0 + if normalized_latency < 1.1: + x_top[model_name] = max(x_top[model_name], request_rate) + + # print('#seqs', len(per_seq_norm_latencies)) + # print(f'{save_dir}: {normalized_latency:.3f} s') + + + # Plot normalized latency. + plot_names = sorted(plot_names) + fig, axs = plt.subplots(1, 1) + # for i, (model_name, ax) in enumerate(zip(plot_names, axs)): + model_name = plot_names[0] + ax = axs + i = 0 + + curves = [] + legends = [] + for system_name in SYSTEMS: + if system_name not in perf[model_name]: + continue + # Sort by request rate. + request_rates, normalized_latencies = perf[model_name][system_name] + request_rates, normalized_latencies = zip(*sorted(zip(request_rates, normalized_latencies))) + label = SYSTEM_TO_LABEL[system_name] + color = SYSTEM_TO_COLOR[system_name] + marker = SYSTEM_TO_MARKER[system_name] + curve = ax.plot(request_rates, normalized_latencies, label=label, color=color, marker=marker, markersize=6) + curves.append(curve[0]) + legends.append(label) + + enum = get_alpha_enum(i + label_offset) + ax.set_xlabel(f'Request rate (req/s)', fontsize=14) + ax.tick_params(axis='both', which='major', labelsize=14) + ax.tick_params(axis='both', which='minor', labelsize=14) + if log_scale: + ax.set_yscale('log') + if xlim is not None: + ax.set_xlim(left=0, right=xlim) + else: + ax.set_xlim(left=0, right=x_top[model_name] * 1.1) + if ylim is not None: + if log_scale: + ax.set_ylim(top=ylim) + else: + ax.set_ylim(bottom=0, top=ylim) + ax.grid(linestyle='--') + + # handles, labels = plt.gca().get_legend_handles_labels() + # handles = reversed(handles) + # labels = reversed(labels) + + # plt.legend( + # handles, labels, + # ncol=5, fontsize=14, loc='upper center', bbox_to_anchor=(0.5, 1.15), + # columnspacing=0.5, handletextpad=0.5, handlelength=1.5, frameon=False, borderpad=0) + + fig.text(-0.05, 0.45, 'Normalized latency\n (s/token)', va='center', rotation='vertical', fontsize=14) + if subset == 'chat-sharegpt': + fig.legend(curves, legends, loc="upper center", ncol=5, bbox_to_anchor=(0.5, 1.2), fontsize=14, + columnspacing=0.5, frameon=False) + # fig.subplots_adjust(hspace=0.6) + + # Save figure. + fig.set_size_inches((6, 2)) + figname = f'{subset}.{format}' + os.makedirs('./figures', exist_ok=True) + plt.savefig(os.path.join('figures', figname), bbox_inches='tight') + print(f'Saved figure to ./figures/{figname}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('exp_dir', type=str) + parser.add_argument('--subset', choices=['chat-sharegpt'], default='chat-sharegpt') + parser.add_argument('--duration', type=int, default=3600) + parser.add_argument('--seed', type=int, default=0) + parser.add_argument('--warmup', type=int, default=60) + parser.add_argument('--xlim', type=float, required=False, default=None) + parser.add_argument('--ylim', type=float, required=False, default=1) + parser.add_argument('--label-offset', type=int, default=0) + parser.add_argument('--log', action='store_true') + parser.add_argument('--format', choices=['png', 'pdf'], default='pdf') + args = parser.parse_args() + + plot_normalized_latency( + args.exp_dir, args.subset, args.duration, args.seed, args.warmup, + args.xlim, args.ylim, args.label_offset, args.log, args.format) From 8206d0d7de18ff1dd319c1016192c1473e6e1b62 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sun, 23 Apr 2023 14:02:59 -0700 Subject: [PATCH 5/6] clean up --- plot/plot.sh | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 plot/plot.sh diff --git a/plot/plot.sh b/plot/plot.sh deleted file mode 100644 index d22875ccb726..000000000000 --- a/plot/plot.sh +++ /dev/null @@ -1,29 +0,0 @@ -# alpaca-13b: - python plot_normalized_latency.py paper/exp0/exp/alpaca/opt-13b-tp1/n1 --duration 3600 --ylim 1 --format pdf -# alpaca-66b: - python plot_normalized_latency.py paper/exp4/exp/alpaca/opt-66b-tp4/n1 --duration 3600 --ylim 1 --format pdf -# alpaca-175b: - python plot_normalized_latency.py paper/exp-175b/exp/alpaca/opt-175b-tp8/n1/ --duration 3600 --ylim 1 --format pdf - -# sharegpt-13b: - python plot_normalized_latency.py paper/exp0/exp/sharegpt/opt-13b-tp1/n1/ --duration 3600 --ylim 1 --format pdf -# sharegpt-66b: - python plot_normalized_latency.py paper/exp2/exp/sharegpt/opt-66b-tp4/n1 --duration 3600 --ylim 1 --format pdf -# sharegpt-175b: - python plot_normalized_latency.py paper/exp-175b/exp/sharegpt/opt-175b-tp8/n1 --duration 3600 --ylim 1 --format pdf - - -# alpaca-n2-13b: - python plot_normalized_latency.py paper/exp3/exp/alpaca/opt-13b-tp1/n2 --duration 3600 --ylim 1 --format pdf -# alpaca-n4-13b: - python plot_normalized_latency.py paper/exp3/exp/alpaca/opt-13b-tp1/n4 --duration 3600 --ylim 1 --format pdf -# alpaca-n6-13b: - python plot_normalized_latency.py paper/exp0/exp/alpaca/opt-13b-tp1/n6 --duration 3600 --ylim 1 --format pdf - - -# alpaca-n2-beam-13b: - python plot_normalized_latency.py paper/exp1/exp/alpaca/opt-13b-tp1/n2-beam --duration 3600 --ylim 1 --format pdf -# alpaca-n4-beam-13b: - python plot_normalized_latency.py paper/exp1/exp/alpaca/opt-13b-tp1/n4-beam --duration 3600 --ylim 1 --format pdf -# alapca-n6-beam-13b: - python plot_normalized_latency.py paper/exp1/exp/alpaca/opt-13b-tp1/n6-beam --duration 3600 --ylim 1 --format pdf From cb86741ea23aeda2e1aacd685d71026ea71c92c9 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sun, 23 Apr 2023 14:11:52 -0700 Subject: [PATCH 6/6] add reproduce cmds --- plot/plot_sec_6.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 plot/plot_sec_6.sh diff --git a/plot/plot_sec_6.sh b/plot/plot_sec_6.sh new file mode 100644 index 000000000000..91b4c6b888ac --- /dev/null +++ b/plot/plot_sec_6.sh @@ -0,0 +1,8 @@ +# data in gs://woosuk-exp/exp_sec_6 +python plot_sec_6_2.py exp_sec_6 --subset n1-sharegpt --duration 3600 +python plot_sec_6_2.py exp_sec_6 --subset n1-alpaca --duration 3600 +python plot_sec_6_3.py exp_sec_6 --subset parallel --duration 3600 +python plot_sec_6_3.py exp_sec_6 --subset beam --duration 3600 +python plot_sec_6_4.py exp_sec_6 +python plot_sec_6_5.py exp_sec_6 +