From 853311acfafb25d19b200acd1299676082a74d00 Mon Sep 17 00:00:00 2001 From: Justin Waugh <916073+bluecoconut@users.noreply.github.com> Date: Tue, 21 Jan 2025 23:27:04 +0000 Subject: [PATCH] Add deepseek distils as options --- jetstream_pt/fetch_models.py | 2 ++ jetstream_pt/third_party/llama/model_exportable.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/jetstream_pt/fetch_models.py b/jetstream_pt/fetch_models.py index 2db7838..cb5045c 100644 --- a/jetstream_pt/fetch_models.py +++ b/jetstream_pt/fetch_models.py @@ -93,6 +93,8 @@ class ModelInfo: "google/gemma-7b-it": _gemma_7b, "mistralai/Mixtral-8x7B-v0.1": _mixtral_87, "mistralai/Mixtral-8x7B-Instruct-v0.1": _mixtral_87, + "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": _llama3_1_8b, + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": _llama3_3_70b, } diff --git a/jetstream_pt/third_party/llama/model_exportable.py b/jetstream_pt/third_party/llama/model_exportable.py index 7358588..d1548c3 100644 --- a/jetstream_pt/third_party/llama/model_exportable.py +++ b/jetstream_pt/third_party/llama/model_exportable.py @@ -344,6 +344,8 @@ def from_hf_model_id(cls, model_id, env, is_tiny=False): "meta-llama/Llama-3.2-1B-Instruct": "llama-3.2-1b", "meta-llama/Llama-3.3-70B": "llama-3.3-70b", "meta-llama/Llama-3.3-70B-Instruct": "llama-3.3-70b", + "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": "llama-3.1-8b", + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": "llama-3.3-70b", }.get(model_id) assert name args = model_args.get_model_args(