Skip to content

Commit 78a4e53

Browse files
committed
use cpu model as input
1 parent 518f134 commit 78a4e53

File tree

4 files changed

+23
-15
lines changed

4 files changed

+23
-15
lines changed

backends/aoti/aoti_backend.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
)
2222
from executorch.exir.backend.compile_spec_schema import CompileSpec
2323
from torch._inductor.codegen.cpp_wrapper_cpu import CppWrapperCpu
24+
from torch.export.passes import move_to_device_pass
2425

2526

2627
# Fallback operators that exist in the et namespace:
@@ -71,14 +72,33 @@ def preprocess(
7172
edge_program: ExportedProgram,
7273
compile_specs: List[CompileSpec],
7374
) -> PreprocessResult:
75+
7476
print("entering the lowerable parts in AotiBackend.preprocess....")
7577
named_data_store = NamedDataStore()
7678

7779
# print("here", edge_program.example_inputs)
7880
copy_edge_program = copy.deepcopy(edge_program)
81+
82+
# Move the edge_program from CPU to CUDA using move_to_device_pass
83+
copy_edge_program = move_to_device_pass(copy_edge_program, "cuda")
7984
# graph_module = copy_edge_program.graph_module
8085
edge_program_module = copy_edge_program.module()
8186
args, kwargs = copy_edge_program.example_inputs
87+
88+
# Deep copy args and move tensors to CUDA for aot_compile
89+
def move_to_cuda(obj):
90+
if isinstance(obj, torch.Tensor):
91+
return obj.cuda()
92+
elif isinstance(obj, (list, tuple)):
93+
return type(obj)(move_to_cuda(item) for item in obj)
94+
elif isinstance(obj, dict):
95+
return {key: move_to_cuda(value) for key, value in obj.items()}
96+
else:
97+
return obj
98+
99+
args = move_to_cuda(copy.deepcopy(args))
100+
kwargs = move_to_cuda(copy.deepcopy(kwargs))
101+
82102
# print("args, kwargs", args, kwargs)
83103
print("len(args)", len(args))
84104
print("args[0].shape", args[0].shape)

backends/aoti/runtime/aoti_backend.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class AOTIBackend final : public ::executorch::runtime::BackendInterface {
8383
std::string so_blob_key = "so_blob";
8484

8585
Result<FreeableBuffer> aoti_cuda_buffer =
86-
named_data_map->get_data(aoti_cuda_blob_name.c_str());
86+
named_data_map->get_data(so_blob_key.c_str());
8787

8888
// Create a temporary file
8989
std::ofstream outfile(so_path.c_str(), std::ios::binary);

exir/program/_program.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1680,7 +1680,7 @@ def exported_program_to_device(exported_program, device):
16801680

16811681
execution_programs: Dict[str, ExportedProgram] = {}
16821682
for name, program in self._edge_programs.items():
1683-
program = exported_program_to_device(program, "cpu")
1683+
# program = exported_program_to_device(program, "cpu")
16841684
if config.do_quant_fusion_and_const_prop:
16851685
if program.graph_signature.backward_signature is not None:
16861686
raise Exception(

export_aoti.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -264,73 +264,61 @@ def forward(self, x):
264264
"mv2": {
265265
"model_class": MV2,
266266
"input_shapes": [(1, 3, 224, 224)],
267-
"device": "cuda",
268267
"description": "MobileNetV2 model",
269268
},
270269
"resnet18": {
271270
"model_class": ResNet18,
272271
"input_shapes": [(1, 3, 224, 224)],
273-
"device": "cuda",
274272
"description": "ResNet18 model",
275273
},
276274
"linear": {
277275
"model_class": Linear,
278276
"input_shapes": [(127, 7)],
279-
"device": "cuda",
280277
"description": "Simple linear layer model",
281278
},
282279
"conv2d": {
283280
"model_class": SingleConv2d,
284281
"input_shapes": [(4, 3, 8, 8)],
285-
"device": "cuda",
286282
"description": "Single Conv2d layer model",
287283
},
288284
"depthwise_conv": {
289285
"model_class": DepthwiseConv,
290286
"input_shapes": [(1, 32, 112, 112)],
291-
"device": "cuda",
292287
"description": "Single Depthwise Conv2d layer model",
293288
},
294289
"add": {
295290
"model_class": Add,
296291
"input_shapes": [(10,), (10,)],
297-
"device": "cuda",
298292
"description": "Simple tensor addition model",
299293
},
300294
"batchnorm": {
301295
"model_class": BatchNorm,
302296
"input_shapes": [(1, 16, 32, 32)],
303-
"device": "cuda",
304297
"description": "Single BatchNorm2d layer model",
305298
},
306299
"single_resnet_block": {
307300
"model_class": SingleResNetBlock,
308301
"input_shapes": [(1, 64, 8, 8)],
309-
"device": "cuda",
310302
"description": "Single ResNet block with skip connection",
311303
},
312304
"llama31": {
313305
"model_class": Llama31,
314306
"input_shapes": [(1, 32)], # batch_size=1, sequence_length=32
315-
"device": "cuda",
316307
"description": "Llama 3.1 model with KV cache disabled",
317308
},
318309
"whisper": {
319310
"model_class": Whisper,
320311
"input_shapes": [(1, 80, 3000)],
321-
"device": "cuda",
322312
"description": "OpenAI Whisper ASR model. now is encoder only",
323313
},
324314
"conv1d": {
325315
"model_class": MockConv1d,
326316
"input_shapes": [(1, 80, 3000)],
327-
"device": "cuda",
328317
"description": "Conv1d layer with 80 input channels, 384 output channels",
329318
},
330319
"transformer_block": {
331320
"model_class": TransformerBlock,
332321
"input_shapes": [(4, 32, 256)], # batch_size=4, seq_len=32, embed_dim=256
333-
"device": "cuda",
334322
"description": "Single transformer block with multi-head attention and feed-forward network",
335323
},
336324
}
@@ -350,7 +338,7 @@ def get_model_and_inputs(
350338
model_config = MODEL_REGISTRY[model_name]
351339
model_class = model_config["model_class"]
352340
input_shapes = model_config["input_shapes"]
353-
device = model_config["device"]
341+
device = "cpu"
354342

355343
# Create model instance
356344
model = model_class().to(device).eval()

0 commit comments

Comments
 (0)