diff --git a/torchbenchmark/models/hf_clip/__init__.py b/torchbenchmark/models/hf_clip/__init__.py
index f2920eef68..33e8bfd809 100644
--- a/torchbenchmark/models/hf_clip/__init__.py
+++ b/torchbenchmark/models/hf_clip/__init__.py
@@ -51,9 +51,9 @@ def __init__(self, test, device, batch_size=1, extra_args=[]):
         text = "the dog is here"
         images = [image] * self.batch_size
         texts = [text] * self.batch_size
-        self.inputs = processor(
+        self.inputs = dict(processor(
             text=texts, images=images, return_tensors="pt", padding=True
-        )
+        ))
 
         # dict_keys(['input_ids', 'attention_mask', 'pixel_values'])
         for key in self.inputs: