csarofeen
diff --git a/torch/csrc/jit/codegen/cuda/codegen.cpp b/torch/csrc/jit/codegen/cuda/codegen.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/torch/csrc/jit/codegen/cuda/dynamic_type.h b/torch/csrc/jit/codegen/cuda/dynamic_type.h
Lines changed: 26 additions & 19 deletions
@@ -597,7 +597,7 @@ class CudaKernelGenerator : private OptOutConstDispatch {
             vector_size_optional.has_value(),
             "Could not evaluate constant value bound to vectorized dim.");
 
-        vector_word_size = vector_size_optional.value();
+        vector_word_size = vector_size_optional->as<int64_t>();
 
         vectorize_op = id->getParallelType() == ParallelType::Vectorize;
         misaligned_op =
@@ -1267,7 +1267,7 @@ class CudaKernelGenerator : private OptOutConstDispatch {
       TORCH_INTERNAL_ASSERT(
           id->getParallelType() != ParallelType::MisalignedVectorize,
           "LoadStoreOp: no support yet for mis-aligned vectorization");
-      vector_word_size = vector_size_optional.value();
+      vector_word_size = vector_size_optional->as<int64_t>();
       vectorize_op = true;
       break;
     }
 
@@ -40,7 +40,8 @@ class TORCH_CUDA_CU_API IntOrDouble {
 
   template <typename T>
   T as() const {
-    TORCH_CHECK(c10::holds_alternative<T>(value_), "wrong type");
+    TORCH_CHECK( // guard: value_ must currently hold alternative T
+        c10::holds_alternative<T>(value_), "dtype not supported in evaluator");
     return c10::get<T>(value_);
   }
 
@@ -145,8 +146,19 @@ class TORCH_CUDA_CU_API IntOrDouble {
     }                                                   \
     TORCH_INTERNAL_ASSERT(false);                       \
   }                                                     \
-  template <typename T>                                 \
-  bool operator op(T other) {                           \
+  bool operator op(double other) {                      \
+    if (is_int()) {                                     \
+      return as<int64_t>() op other;                    \
+    }                                                   \
+    return as<double>() op other;                       \
+  }                                                     \
+  bool operator op(int64_t other) {                     \
+    if (is_int()) {                                     \
+      return as<int64_t>() op other;                    \
+    }                                                   \
+    return as<double>() op other;                       \
+  }                                                     \
+  bool operator op(int other) {                         \
     if (is_int()) {                                     \
       return as<int64_t>() op other;                    \
     }                                                   \
@@ -169,21 +181,10 @@ class TORCH_CUDA_CU_API IntOrDouble {
     return IntOrDouble(-as<double>());
   }
 
-  template <typename T>
-  bool operator==(T val) const {
-    return operator==(IntOrDouble(val));
-  }
-
-  template <typename T>
-  bool operator!=(T val) const {
-    return operator!=(IntOrDouble(val));
-  }
-
-  operator double() const;
-
-  operator int64_t() const;
-  operator size_t() const;
-  operator int() const;
+  explicit operator double() const;
+  explicit operator int64_t() const;
+  explicit operator size_t() const;
+  explicit operator int() const;
 };
 
 #define DEFINE_ARITHMETIC_OP(op)                           \
@@ -269,7 +270,13 @@ namespace IntOrDouble_functions {
 
 inline IntOrDouble ceildiv(const IntOrDouble& a, const IntOrDouble& b) {
   if (a.is_int() && b.is_int()) {
-    return (a.as<int64_t>() + b.as<int64_t>() - 1) / b.as<int64_t>();
+    auto aa = a.as<int64_t>();
+    auto bb = b.as<int64_t>();
+    // `/` truncates toward zero, which is already the ceiling for a negative
+    // quotient; bump by one only when a non-zero remainder shares bb's sign
+    // (exact quotient positive). Correct for every sign mix; bb != 0 assumed.
+    auto rem = aa % bb;
+    return aa / bb + ((rem != 0 && (rem < 0) == (bb < 0)) ? 1 : 0);
   }
   return std::ceil((a / b).as<double>());
 }