From 1fb1643129f9ac6a63bddf04d69e28632615c0df Mon Sep 17 00:00:00 2001
From: Gary Guo <gary@garyguo.net>
Date: Tue, 10 Aug 2021 11:50:33 +0100
Subject: [PATCH] Implement `black_box` using intrinsic

The new implementation allows some `memcpy`s to be optimized away,
so the uninitialized value in ui/sanitize/memory.rs is constructed
directly in the return place. As a result, the sanitizer now reports
that the value was allocated by `main` rather than `random`.
---
 .../src/intrinsics/mod.rs                     |  5 ++++
 compiler/rustc_codegen_llvm/src/asm.rs        |  2 +-
 compiler/rustc_codegen_llvm/src/intrinsic.rs  | 26 +++++++++++++++++++
 .../rustc_mir/src/interpret/intrinsics.rs     |  2 +-
 compiler/rustc_span/src/symbol.rs             |  1 +
 compiler/rustc_typeck/src/check/intrinsic.rs  |  3 +++
 library/core/src/hint.rs                      | 20 ++++++--------
 library/core/src/intrinsics.rs                |  6 +++++
 src/test/ui/sanitize/memory.rs                |  2 +-
 9 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index 8669846074749..1c4d307fc50e1 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -1136,6 +1136,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
                 };
             ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
         };
+
+        black_box, (c a) {
+            // FIXME: implement `black_box` semantics; for now the argument is returned as-is
+            ret.write_cvalue(fx, a);
+        };
     }
 
     if let Some((_, dest)) = destination {
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index ebc3773df57cd..4790b44bd19ef 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -425,7 +425,7 @@ impl AsmMethods for CodegenCx<'ll, 'tcx> {
     }
 }
 
-fn inline_asm_call(
+pub(crate) fn inline_asm_call(
     bx: &mut Builder<'a, 'll, 'tcx>,
     asm: &str,
     cons: &str,
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index ed48418586517..fe2ed21c1e3b0 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -7,6 +7,7 @@ use crate::type_of::LayoutLlvmExt;
 use crate::va_arg::emit_va_arg;
 use crate::value::Value;
 
+use rustc_ast as ast;
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh};
 use rustc_codegen_ssa::common::span_invalid_monomorphization_error;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -327,6 +328,31 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
                 }
             }
 
+            sym::black_box => {
+                args[0].val.store(self, result);
+
+                // We need to "use" the argument in some way LLVM can't introspect, and on
+                // targets that support it we can typically leverage inline assembly to do
+                // this. LLVM's interpretation of inline assembly is that it's, well, a black
+                // box. This isn't the greatest implementation since it probably deoptimizes
+                // more than we want, but it's so far good enough.
+                crate::asm::inline_asm_call(
+                    self,
+                    "",
+                    "r,~{memory}",
+                    &[result.llval],
+                    self.type_void(),
+                    true,
+                    false,
+                    ast::LlvmAsmDialect::Att,
+                    &[span],
+                )
+                .unwrap_or_else(|| bug!("failed to generate inline asm call for `black_box`"));
+
+                // We have copied the value to `result` already.
+                return;
+            }
+
             _ if name_str.starts_with("simd_") => {
                 match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
                     Ok(llval) => llval,
diff --git a/compiler/rustc_mir/src/interpret/intrinsics.rs b/compiler/rustc_mir/src/interpret/intrinsics.rs
index dc1f9053b61f0..bfab886b6ee4f 100644
--- a/compiler/rustc_mir/src/interpret/intrinsics.rs
+++ b/compiler/rustc_mir/src/interpret/intrinsics.rs
@@ -465,7 +465,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                 );
                 self.copy_op(&self.operand_index(&args[0], index)?, dest)?;
             }
-            sym::likely | sym::unlikely => {
+            sym::likely | sym::unlikely | sym::black_box => {
                 // These just return their argument
                 self.copy_op(&args[0], dest)?;
             }
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 3f5d8273b38b8..6d03f1a37329e 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -335,6 +335,7 @@ symbols! {
         bitreverse,
         bitxor,
         bitxor_assign,
+        black_box,
         block,
         bool,
         borrowck_graphviz_format,
diff --git a/compiler/rustc_typeck/src/check/intrinsic.rs b/compiler/rustc_typeck/src/check/intrinsic.rs
index 6661df21ed952..664954b0eb7a2 100644
--- a/compiler/rustc_typeck/src/check/intrinsic.rs
+++ b/compiler/rustc_typeck/src/check/intrinsic.rs
@@ -102,6 +102,7 @@ pub fn intrinsic_operation_unsafety(intrinsic: Symbol) -> hir::Unsafety {
         | sym::maxnumf64
         | sym::type_name
         | sym::forget
+        | sym::black_box
         | sym::variant_count => hir::Unsafety::Normal,
         _ => hir::Unsafety::Unsafe,
     }
@@ -387,6 +388,8 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
                 (1, vec![param_ty; 2], tcx.types.bool)
             }
 
+            sym::black_box => (1, vec![param(0)], param(0)),
+
             other => {
                 tcx.sess.emit_err(UnrecognizedIntrinsicFunction { span: it.span, name: other });
                 return;
diff --git a/library/core/src/hint.rs b/library/core/src/hint.rs
index a0b65399da2c5..a4924554919b0 100644
--- a/library/core/src/hint.rs
+++ b/library/core/src/hint.rs
@@ -152,23 +152,19 @@ pub fn spin_loop() {
 /// backend used. Programs cannot rely on `black_box` for *correctness* in any way.
 ///
 /// [`std::convert::identity`]: crate::convert::identity
-#[cfg_attr(not(miri), inline)]
-#[cfg_attr(miri, inline(never))]
+#[inline]
 #[unstable(feature = "bench_black_box", issue = "64102")]
-#[cfg_attr(miri, allow(unused_mut))]
+#[cfg_attr(not(bootstrap), allow(unused_mut))]
 pub fn black_box<T>(mut dummy: T) -> T {
-    // We need to "use" the argument in some way LLVM can't introspect, and on
-    // targets that support it we can typically leverage inline assembly to do
-    // this. LLVM's interpretation of inline assembly is that it's, well, a black
-    // box. This isn't the greatest implementation since it probably deoptimizes
-    // more than we want, but it's so far good enough.
-
-    #[cfg(not(miri))] // This is just a hint, so it is fine to skip in Miri.
+    #[cfg(bootstrap)]
     // SAFETY: the inline assembly is a no-op.
     unsafe {
-        // FIXME: Cannot use `asm!` because it doesn't support MIPS and other architectures.
         llvm_asm!("" : : "r"(&mut dummy) : "memory" : "volatile");
+        dummy
     }
 
-    dummy
+    #[cfg(not(bootstrap))]
+    {
+        crate::intrinsics::black_box(dummy)
+    }
 }
diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs
index d15ac89668fa3..272b1e3a1d75e 100644
--- a/library/core/src/intrinsics.rs
+++ b/library/core/src/intrinsics.rs
@@ -1933,6 +1933,12 @@ extern "rust-intrinsic" {
     /// which is UB if any of their inputs are `undef`.)
     #[rustc_const_unstable(feature = "const_intrinsic_raw_eq", issue = "none")]
     pub fn raw_eq<T>(a: &T, b: &T) -> bool;
+
+    /// See the documentation of [`std::hint::black_box`] for details.
+    ///
+    /// [`std::hint::black_box`]: crate::hint::black_box
+    #[cfg(not(bootstrap))]
+    pub fn black_box<T>(dummy: T) -> T;
 }
 
 // Some functions are defined here because they accidentally got made
diff --git a/src/test/ui/sanitize/memory.rs b/src/test/ui/sanitize/memory.rs
index 48a482a13aaa9..b53f19a5b01aa 100644
--- a/src/test/ui/sanitize/memory.rs
+++ b/src/test/ui/sanitize/memory.rs
@@ -6,7 +6,7 @@
 // run-fail
 // error-pattern: MemorySanitizer: use-of-uninitialized-value
 // error-pattern: Uninitialized value was created by an allocation
-// error-pattern: in the stack frame of function 'random'
+// error-pattern: in the stack frame of function 'main'
 //
 // This test case intentionally limits the usage of the std,
 // since it will be linked with an uninstrumented version of it.