diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs
index 8cab3c34eedfb..67c9993405979 100644
--- a/compiler/rustc_const_eval/src/interpret/machine.rs
+++ b/compiler/rustc_const_eval/src/interpret/machine.rs
@@ -540,10 +540,29 @@ pub trait Machine<'tcx>: Sized {
         Ok(ReturnAction::Normal)
     }
 
+    /// Called immediately after an "immediate" local variable is read
+    /// (i.e., this is called for reads that do not end up accessing addressable memory).
+    #[inline(always)]
+    fn after_local_read(_ecx: &InterpCx<'tcx, Self>, _local: mir::Local) -> InterpResult<'tcx> {
+        Ok(())
+    }
+
+    /// Called immediately after an "immediate" local variable is assigned a new value
+    /// (i.e., this is called for writes that do not end up in memory).
+    /// `storage_live` indicates whether this is the initial write upon `StorageLive`.
+    #[inline(always)]
+    fn after_local_write(
+        _ecx: &mut InterpCx<'tcx, Self>,
+        _local: mir::Local,
+        _storage_live: bool,
+    ) -> InterpResult<'tcx> {
+        Ok(())
+    }
+
     /// Called immediately after actual memory was allocated for a local
     /// but before the local's stack frame is updated to point to that memory.
     #[inline(always)]
-    fn after_local_allocated(
+    fn after_local_moved_to_memory(
         _ecx: &mut InterpCx<'tcx, Self>,
         _local: mir::Local,
         _mplace: &MPlaceTy<'tcx, Self::Provenance>,
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index d87588496c0bd..a65637f497837 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -1030,6 +1030,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         );
         res
     }
+
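+    /// Whether a validation run is currently in progress (then some machine hooks are skipped).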
+    pub(super) fn validation_in_progress(&self) -> bool {
+        self.memory.validation_in_progress
+    }
 }
 
 #[doc(hidden)]
diff --git a/compiler/rustc_const_eval/src/interpret/operand.rs b/compiler/rustc_const_eval/src/interpret/operand.rs
index b906e3422dba5..bdbacfd20c2d5 100644
--- a/compiler/rustc_const_eval/src/interpret/operand.rs
+++ b/compiler/rustc_const_eval/src/interpret/operand.rs
@@ -719,6 +719,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         if matches!(op, Operand::Immediate(_)) {
             assert!(!layout.is_unsized());
         }
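+        // Call the machine hook (the data race detector needs to know about this read).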
+        M::after_local_read(self, local)?;
         Ok(OpTy { op, layout })
     }
 
diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs
index 3b14142da02ed..ee8e5419495d8 100644
--- a/compiler/rustc_const_eval/src/interpret/place.rs
+++ b/compiler/rustc_const_eval/src/interpret/place.rs
@@ -504,15 +504,13 @@ where
         &self,
         local: mir::Local,
     ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> {
-        // Other parts of the system rely on `Place::Local` never being unsized.
-        // So we eagerly check here if this local has an MPlace, and if yes we use it.
         let frame = self.frame();
         let layout = self.layout_of_local(frame, local, None)?;
         let place = if layout.is_sized() {
             // We can just always use the `Local` for sized values.
             Place::Local { local, offset: None, locals_addr: frame.locals_addr() }
         } else {
-            // Unsized `Local` isn't okay (we cannot store the metadata).
+            // Other parts of the system rely on `Place::Local` never being unsized.
             match frame.locals[local].access()? {
                 Operand::Immediate(_) => bug!(),
                 Operand::Indirect(mplace) => Place::Ptr(*mplace),
@@ -565,7 +563,10 @@ where
         place: &PlaceTy<'tcx, M::Provenance>,
     ) -> InterpResult<
         'tcx,
-        Either<MPlaceTy<'tcx, M::Provenance>, (&mut Immediate<M::Provenance>, TyAndLayout<'tcx>)>,
+        Either<
+            MPlaceTy<'tcx, M::Provenance>,
+            (&mut Immediate<M::Provenance>, TyAndLayout<'tcx>, mir::Local),
+        >,
     > {
         Ok(match place.to_place().as_mplace_or_local() {
             Left(mplace) => Left(mplace),
@@ -584,7 +585,7 @@ where
                         }
                         Operand::Immediate(local_val) => {
                             // The local still has the optimized representation.
-                            Right((local_val, layout))
+                            Right((local_val, layout, local))
                         }
                     }
                 }
@@ -646,9 +647,13 @@ where
         assert!(dest.layout().is_sized(), "Cannot write unsized immediate data");
 
         match self.as_mplace_or_mutable_local(&dest.to_place())? {
-            Right((local_val, local_layout)) => {
+            Right((local_val, local_layout, local)) => {
                 // Local can be updated in-place.
                 *local_val = src;
+                // Call the machine hook (the data race detector needs to know about this write).
+                if !self.validation_in_progress() {
+                    M::after_local_write(self, local, /*storage_live*/ false)?;
+                }
                 // Double-check that the value we are storing and the local fit to each other.
                 if cfg!(debug_assertions) {
                     src.assert_matches_abi(local_layout.abi, self);
@@ -717,8 +722,12 @@ where
         dest: &impl Writeable<'tcx, M::Provenance>,
     ) -> InterpResult<'tcx> {
         match self.as_mplace_or_mutable_local(&dest.to_place())? {
-            Right((local_val, _local_layout)) => {
+            Right((local_val, _local_layout, local)) => {
                 *local_val = Immediate::Uninit;
+                // Call the machine hook (the data race detector needs to know about this write).
+                if !self.validation_in_progress() {
+                    M::after_local_write(self, local, /*storage_live*/ false)?;
+                }
             }
             Left(mplace) => {
                 let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
@@ -737,8 +746,12 @@ where
         dest: &impl Writeable<'tcx, M::Provenance>,
     ) -> InterpResult<'tcx> {
         match self.as_mplace_or_mutable_local(&dest.to_place())? {
-            Right((local_val, _local_layout)) => {
+            Right((local_val, _local_layout, local)) => {
                 local_val.clear_provenance()?;
+                // Call the machine hook (the data race detector needs to know about this write).
+                if !self.validation_in_progress() {
+                    M::after_local_write(self, local, /*storage_live*/ false)?;
+                }
             }
             Left(mplace) => {
                 let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
@@ -944,7 +957,7 @@ where
                                 mplace.mplace,
                             )?;
                         }
-                        M::after_local_allocated(self, local, &mplace)?;
+                        M::after_local_moved_to_memory(self, local, &mplace)?;
                         // Now we can call `access_mut` again, asserting it goes well, and actually
                         // overwrite things. This points to the entire allocation, not just the part
                         // the place refers to, i.e. we do this before we apply `offset`.
diff --git a/compiler/rustc_const_eval/src/interpret/stack.rs b/compiler/rustc_const_eval/src/interpret/stack.rs
index b6e83715e3989..db418c82f663f 100644
--- a/compiler/rustc_const_eval/src/interpret/stack.rs
+++ b/compiler/rustc_const_eval/src/interpret/stack.rs
@@ -534,8 +534,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             let dest_place = self.allocate_dyn(layout, MemoryKind::Stack, meta)?;
             Operand::Indirect(*dest_place.mplace())
         } else {
-            assert!(!meta.has_meta()); // we're dropping the metadata
             // Just make this an efficient immediate.
+            assert!(!meta.has_meta()); // we're dropping the metadata
+            // Make sure the machine knows this "write" is happening. (This is important so that
+            // races involving local variable allocation can be detected by Miri.)
+            M::after_local_write(self, local, /*storage_live*/ true)?;
             // Note that not calling `layout_of` here does have one real consequence:
             // if the type is too big, we'll only notice this when the local is actually initialized,
             // which is a bit too late -- we should ideally notice this already here, when the memory
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index b604fd868a02a..f686b331ad6c7 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -47,6 +47,7 @@ use std::{
 };
 
 use rustc_ast::Mutability;
+use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::fx::FxHashSet;
 use rustc_index::{Idx, IndexVec};
 use rustc_middle::{mir, ty::Ty};
@@ -1047,32 +1048,31 @@ impl VClockAlloc {
     ) -> InterpResult<'tcx> {
         let current_span = machine.current_span();
         let global = machine.data_race.as_ref().unwrap();
-        if global.race_detecting() {
-            let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
-            let mut alloc_ranges = self.alloc_ranges.borrow_mut();
-            for (mem_clocks_range, mem_clocks) in
-                alloc_ranges.iter_mut(access_range.start, access_range.size)
+        if !global.race_detecting() {
+            return Ok(());
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        let mut alloc_ranges = self.alloc_ranges.borrow_mut();
+        for (mem_clocks_range, mem_clocks) in
+            alloc_ranges.iter_mut(access_range.start, access_range.size)
+        {
+            if let Err(DataRace) =
+                mem_clocks.read_race_detect(&mut thread_clocks, index, read_type, current_span)
             {
-                if let Err(DataRace) =
-                    mem_clocks.read_race_detect(&mut thread_clocks, index, read_type, current_span)
-                {
-                    drop(thread_clocks);
-                    // Report data-race.
-                    return Self::report_data_race(
-                        global,
-                        &machine.threads,
-                        mem_clocks,
-                        AccessType::NaRead(read_type),
-                        access_range.size,
-                        interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
-                        ty,
-                    );
-                }
+                drop(thread_clocks);
+                // Report data-race.
+                return Self::report_data_race(
+                    global,
+                    &machine.threads,
+                    mem_clocks,
+                    AccessType::NaRead(read_type),
+                    access_range.size,
+                    interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
+                    ty,
+                );
             }
-            Ok(())
-        } else {
-            Ok(())
         }
+        Ok(())
     }
 
     /// Detect data-races for an unsynchronized write operation. It will not perform
@@ -1090,33 +1090,131 @@ impl VClockAlloc {
     ) -> InterpResult<'tcx> {
         let current_span = machine.current_span();
         let global = machine.data_race.as_mut().unwrap();
-        if global.race_detecting() {
-            let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
-            for (mem_clocks_range, mem_clocks) in
-                self.alloc_ranges.get_mut().iter_mut(access_range.start, access_range.size)
+        if !global.race_detecting() {
+            return Ok(());
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        for (mem_clocks_range, mem_clocks) in
+            self.alloc_ranges.get_mut().iter_mut(access_range.start, access_range.size)
+        {
+            if let Err(DataRace) =
+                mem_clocks.write_race_detect(&mut thread_clocks, index, write_type, current_span)
             {
-                if let Err(DataRace) = mem_clocks.write_race_detect(
-                    &mut thread_clocks,
-                    index,
-                    write_type,
-                    current_span,
-                ) {
-                    drop(thread_clocks);
-                    // Report data-race
-                    return Self::report_data_race(
-                        global,
-                        &machine.threads,
-                        mem_clocks,
-                        AccessType::NaWrite(write_type),
-                        access_range.size,
-                        interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
-                        ty,
-                    );
-                }
+                drop(thread_clocks);
+                // Report data-race
+                return Self::report_data_race(
+                    global,
+                    &machine.threads,
+                    mem_clocks,
+                    AccessType::NaWrite(write_type),
+                    access_range.size,
+                    interpret::Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
+                    ty,
+                );
             }
-            Ok(())
+        }
+        Ok(())
+    }
+}
+
+/// Vector clock state for a stack frame (tracking the local variables
+/// that do not have an allocation yet).
+#[derive(Debug, Default)]
+pub struct FrameState {
+    local_clocks: RefCell<FxHashMap<mir::Local, LocalClocks>>,
+}
+
+/// Stripped-down version of [`MemoryCellClocks`] for the clocks we need to keep track
+/// of in a local that does not yet have addressable memory -- and hence can only
+/// be accessed from the thread its stack frame belongs to, and cannot be accessed atomically.
+#[derive(Debug)]
+struct LocalClocks {
+    write: VTimestamp,
+    write_type: NaWriteType,
+    read: VTimestamp,
+}
+
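+// The all-zero default means the local's accesses are backdated to the beginning of time,
+// which is what we want for locals whose clocks were never tracked (see `race_detecting`).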
+impl Default for LocalClocks {
+    fn default() -> Self {
+        Self { write: VTimestamp::ZERO, write_type: NaWriteType::Allocate, read: VTimestamp::ZERO }
+    }
+}
+
+impl FrameState {
+    pub fn local_write(&self, local: mir::Local, storage_live: bool, machine: &MiriMachine<'_>) {
+        let current_span = machine.current_span();
+        let global = machine.data_race.as_ref().unwrap();
+        if !global.race_detecting() {
+            return;
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        // This should do the same things as `MemoryCellClocks::write_race_detect`.
+        if !current_span.is_dummy() {
+            thread_clocks.clock.index_mut(index).span = current_span;
+        }
+        let mut clocks = self.local_clocks.borrow_mut();
+        if storage_live {
+            let new_clocks = LocalClocks {
+                write: thread_clocks.clock[index],
+                write_type: NaWriteType::Allocate,
+                read: VTimestamp::ZERO,
+            };
+            // There might already be an entry in the map for this local, if it was
+            // previously live; in that case the insert overwrites it.
+            clocks.insert(local, new_clocks);
         } else {
-            Ok(())
+            // The entry might not exist if `race_detecting` was false when the allocation
+            // occurred, in which case we can backdate this to the beginning of time.
+            let clocks = clocks.entry(local).or_insert_with(Default::default);
+            clocks.write = thread_clocks.clock[index];
+            clocks.write_type = NaWriteType::Write;
+        }
+    }
+
+    pub fn local_read(&self, local: mir::Local, machine: &MiriMachine<'_>) {
+        let current_span = machine.current_span();
+        let global = machine.data_race.as_ref().unwrap();
+        if !global.race_detecting() {
+            return;
+        }
+        let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        // This should do the same things as `MemoryCellClocks::read_race_detect`.
+        if !current_span.is_dummy() {
+            thread_clocks.clock.index_mut(index).span = current_span;
+        }
+        thread_clocks.clock.index_mut(index).set_read_type(NaReadType::Read);
+        // The entry might not exist if `race_detecting` was false when the allocation
+        // occurred, in which case we can backdate this to the beginning of time.
+        let mut clocks = self.local_clocks.borrow_mut();
+        let clocks = clocks.entry(local).or_insert_with(Default::default);
+        clocks.read = thread_clocks.clock[index];
+    }
+
+    pub fn local_moved_to_memory(
+        &self,
+        local: mir::Local,
+        alloc: &mut VClockAlloc,
+        machine: &MiriMachine<'_>,
+    ) {
+        let global = machine.data_race.as_ref().unwrap();
+        if !global.race_detecting() {
+            return;
+        }
+        let (index, _thread_clocks) = global.active_thread_state_mut(&machine.threads);
+        // Get the time the last write actually happened. The entry might not exist if
+        // `race_detecting` was false when the write occurred, in which case we can backdate
+        // this to the beginning of time.
+        let local_clocks = self.local_clocks.borrow_mut().remove(&local).unwrap_or_default();
+        for (_mem_clocks_range, mem_clocks) in alloc.alloc_ranges.get_mut().iter_mut_all() {
+            // The initialization write for this already happened, just at the wrong timestamp.
+            // Check that the thread index matches what we expect.
+            assert_eq!(mem_clocks.write.0, index);
+            // Convert the local's clocks into memory clocks.
+            mem_clocks.write = (index, local_clocks.write);
+            mem_clocks.write_type = local_clocks.write_type;
+            mem_clocks.read = VClock::new_with_index(index, local_clocks.read);
         }
     }
 }
@@ -1305,69 +1401,67 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
         assert!(access.is_atomic());
-        if let Some(data_race) = &this.machine.data_race {
-            if data_race.race_detecting() {
-                let size = place.layout.size;
-                let (alloc_id, base_offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0)?;
-                // Load and log the atomic operation.
-                // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
-                let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
-                trace!(
-                    "Atomic op({}) with ordering {:?} on {:?} (size={})",
-                    access.description(None, None),
-                    &atomic,
-                    place.ptr(),
-                    size.bytes()
-                );
+        let Some(data_race) = &this.machine.data_race else { return Ok(()) };
+        if !data_race.race_detecting() {
+            return Ok(());
+        }
+        let size = place.layout.size;
+        let (alloc_id, base_offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0)?;
+        // Load and log the atomic operation.
+        // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
+        let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
+        trace!(
+            "Atomic op({}) with ordering {:?} on {:?} (size={})",
+            access.description(None, None),
+            &atomic,
+            place.ptr(),
+            size.bytes()
+        );
 
-                let current_span = this.machine.current_span();
-                // Perform the atomic operation.
-                data_race.maybe_perform_sync_operation(
-                    &this.machine.threads,
-                    current_span,
-                    |index, mut thread_clocks| {
-                        for (mem_clocks_range, mem_clocks) in
-                            alloc_meta.alloc_ranges.borrow_mut().iter_mut(base_offset, size)
-                        {
-                            if let Err(DataRace) = op(mem_clocks, &mut thread_clocks, index, atomic)
-                            {
-                                mem::drop(thread_clocks);
-                                return VClockAlloc::report_data_race(
-                                    data_race,
-                                    &this.machine.threads,
-                                    mem_clocks,
-                                    access,
-                                    place.layout.size,
-                                    interpret::Pointer::new(
-                                        alloc_id,
-                                        Size::from_bytes(mem_clocks_range.start),
-                                    ),
-                                    None,
-                                )
-                                .map(|_| true);
-                            }
-                        }
-
-                        // This conservatively assumes all operations have release semantics
-                        Ok(true)
-                    },
-                )?;
-
-                // Log changes to atomic memory.
-                if tracing::enabled!(tracing::Level::TRACE) {
-                    for (_offset, mem_clocks) in
-                        alloc_meta.alloc_ranges.borrow().iter(base_offset, size)
-                    {
-                        trace!(
-                            "Updated atomic memory({:?}, size={}) to {:#?}",
-                            place.ptr(),
-                            size.bytes(),
-                            mem_clocks.atomic_ops
-                        );
+        let current_span = this.machine.current_span();
+        // Perform the atomic operation.
+        data_race.maybe_perform_sync_operation(
+            &this.machine.threads,
+            current_span,
+            |index, mut thread_clocks| {
+                for (mem_clocks_range, mem_clocks) in
+                    alloc_meta.alloc_ranges.borrow_mut().iter_mut(base_offset, size)
+                {
+                    if let Err(DataRace) = op(mem_clocks, &mut thread_clocks, index, atomic) {
+                        mem::drop(thread_clocks);
+                        return VClockAlloc::report_data_race(
+                            data_race,
+                            &this.machine.threads,
+                            mem_clocks,
+                            access,
+                            place.layout.size,
+                            interpret::Pointer::new(
+                                alloc_id,
+                                Size::from_bytes(mem_clocks_range.start),
+                            ),
+                            None,
+                        )
+                        .map(|_| true);
                     }
                 }
+
+                // This conservatively assumes all operations have release semantics
+                Ok(true)
+            },
+        )?;
+
+        // Log changes to atomic memory.
+        if tracing::enabled!(tracing::Level::TRACE) {
+            for (_offset, mem_clocks) in alloc_meta.alloc_ranges.borrow().iter(base_offset, size) {
+                trace!(
+                    "Updated atomic memory({:?}, size={}) to {:#?}",
+                    place.ptr(),
+                    size.bytes(),
+                    mem_clocks.atomic_ops
+                );
             }
         }
+
         Ok(())
     }
 }
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index 306245a843bf4..8c3bee83e46cb 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -530,7 +530,9 @@ impl<'tcx> ThreadManager<'tcx> {
     }
 
     /// Mutably borrow the stack of the active thread.
-    fn active_thread_stack_mut(&mut self) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
+    pub fn active_thread_stack_mut(
+        &mut self,
+    ) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
         &mut self.threads[self.active_thread].stack
     }
     pub fn all_stacks(
diff --git a/src/tools/miri/src/concurrency/vector_clock.rs b/src/tools/miri/src/concurrency/vector_clock.rs
index c3496bc1a0c8a..0968e10bbee0b 100644
--- a/src/tools/miri/src/concurrency/vector_clock.rs
+++ b/src/tools/miri/src/concurrency/vector_clock.rs
@@ -130,6 +130,9 @@ impl Ord for VTimestamp {
 /// also this means that there is only one unique valid length
 /// for each set of vector clock values and hence the PartialEq
 /// and Eq derivations are correct.
+///
+/// This means we cannot represent a clock where the last entry is a timestamp-0 read that occurs
+/// because of a retag. That's fine; all it does is risk wrong diagnostics in an extreme corner case.
 #[derive(PartialEq, Eq, Default, Debug)]
 pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>);
 
@@ -137,6 +140,10 @@ impl VClock {
     /// Create a new vector-clock containing all zeros except
     /// for a value at the given index
     pub(super) fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock {
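+        // A clock whose only entry is zero equals the empty clock (see the invariant above).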
+        if timestamp.time() == 0 {
+            return VClock::default();
+        }
         let len = index.index() + 1;
         let mut vec = smallvec::smallvec![VTimestamp::ZERO; len];
         vec[index.index()] = timestamp;
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 76b4366476d52..df55902decdc8 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -81,24 +81,42 @@ pub struct FrameExtra<'tcx> {
     /// an additional bit of "salt" into the cache key. This salt is fixed per-frame
     /// so that within a call, a const will have a stable address.
     salt: usize,
+
+    /// Data race detector per-frame data.
+    pub data_race: Option<data_race::FrameState>,
 }
 
 impl<'tcx> std::fmt::Debug for FrameExtra<'tcx> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         // Omitting `timing`, it does not support `Debug`.
-        let FrameExtra { borrow_tracker, catch_unwind, timing: _, is_user_relevant: _, salt: _ } =
-            self;
+        let FrameExtra {
+            borrow_tracker,
+            catch_unwind,
+            timing: _,
+            is_user_relevant,
+            salt,
+            data_race,
+        } = self;
         f.debug_struct("FrameData")
             .field("borrow_tracker", borrow_tracker)
             .field("catch_unwind", catch_unwind)
+            .field("is_user_relevant", is_user_relevant)
+            .field("salt", salt)
+            .field("data_race", data_race)
             .finish()
     }
 }
 
 impl VisitProvenance for FrameExtra<'_> {
     fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
-        let FrameExtra { catch_unwind, borrow_tracker, timing: _, is_user_relevant: _, salt: _ } =
-            self;
+        let FrameExtra {
+            catch_unwind,
+            borrow_tracker,
+            timing: _,
+            is_user_relevant: _,
+            salt: _,
+            data_race: _,
+        } = self;
 
         catch_unwind.visit_provenance(visit);
         borrow_tracker.visit_provenance(visit);
@@ -1446,6 +1464,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
             timing,
             is_user_relevant: ecx.machine.is_user_relevant(&frame),
             salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL,
+            data_race: ecx.machine.data_race.as_ref().map(|_| data_race::FrameState::default()),
         };
 
         Ok(frame.with_extra(extra))
@@ -1551,7 +1570,25 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
         res
     }
 
-    fn after_local_allocated(
+    fn after_local_read(ecx: &InterpCx<'tcx, Self>, local: mir::Local) -> InterpResult<'tcx> {
+        if let Some(data_race) = &ecx.frame().extra.data_race {
+            data_race.local_read(local, &ecx.machine);
+        }
+        Ok(())
+    }
+
+    fn after_local_write(
+        ecx: &mut InterpCx<'tcx, Self>,
+        local: mir::Local,
+        storage_live: bool,
+    ) -> InterpResult<'tcx> {
+        if let Some(data_race) = &ecx.frame().extra.data_race {
+            data_race.local_write(local, storage_live, &ecx.machine);
+        }
+        Ok(())
+    }
+
+    fn after_local_moved_to_memory(
         ecx: &mut InterpCx<'tcx, Self>,
         local: mir::Local,
         mplace: &MPlaceTy<'tcx>,
@@ -1559,9 +1596,18 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
         let Some(Provenance::Concrete { alloc_id, .. }) = mplace.ptr().provenance else {
             panic!("after_local_allocated should only be called on fresh allocations");
         };
+        // Record the span where this was allocated: the declaration of the local.
         let local_decl = &ecx.frame().body().local_decls[local];
         let span = local_decl.source_info.span;
         ecx.machine.allocation_spans.borrow_mut().insert(alloc_id, (span, None));
+        // The data race system has to fix the clocks used for this write.
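+        // (`get_alloc_extra_mut` also returns the machine, since `ecx` is mutably borrowed.)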
+        let (alloc_info, machine) = ecx.get_alloc_extra_mut(alloc_id)?;
+        if let Some(data_race) =
+            &machine.threads.active_thread_stack().last().unwrap().extra.data_race
+        {
+            data_race.local_moved_to_memory(local, alloc_info.data_race.as_mut().unwrap(), machine);
+        }
         Ok(())
     }
 
diff --git a/src/tools/miri/tests/fail/data_race/local_variable_alloc_race.rs b/src/tools/miri/tests/fail/data_race/local_variable_alloc_race.rs
new file mode 100644
index 0000000000000..751a308a39998
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/local_variable_alloc_race.rs
@@ -0,0 +1,57 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0 -Zmiri-disable-weak-memory-emulation
+#![feature(core_intrinsics)]
+#![feature(custom_mir)]
+
+use std::intrinsics::mir::*;
+use std::sync::atomic::Ordering::*;
+use std::sync::atomic::*;
+use std::thread::JoinHandle;
+
+static P: AtomicPtr<u8> = AtomicPtr::new(core::ptr::null_mut());
+
+fn spawn_thread() -> JoinHandle<()> {
+    std::thread::spawn(|| {
+        while P.load(Relaxed).is_null() {
+            std::hint::spin_loop();
+        }
+        unsafe {
+            // Initialize `*P`.
+            let ptr = P.load(Relaxed);
+            *ptr = 127;
+            //~^ ERROR: Data race detected between (1) creating a new allocation on thread `main` and (2) non-atomic write on thread `unnamed-1`
+        }
+    })
+}
+
+fn finish(t: JoinHandle<()>, val_ptr: *mut u8) {
+    P.store(val_ptr, Relaxed);
+
+    // Wait for the thread to be done.
+    t.join().unwrap();
+
+    // Read initialized value.
+    assert_eq!(unsafe { *val_ptr }, 127);
+}
+
+#[custom_mir(dialect = "runtime", phase = "optimized")]
+fn main() {
+    mir! {
+        let t;
+        let val;
+        let val_ptr;
+        let _ret;
+        {
+            Call(t = spawn_thread(), ReturnTo(after_spawn), UnwindContinue())
+        }
+        after_spawn = {
+            // This races with the write in the other thread.
+            StorageLive(val);
+
+            val_ptr = &raw mut val;
+            Call(_ret = finish(t, val_ptr), ReturnTo(done), UnwindContinue())
+        }
+        done = {
+            Return()
+        }
+    }
+}
diff --git a/src/tools/miri/tests/fail/data_race/local_variable_alloc_race.stderr b/src/tools/miri/tests/fail/data_race/local_variable_alloc_race.stderr
new file mode 100644
index 0000000000000..f46eb078a5189
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/local_variable_alloc_race.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Data race detected between (1) creating a new allocation on thread `main` and (2) non-atomic write on thread `unnamed-ID` at ALLOC. (2) just happened here
+  --> $DIR/local_variable_alloc_race.rs:LL:CC
+   |
+LL |             *ptr = 127;
+   |             ^^^^^^^^^^ Data race detected between (1) creating a new allocation on thread `main` and (2) non-atomic write on thread `unnamed-ID` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/local_variable_alloc_race.rs:LL:CC
+   |
+LL |             StorageLive(val);
+   |             ^^^^^^^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span) on thread `unnamed-ID`:
+   = note: inside closure at $DIR/local_variable_alloc_race.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/fail/data_race/local_variable_read_race.rs b/src/tools/miri/tests/fail/data_race/local_variable_read_race.rs
new file mode 100644
index 0000000000000..80d2b7b7c12bd
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/local_variable_read_race.rs
@@ -0,0 +1,38 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0 -Zmiri-disable-weak-memory-emulation
+use std::sync::atomic::Ordering::*;
+use std::sync::atomic::*;
+
+static P: AtomicPtr<u8> = AtomicPtr::new(core::ptr::null_mut());
+
+fn main() {
+    // Create the local variable, and initialize it.
+    let mut val: u8 = 0;
+
+    let t1 = std::thread::spawn(|| {
+        while P.load(Relaxed).is_null() {
+            std::hint::spin_loop();
+        }
+        unsafe {
+            // Initialize `*P`.
+            let ptr = P.load(Relaxed);
+            *ptr = 127;
+            //~^ ERROR: Data race detected between (1) non-atomic read on thread `main` and (2) non-atomic write on thread `unnamed-1`
+        }
+    });
+
+    // This read is not ordered with the store above, and thus should be reported as a race.
+    let _val = val;
+
+    // Actually generate memory for the local variable.
+    // This is the time its value is actually written to memory.
+    // If we just "pre-date" the write to the beginning of time (since we don't know
+    // when it actually happened), we'd miss the UB in this test.
+    // Also, the UB error should point at the write above, not the addr-of here.
+    P.store(std::ptr::addr_of_mut!(val), Relaxed);
+
+    // Wait for the thread to be done.
+    t1.join().unwrap();
+
+    // Read initialized value.
+    assert_eq!(val, 127);
+}
diff --git a/src/tools/miri/tests/fail/data_race/local_variable_read_race.stderr b/src/tools/miri/tests/fail/data_race/local_variable_read_race.stderr
new file mode 100644
index 0000000000000..d14c2fb47ffc8
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/local_variable_read_race.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Data race detected between (1) non-atomic read on thread `main` and (2) non-atomic write on thread `unnamed-ID` at ALLOC. (2) just happened here
+  --> $DIR/local_variable_read_race.rs:LL:CC
+   |
+LL |             *ptr = 127;
+   |             ^^^^^^^^^^ Data race detected between (1) non-atomic read on thread `main` and (2) non-atomic write on thread `unnamed-ID` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/local_variable_read_race.rs:LL:CC
+   |
+LL |     let _val = val;
+   |                ^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span) on thread `unnamed-ID`:
+   = note: inside closure at $DIR/local_variable_read_race.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/fail/data_race/local_variable_write_race.rs b/src/tools/miri/tests/fail/data_race/local_variable_write_race.rs
new file mode 100644
index 0000000000000..eabbe4403c676
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/local_variable_write_race.rs
@@ -0,0 +1,37 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0 -Zmiri-disable-weak-memory-emulation
+use std::sync::atomic::Ordering::*;
+use std::sync::atomic::*;
+
+static P: AtomicPtr<u8> = AtomicPtr::new(core::ptr::null_mut());
+
+fn main() {
+    let t1 = std::thread::spawn(|| {
+        while P.load(Relaxed).is_null() {
+            std::hint::spin_loop();
+        }
+        unsafe {
+            // Initialize `*P`.
+            let ptr = P.load(Relaxed);
+            *ptr = 127;
+            //~^ ERROR: Data race detected between (1) non-atomic write on thread `main` and (2) non-atomic write on thread `unnamed-1`
+        }
+    });
+
+    // Create the local variable, and initialize it.
+    // This is not ordered with the store above, so it's definitely UB
+    // for that thread to access this variable.
+    let mut val: u8 = 0;
+
+    // Actually generate memory for the local variable.
+    // This is the time its value is actually written to memory.
+    // If we just "pre-date" the write to the beginning of time (since we don't know
+    // when it actually happened), we'd miss the UB in this test.
+    // Also, the UB error should point at the write above, not the addr-of here.
+    P.store(std::ptr::addr_of_mut!(val), Relaxed);
+
+    // Wait for the thread to be done.
+    t1.join().unwrap();
+
+    // Read initialized value.
+    assert_eq!(val, 127);
+}
diff --git a/src/tools/miri/tests/fail/data_race/local_variable_write_race.stderr b/src/tools/miri/tests/fail/data_race/local_variable_write_race.stderr
new file mode 100644
index 0000000000000..d84db955a3d78
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/local_variable_write_race.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Data race detected between (1) non-atomic write on thread `main` and (2) non-atomic write on thread `unnamed-ID` at ALLOC. (2) just happened here
+  --> $DIR/local_variable_write_race.rs:LL:CC
+   |
+LL |             *ptr = 127;
+   |             ^^^^^^^^^^ Data race detected between (1) non-atomic write on thread `main` and (2) non-atomic write on thread `unnamed-ID` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/local_variable_write_race.rs:LL:CC
+   |
+LL |     let mut val: u8 = 0;
+   |                       ^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span) on thread `unnamed-ID`:
+   = note: inside closure at $DIR/local_variable_write_race.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/pass/concurrency/data_race.rs b/src/tools/miri/tests/pass/concurrency/data_race.rs
index d31420380a565..34380dfa504d5 100644
--- a/src/tools/miri/tests/pass/concurrency/data_race.rs
+++ b/src/tools/miri/tests/pass/concurrency/data_race.rs
@@ -1,6 +1,6 @@
 //@compile-flags: -Zmiri-disable-weak-memory-emulation -Zmiri-preemption-rate=0
 
-use std::sync::atomic::{fence, AtomicUsize, Ordering};
+use std::sync::atomic::*;
 use std::thread::spawn;
 
 #[derive(Copy, Clone)]
@@ -112,9 +112,41 @@ pub fn test_simple_release() {
     }
 }
 
+fn test_local_variable_lazy_write() {
+    static P: AtomicPtr<u8> = AtomicPtr::new(core::ptr::null_mut());
+
+    // Create the local variable, and initialize it.
+    // This write happens before the thread is spawned, so there is no data race.
+    let mut val: u8 = 0;
+
+    let t1 = std::thread::spawn(|| {
+        while P.load(Ordering::Relaxed).is_null() {
+            std::hint::spin_loop();
+        }
+        unsafe {
+            // Initialize `*P`.
+            let ptr = P.load(Ordering::Relaxed);
+            *ptr = 127;
+        }
+    });
+
+    // Actually generate memory for the local variable.
+    // This is the time its value is actually written to memory:
+    // that's *after* the thread above was spawned!
+    // This may hence look like a data race wrt the access in the thread above.
+    P.store(std::ptr::addr_of_mut!(val), Ordering::Relaxed);
+
+    // Wait for the thread to be done.
+    t1.join().unwrap();
+
+    // Read initialized value.
+    assert_eq!(val, 127);
+}
+
 pub fn main() {
     test_fence_sync();
     test_multiple_reads();
     test_rmw_no_block();
     test_simple_release();
+    test_local_variable_lazy_write();
 }