From 3fe7cfc32656518d5e6262e580f0a16cd2412dd7 Mon Sep 17 00:00:00 2001
From: Steven Fackler <sfackler@gmail.com>
Date: Wed, 13 Nov 2019 17:14:50 -0800
Subject: [PATCH] Remove some stack frames from `.async` calls

The `Context` argument is currently smuggled through TLS for
async-generated futures. The current infrastructure is closure-based,
and results in an extra 6 stack frames when .awaiting an async-generated
future!

```
  12: foo::async_b::{{closure}}
             at src/main.rs:10
  13: <std::future::GenFuture<T> as core::future::future::Future>::poll::{{closure}}
             at /rustc/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/future.rs:43
  14: std::future::set_task_context
             at /rustc/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/future.rs:79
  15: <std::future::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/future.rs:43
  16: std::future::poll_with_tls_context::{{closure}}
             at /rustc/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/future.rs:121
  17: std::future::get_task_context
             at /rustc/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/future.rs:111
  18: std::future::poll_with_tls_context
             at /rustc/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/future.rs:121
  19: foo::async_a::{{closure}}
             at src/main.rs:6
```

While the long (medium?) term solution is to remove the use of TLS
entirely, we can improve things a bit in the meantime. In particular,
this commit does 2 things:

1. `get_task_context` has been inlined into `poll_with_tls_context`,
    removing 2 frames (16 and 17 above).
2. `set_task_context` now returns a guard type that resets the TLS
    rather than taking a closure, removing 2 frames (13 and 14 above).

We can also remove frame 18 by removing `poll_with_tls_context` in favor
of a `get_task_context` function which returns a guard, but that
requires adjusting the code generated for .await, so I've left that off
for now.
---
 src/libstd/future.rs | 45 ++++++++++++--------------------------------
 1 file changed, 12 insertions(+), 33 deletions(-)

diff --git a/src/libstd/future.rs b/src/libstd/future.rs
index c65f71fb1a4e2..6de3f1d545b57 100644
--- a/src/libstd/future.rs
+++ b/src/libstd/future.rs
@@ -40,10 +40,11 @@ impl<T: Generator<Yield = ()>> Future for GenFuture<T> {
     fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
         // Safe because we're !Unpin + !Drop mapping to a ?Unpin value
         let gen = unsafe { Pin::map_unchecked_mut(self, |s| &mut s.0) };
-        set_task_context(cx, || match gen.resume() {
+        let _guard = unsafe { set_task_context(cx) };
+        match gen.resume() {
             GeneratorState::Yielded(()) => Poll::Pending,
             GeneratorState::Complete(x) => Poll::Ready(x),
-        })
+        }
     }
 }
 
@@ -61,35 +62,23 @@ impl Drop for SetOnDrop {
     }
 }
 
-#[doc(hidden)]
-#[unstable(feature = "gen_future", issue = "50547")]
-/// Sets the thread-local task context used by async/await futures.
-pub fn set_task_context<F, R>(cx: &mut Context<'_>, f: F) -> R
-where
-    F: FnOnce() -> R
-{
+// Safety: the returned guard must drop before `cx` is dropped and before
+// any previous guard is dropped.
+unsafe fn set_task_context(cx: &mut Context<'_>) -> SetOnDrop {
     // transmute the context's lifetime to 'static so we can store it.
-    let cx = unsafe {
-        core::mem::transmute::<&mut Context<'_>, &mut Context<'static>>(cx)
-    };
+    let cx = core::mem::transmute::<&mut Context<'_>, &mut Context<'static>>(cx);
     let old_cx = TLS_CX.with(|tls_cx| {
         tls_cx.replace(Some(NonNull::from(cx)))
     });
-    let _reset = SetOnDrop(old_cx);
-    f()
+    SetOnDrop(old_cx)
 }
 
 #[doc(hidden)]
 #[unstable(feature = "gen_future", issue = "50547")]
-/// Retrieves the thread-local task context used by async/await futures.
-///
-/// This function acquires exclusive access to the task context.
-///
-/// Panics if no context has been set or if the context has already been
-/// retrieved by a surrounding call to get_task_context.
-pub fn get_task_context<F, R>(f: F) -> R
+/// Polls a future in the current thread-local task waker.
+pub fn poll_with_tls_context<F>(f: Pin<&mut F>) -> Poll<F::Output>
 where
-    F: FnOnce(&mut Context<'_>) -> R
+    F: Future
 {
     let cx_ptr = TLS_CX.with(|tls_cx| {
         // Clear the entry so that nested `get_task_waker` calls
@@ -108,15 +97,5 @@ where
     //
     // The pointer that was inserted came from an `&mut Context<'_>`,
     // so it is safe to treat as mutable.
-    unsafe { f(cx_ptr.as_mut()) }
-}
-
-#[doc(hidden)]
-#[unstable(feature = "gen_future", issue = "50547")]
-/// Polls a future in the current thread-local task waker.
-pub fn poll_with_tls_context<F>(f: Pin<&mut F>) -> Poll<F::Output>
-where
-    F: Future
-{
-    get_task_context(|cx| F::poll(f, cx))
+    unsafe { F::poll(f, cx_ptr.as_mut()) }
 }