From 61a0a7f0a3a843282c186e8c35b116d6cb4c8d19 Mon Sep 17 00:00:00 2001
From: Michael Woerister <michaelwoerister@posteo>
Date: Thu, 27 Nov 2014 13:54:01 +0100
Subject: [PATCH] debuginfo: Fix multi-byte character related bug in cleanup
 scope handling.

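Cleanup scopes for blocks used to take their debug location from the
last byte of the block's span, assuming that byte is always the closing
curly brace. Blocks without braces (e.g. simple closure bodies) can end
in a multi-byte character instead, so the last-byte span is now only
used when the source snippet really ends in `}`; otherwise the span of
the whole node is kept.

Also see issue #18791.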
---
 src/librustc_trans/trans/base.rs        |  2 +-
 src/librustc_trans/trans/controlflow.rs |  4 +--
 src/librustc_trans/trans/debuginfo.rs   | 35 ++++++++++++++++---------
 src/librustc_trans/trans/expr.rs        | 11 +++++---
 src/test/debuginfo/multi-byte-chars.rs  | 28 ++++++++++++++++++++
 5 files changed, 62 insertions(+), 18 deletions(-)
 create mode 100644 src/test/debuginfo/multi-byte-chars.rs

diff --git a/src/librustc_trans/trans/base.rs b/src/librustc_trans/trans/base.rs
index 52e54a4a2613a..e0d588a1d3985 100644
--- a/src/librustc_trans/trans/base.rs
+++ b/src/librustc_trans/trans/base.rs
@@ -1816,7 +1816,7 @@ pub fn trans_closure<'a, 'b, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
 
     // cleanup scope for the incoming arguments
     let fn_cleanup_debug_loc =
-        debuginfo::get_cleanup_debug_loc_for_ast_node(fn_ast_id, body.span, true);
+        debuginfo::get_cleanup_debug_loc_for_ast_node(ccx, fn_ast_id, body.span, true);
     let arg_scope = fcx.push_custom_cleanup_scope_with_debug_loc(fn_cleanup_debug_loc);
 
     let block_ty = node_id_type(bcx, body.id);
diff --git a/src/librustc_trans/trans/controlflow.rs b/src/librustc_trans/trans/controlflow.rs
index 10a73033b64af..62d314a482912 100644
--- a/src/librustc_trans/trans/controlflow.rs
+++ b/src/librustc_trans/trans/controlflow.rs
@@ -55,7 +55,7 @@ pub fn trans_stmt<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
 
     let id = ast_util::stmt_id(s);
     let cleanup_debug_loc =
-        debuginfo::get_cleanup_debug_loc_for_ast_node(id, s.span, false);
+        debuginfo::get_cleanup_debug_loc_for_ast_node(bcx.ccx(), id, s.span, false);
     fcx.push_ast_cleanup_scope(cleanup_debug_loc);
 
     match s.node {
@@ -103,7 +103,7 @@ pub fn trans_block<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
     let mut bcx = bcx;
 
     let cleanup_debug_loc =
-        debuginfo::get_cleanup_debug_loc_for_ast_node(b.id, b.span, true);
+        debuginfo::get_cleanup_debug_loc_for_ast_node(bcx.ccx(), b.id, b.span, true);
     fcx.push_ast_cleanup_scope(cleanup_debug_loc);
 
     for s in b.stmts.iter() {
diff --git a/src/librustc_trans/trans/debuginfo.rs b/src/librustc_trans/trans/debuginfo.rs
index 326adf1f3e7b6..e798dd4dc945f 100644
--- a/src/librustc_trans/trans/debuginfo.rs
+++ b/src/librustc_trans/trans/debuginfo.rs
@@ -1047,10 +1047,11 @@ pub fn create_argument_metadata(bcx: Block, arg: &ast::Arg) {
     })
 }
 
-pub fn get_cleanup_debug_loc_for_ast_node(node_id: ast::NodeId,
-                                          node_span: Span,
-                                          is_block: bool)
-                                          -> NodeInfo {
+pub fn get_cleanup_debug_loc_for_ast_node<'a, 'tcx>(cx: &CrateContext<'a, 'tcx>,
+                                                    node_id: ast::NodeId,
+                                                    node_span: Span,
+                                                    is_block: bool)
+                                                 -> NodeInfo {
     // A debug location needs two things:
     // (1) A span (of which only the beginning will actually be used)
     // (2) An AST node-id which will be used to look up the lexical scope
@@ -1080,15 +1081,25 @@ pub fn get_cleanup_debug_loc_for_ast_node(node_id: ast::NodeId,
     // scope is actually left when the cleanup code is executed.
     // In practice it shouldn't make much of a difference.
 
-    let cleanup_span = if is_block {
-        Span {
-            lo: node_span.hi - codemap::BytePos(1), // closing brace should always be 1 byte...
-            hi: node_span.hi,
-            expn_id: node_span.expn_id
+    let mut cleanup_span = node_span;
+
+    if is_block {
+        // Not all blocks actually end in a curly brace (e.g. simple closure
+        // bodies). Only narrow the span to the closing brace when the last
+        // byte really is `}`; otherwise keep the span of the whole node.
+        let code_snippet = cx.sess().codemap().span_to_snippet(node_span);
+        if let Some(code_snippet) = code_snippet {
+            let bytes = code_snippet.as_bytes();
+
+            if bytes.len() > 0 && bytes[bytes.len()-1 ..] == b"}" {
+                cleanup_span = Span {
+                    lo: node_span.hi - codemap::BytePos(1),
+                    hi: node_span.hi,
+                    expn_id: node_span.expn_id
+                };
+            }
         }
-    } else {
-        node_span
-    };
+    }
 
     NodeInfo {
         id: node_id,
diff --git a/src/librustc_trans/trans/expr.rs b/src/librustc_trans/trans/expr.rs
index b7ac0f4975430..149a179634873 100644
--- a/src/librustc_trans/trans/expr.rs
+++ b/src/librustc_trans/trans/expr.rs
@@ -97,7 +97,8 @@ pub fn trans_into<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
 
     debug!("trans_into() expr={}", expr.repr(bcx.tcx()));
 
-    let cleanup_debug_loc = debuginfo::get_cleanup_debug_loc_for_ast_node(expr.id,
+    let cleanup_debug_loc = debuginfo::get_cleanup_debug_loc_for_ast_node(bcx.ccx(),
+                                                                          expr.id,
                                                                           expr.span,
                                                                           false);
     bcx.fcx.push_ast_cleanup_scope(cleanup_debug_loc);
@@ -130,7 +131,8 @@ pub fn trans<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
     let mut bcx = bcx;
     let fcx = bcx.fcx;
 
-    let cleanup_debug_loc = debuginfo::get_cleanup_debug_loc_for_ast_node(expr.id,
+    let cleanup_debug_loc = debuginfo::get_cleanup_debug_loc_for_ast_node(bcx.ccx(),
+                                                                          expr.id,
                                                                           expr.span,
                                                                           false);
     fcx.push_ast_cleanup_scope(cleanup_debug_loc);
@@ -621,7 +623,10 @@ fn trans_datum_unadjusted<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
                 ast::ExprRepeat(..) | ast::ExprVec(..) => {
                     // Special case for slices.
                     let cleanup_debug_loc =
-                        debuginfo::get_cleanup_debug_loc_for_ast_node(x.id, x.span, false);
+                        debuginfo::get_cleanup_debug_loc_for_ast_node(bcx.ccx(),
+                                                                      x.id,
+                                                                      x.span,
+                                                                      false);
                     fcx.push_ast_cleanup_scope(cleanup_debug_loc);
                     let datum = unpack_datum!(
                         bcx, tvec::trans_slice_vec(bcx, expr, &**x));
diff --git a/src/test/debuginfo/multi-byte-chars.rs b/src/test/debuginfo/multi-byte-chars.rs
new file mode 100644
index 0000000000000..dd0d86bf742e6
--- /dev/null
+++ b/src/test/debuginfo/multi-byte-chars.rs
@@ -0,0 +1,28 @@
+// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// ignore-android: FIXME(#10381)
+// min-lldb-version: 310
+
+// compile-flags:-g
+
+#![feature(non_ascii_idents)]
+
+// This test checks whether debuginfo generation can handle multi-byte UTF-8
+// characters at the end of a block. There's no need to do anything in the
+// debugger -- just make sure that the compiler doesn't crash.
+// See also issue #18791.
+
+struct C { θ: u8 } // field name is a multi-byte UTF-8 identifier
+
+fn main() { // never inspected in a debugger; it only has to compile with -g
+    let x =  C { θ: 0 };
+    (|c: C| c.θ )(x); // brace-less closure body ending in the multi-byte `θ`
+}