Not planned
Description
#include<vector>
void f(int);
void use_idx_const_size_resize() {
std::vector<int> v;
v.resize(100000);
auto s = v.size();
for (std::vector<int>::size_type i = 0; i < s; i++)
f(v[i]);
}
$ clang++ -O3 -stdlib=libc++ -fno-exceptions -std=c++20
use_idx_const_size_resize(): # @use_idx_const_size_resize()
push r14
push rbx
sub rsp, 24
xorps xmm0, xmm0
movaps xmmword ptr [rsp], xmm0
mov qword ptr [rsp + 16], 0
mov rdi, rsp
mov esi, 100000
call std::__1::vector<int, std::__1::allocator<int> >::__append(unsigned long)
mov rdi, qword ptr [rsp]
mov rbx, qword ptr [rsp + 8]
sub rbx, rdi
je .LBB0_4
sar rbx, 2
cmp rbx, 1
adc rbx, 0
xor r14d, r14d
.LBB0_2: # =>This Inner Loop Header: Depth=1
mov rax, qword ptr [rsp]
mov edi, dword ptr [rax + 4*r14]
call f(int)@PLT
inc r14
cmp rbx, r14
jne .LBB0_2
mov rdi, qword ptr [rsp]
.LBB0_4:
test rdi, rdi
je .LBB0_6
mov qword ptr [rsp + 8], rdi
call operator delete(void*)@PLT
.LBB0_6:
add rsp, 24
pop rbx
pop r14
ret
std::__1::vector<int, std::__1::allocator<int> >::__append(unsigned long): # @std::__1::vector<int, std::__1::allocator<int> >::__append(unsigned long)
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 24
mov r13, rsi
mov rbx, rdi
mov r15, qword ptr [rdi + 8]
mov rax, qword ptr [rdi + 16]
mov rcx, rax
sub rcx, r15
sar rcx, 2
cmp rcx, rsi
jae .LBB1_1
mov r14, qword ptr [rbx]
mov rdi, r15
sub rdi, r14
mov r12, rdi
sar r12, 2
lea rdx, [r12 + r13]
mov rcx, rdx
shr rcx, 62
jne .LBB1_20
movabs rcx, 4611686018427387903
movabs rsi, 9223372036854775800
sub rax, r14
add rsi, 4
mov rbp, rax
sar rbp
cmp rbp, rdx
cmovbe rbp, rdx
cmp rax, rsi
cmovae rbp, rcx
test rbp, rbp
mov qword ptr [rsp + 16], rdi # 8-byte Spill
je .LBB1_6
cmp rbp, rcx
ja .LBB1_21
lea rdi, [4*rbp]
call operator new(unsigned long)@PLT
mov rcx, rax
jmp .LBB1_9
.LBB1_1:
test r13, r13
je .LBB1_3
lea rdx, [4*r13]
mov rdi, r15
xor esi, esi
call memset@PLT
lea r15, [r15 + 4*r13]
.LBB1_3:
mov qword ptr [rbx + 8], r15
jmp .LBB1_19
.LBB1_6:
xor ecx, ecx
.LBB1_9:
mov qword ptr [rsp], r12 # 8-byte Spill
lea r12, [rcx + 4*r12]
mov qword ptr [rsp + 8], rcx # 8-byte Spill
lea rbp, [rcx + 4*rbp]
lea rdx, [4*r13]
mov rdi, r12
xor esi, esi
call memset@PLT
lea rax, [r12 + 4*r13]
mov rdx, r15
sub rdx, r14
je .LBB1_18
add rdx, -4
cmp rdx, 76
jae .LBB1_12
mov rcx, r15
jmp .LBB1_17
.LBB1_12:
mov rsi, qword ptr [rsp + 16] # 8-byte Reload
mov rdi, qword ptr [rsp + 8] # 8-byte Reload
add rsi, rdi
mov rcx, r15
sub rcx, rsi
cmp rcx, 32
jae .LBB1_14
mov rcx, r15
jmp .LBB1_17
.LBB1_14:
shr rdx, 2
inc rdx
movabs r9, 9223372036854775800
and r9, rdx
lea rsi, [4*r9]
sub r12, rsi
mov rcx, r15
sub rcx, rsi
mov rsi, qword ptr [rsp] # 8-byte Reload
lea rsi, [rdi + 4*rsi]
add rsi, -16
mov rdi, rdx
and rdi, -8
neg rdi
xor r8d, r8d
.LBB1_15: # =>This Inner Loop Header: Depth=1
movups xmm0, xmmword ptr [r15 + 4*r8 - 32]
movups xmm1, xmmword ptr [r15 + 4*r8 - 16]
movups xmmword ptr [rsi + 4*r8], xmm1
movups xmmword ptr [rsi + 4*r8 - 16], xmm0
add r8, -8
cmp rdi, r8
jne .LBB1_15
cmp rdx, r9
je .LBB1_18
.LBB1_17: # =>This Inner Loop Header: Depth=1
mov edx, dword ptr [rcx - 4]
add rcx, -4
mov dword ptr [r12 - 4], edx
add r12, -4
cmp rcx, r14
jne .LBB1_17
.LBB1_18:
mov qword ptr [rbx], r12
mov qword ptr [rbx + 8], rax
mov qword ptr [rbx + 16], rbp
test r14, r14
je .LBB1_19
mov rdi, r14
add rsp, 24
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
jmp operator delete(void*)@PLT # TAILCALL
.LBB1_19:
add rsp, 24
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
.LBB1_20:
mov rdi, rbx
call std::__1::vector<int, std::__1::allocator<int> >::__throw_length_error[abi:v180000]() const
.LBB1_21:
call std::__throw_bad_array_new_length[abi:v180000]()
std::__1::vector<int, std::__1::allocator<int> >::__throw_length_error[abi:v180000]() const: # @std::__1::vector<int, std::__1::allocator<int> >::__throw_length_error[abi:v180000]() const
push rax
lea rdi, [rip + .L.str]
call std::__1::__throw_length_error[abi:v180000](char const*)
std::__1::__throw_length_error[abi:v180000](char const*): # @std::__1::__throw_length_error[abi:v180000](char const*)
push rax
mov rsi, rdi
lea rdi, [rip + .L.str.1]
xor eax, eax
call std::__1::__libcpp_verbose_abort(char const*, ...)@PLT
std::__throw_bad_array_new_length[abi:v180000](): # @std::__throw_bad_array_new_length[abi:v180000]()
push rax
lea rdi, [rip + .L.str.2]
xor eax, eax
call std::__1::__libcpp_verbose_abort(char const*, ...)@PLT
.L.str:
.asciz "vector"
.L.str.1:
.asciz "length_error was thrown in -fno-exceptions mode with message \"%s\""
.L.str.2:
.asciz "bad_array_new_length was thrown in -fno-exceptions mode"
Metadata
Metadata
Assignees
Labels
Type
Projects
Milestone
Relationships
Development
No branches or pull requests
Activity
llvmbot commentedon Jul 26, 2023
@llvm/issue-subscribers-clang-codegen
philnik777 commentedon Jul 26, 2023
Because we want to abort when an assumption is not met. If we didn't do that the code would do very weird things if you hit these cases. Although this seems quite obvious to me, so I guess you want something specific?
hiraditya commentedon Jul 26, 2023
Probably vector::resize calls allocator::allocate, where __throw_bad_array_new_length is called in call cases.
I think
operator new
would already give some error of sorts, based on its specification so it shouldn't be necessary to have a call to throw.hiraditya commentedon Jul 26, 2023
abort should be okay. I see
__throw_bad_array_new_length
calls abort when exceptions are disabled. Is it possible to inline the call to abort?C++20 has
operator new (std::size_t count, const std::nothrow_t& tag )
. Does it make sense to call this version of new with -fno-exceptions ?btw, gcc doesn't throw or call abort in this case (with or without -fno-exceptions) https://godbolt.org/z/TrvGsc3a6
philnik777 commentedon Jul 26, 2023
Why would we want to do that? It's not like throwing will ever be a hot path.
No. That would just result in having to handle the case in more places. Whether we abort in
operator new
or in a place where we want to allocate doesn't make any difference other than resulting in more work for libc++ developers.efriedma-quic commentedon Jul 26, 2023
That's because gcc ends up inlining more aggressively; libstdc++ has the same checks, the code just gets folded away. Which I guess could be considered an LLVM bug, but it only affects the case where the argument to resize() is a constant.
There's another missed optimization that seems more significant, though: I think the call to __throw_bad_array_new_length is unreachable, but LLVM can't manage to eliminate it. Not sure if there's some simple LLVM fix for that, or if the implementation of __append is structured in a way that makes the optimization impractical.
I think the idea here is to reduce the codesize of the cold codepath? Something along the lines of
"-D_LIBCPP_VERBOSE_ABORT(...)=__builtin_abort()"
.hiraditya commentedon Jul 27, 2023
I haven't looked at the code of libstdc++ but from this example (https://godbolt.org/z/3r1nGn4Pr) resize with unknown number doesn't generate a
throw
.Code size is an issue for sure.
w.r.t. performance, it might appear that this is not on a hot patch but this is std::vector. Almost everything in std::vector's constructor is way more frequently used than most libc++ data structures. I dont have data to provide unfortunately.
philnik777 commentedon Aug 2, 2023
Yes, it does: https://godbolt.org/z/zv3zWPjqn
"Way more frequently used" and "hot path" are completely different things. If 99.9% of calls don't take a path, that path is cold no matter how often the code is called. We don't want to pessimize the 99.9% of cases to get marginally better performance in the 0.01% of cases.
hiraditya commentedon Aug 2, 2023
Ah i see. the outlined cold function
use_idx_const_size_resize(int) [clone .cold]:
was filtered in my link for some reason. Thanks for clarifying. So it seems to be a regression only when argument to resize is a constant then.sure, my intent was not to pessimize remaining code. I was only looking for ways where we can avoid generating calls to throw when possible, as in the motivating testcase for this issue.
philnik777 commentedon Aug 2, 2023
I don't find you example to be particularly motivating TBH. This probably saves you just about nothing, since it will most likely just get generated by a different TU. It's also just a few bytes of code plus a tiny bit of data.
efriedma-quic commentedon Aug 2, 2023
Focusing on what we can do instead of what we can't do, I see the following possible sources of savings:
hiraditya commentedon Oct 19, 2023
A related PR is under review: #69498
philnik777 commentedon Mar 16, 2024
Closing, since I don't see anything actionable here.
efriedma-quic commentedon Mar 28, 2024
Opened #87010 for the redundant bounds-check.