forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
implement views::concat (P2542) #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
You can test this locally with the following command:
git-clang-format --diff 497b2ebb9edcfd5315586b796f47589e9820b4b9 bf9d232583ff7e6ad7bbb9e554a826bba5d529cb -- libcxx/include/__ranges/concat_view.h
View the diff from clang-format here.
diff --git a/libcxx/include/__ranges/concat_view.h b/libcxx/include/__ranges/concat_view.h
index 78bf3f4c5..fdc4e98c6 100644
--- a/libcxx/include/__ranges/concat_view.h
+++ b/libcxx/include/__ranges/concat_view.h
@@ -48,52 +48,40 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-//#if _LIBCPP_STD_VER >= 20
+// #if _LIBCPP_STD_VER >= 20
namespace ranges {
+template <class View, class... Views>
+struct last_view : last_view<Views...> {};
-template<class View, class... Views>
-struct last_view : last_view<Views...>
-{
-
-};
-
-template<class View>
-struct last_view<View>
-{
- using type = View;
+template <class View>
+struct last_view<View> {
+ using type = View;
};
template <class Ref, class RRef, class It>
-concept concat_indirectly_readable_impl =
-requires (const It it) {
+concept concat_indirectly_readable_impl = requires(const It it) {
{ *it } -> convertible_to<Ref>;
{ ranges::iter_move(it) } -> convertible_to<RRef>;
};
-
template <class... Rs>
-using concat_reference_t = common_reference_t<range_reference_t<Rs>...>;
+using concat_reference_t = common_reference_t<range_reference_t<Rs>...>;
template <class... Rs>
-using concat_value_t = common_type_t<range_value_t<Rs>...>;
+using concat_value_t = common_type_t<range_value_t<Rs>...>;
template <class... Rs>
-using concat_rvalue_reference_t = common_reference_t<range_rvalue_reference_t<Rs>...>;
+using concat_rvalue_reference_t = common_reference_t<range_rvalue_reference_t<Rs>...>;
template <class... Rs>
-concept concat_indirectly_readable =
- common_reference_with<concat_reference_t<Rs...>&&,
- concat_value_t<Rs...>&> &&
- common_reference_with<concat_reference_t<Rs...>&&,
- concat_rvalue_reference_t<Rs...>&&> &&
- common_reference_with<concat_rvalue_reference_t<Rs...>&&,
- concat_value_t<Rs...> const&> &&
- (concat_indirectly_readable_impl<concat_reference_t<Rs...>,
- concat_rvalue_reference_t<Rs...>,
- iterator_t<Rs>> && ...);
-
+concept concat_indirectly_readable =
+ common_reference_with<concat_reference_t<Rs...>&&, concat_value_t<Rs...>&> &&
+ common_reference_with<concat_reference_t<Rs...>&&, concat_rvalue_reference_t<Rs...>&&> &&
+ common_reference_with<concat_rvalue_reference_t<Rs...>&&, concat_value_t<Rs...> const&> &&
+ (concat_indirectly_readable_impl<concat_reference_t<Rs...>, concat_rvalue_reference_t<Rs...>, iterator_t<Rs>> &&
+ ...);
template <class... Rs>
concept concatable = requires { // exposition only
@@ -103,301 +91,274 @@ concept concatable = requires { // exposition only
} && concat_indirectly_readable<Rs...>;
template <bool Const, class... Rs>
-concept concat_is_random_access =
- all_random_access<Const, Rs...> &&
- (sized_range<maybe_const<Const, Rs>> && ...);
+concept concat_is_random_access = all_random_access<Const, Rs...> && (sized_range<maybe_const<Const, Rs>> && ...);
template <class R>
-concept constant_time_reversible = // exposition only
-(bidirectional_range<R> && common_range<R>) ||
-(sized_range<R> && random_access_range<R>);
+concept constant_time_reversible = // exposition only
+ (bidirectional_range<R> && common_range<R>) || (sized_range<R> && random_access_range<R>);
template <bool Const, class... Rs>
-concept concat_is_bidirectional =
- (bidirectional_range<maybe_const<Const, V>>
- && ... &&
- constant-time-reversible<maybe_const<Const, Fs>>);
-
+concept concat_is_bidirectional =
+ (bidirectional_range<maybe_const<Const, V>> && ... && constant - time - reversible<maybe_const<Const, Fs>>);
template <input_range... Views>
- requires (view<Views> && ...) && (sizeof...(Views) > 0) &&
- concatable<Views...>
-class concat_view : public view_interface<concat_view<Views...>>
-{
- tuple<Views...> views_; // exposition only
+ requires(view<Views> && ...) && (sizeof...(Views) > 0) && concatable<Views...>
+class concat_view : public view_interface<concat_view<Views...>> {
+ tuple<Views...> views_; // exposition only
- template <bool Const>
- class iterator; // exposition only
- class sentinel;
-
- public:
+ template <bool Const>
+ class iterator; // exposition only
+ class sentinel;
- constexpr concat_view() = default;
+public:
+ constexpr concat_view() = default;
- constexpr explicit concat_view(Views... views): tuple(views...) {}
+ constexpr explicit concat_view(Views... views) : tuple(views...) {}
- constexpr iterator<false> begin() requires(!(simple-view<Views> && ...))
- {
- iterator<false> it(this, in_place_index<0>, ranges::begin(get<0>(views_)));
- it.template satisfy<0>();
- return it;
- }
+ constexpr iterator<false> begin()
+ requires(!(simple - view<Views> && ...))
+ {
+ iterator<false> it(this, in_place_index<0>, ranges::begin(get<0>(views_)));
+ it.template satisfy<0>();
+ return it;
+ }
- constexpr iterator<true> begin() const
+ constexpr iterator<true> begin() const
requires((range<const Views> && ...) && concatable<const Views...>)
- {
- iterator<true> it(this, in_place_index<0>, ranges::begin(get<0>(views_)));
- it.template satisfy<0>();
- return it;
+ {
+ iterator<true> it(this, in_place_index<0>, ranges::begin(get<0>(views_)));
+ it.template satisfy<0>();
+ return it;
+ }
+
+ constexpr auto end()
+ requires(!(simple - view<Views> && ...))
+ {
+ if constexpr (common_range<last_view<Views...>::type>) {
+ // last_view to be implemented
+ constexpr auto N = sizeof...(Views);
+ return iterator<false>(this, in_place_index<N - 1>, ranges::end(get<N - 1>(views_)));
+ } else {
+ return default_sentinel;
}
-
- constexpr auto end() requires(!(simple-view<Views> && ...))
- {
- if constexpr (common_range<last_view<Views...>::type>) {
- // last_view to be implemented
- constexpr auto N = sizeof...(Views);
- return iterator<false>(this, in_place_index<N - 1>,
- ranges::end(get<N - 1>(views_)));
- } else {
- return default_sentinel;
- }
- }
-
- constexpr auto end() const requires(range<const Views>&&...)
- {
- if constexpr (common_range<last_view>) {
- // last_view to be implemented
- constexpr auto N = sizeof...(Views);
- return iterator<true>(this, in_place_index<N - 1>,
- ranges::end(get<N - 1>(views_)));
- } else {
- return default_sentinel;
- }
+ }
+
+ constexpr auto end() const
+ requires(range<const Views> && ...)
+ {
+ if constexpr (common_range<last_view>) {
+ // last_view to be implemented
+ constexpr auto N = sizeof...(Views);
+ return iterator<true>(this, in_place_index<N - 1>, ranges::end(get<N - 1>(views_)));
+ } else {
+ return default_sentinel;
}
-
- constexpr auto size() requires(sized_range<Views>&&...)
- {
- return apply([](auto... sizes) {
- using CT = make_unsigned_like_t<common_type_t<decltype(sizes)...>>;
- return (CT(sizes) + ...);
+ }
+
+ constexpr auto size()
+ requires(sized_range<Views> && ...)
+ {
+ return apply(
+ [](auto... sizes) {
+ using CT = make_unsigned_like_t<common_type_t<decltype(sizes)...>>;
+ return (CT(sizes) + ...);
},
tuple_transform(ranges::size, views_));
- }
-
- constexpr auto size() const requires(sized_range<const Views>&&...)
- {
- return apply([](auto... sizes) {
- using CT = make_unsigned_like_t<common_type_t<decltype(sizes)...>>;
- return (CT(sizes) + ...);
+ }
+
+ constexpr auto size() const
+ requires(sized_range<const Views> && ...)
+ {
+ return apply(
+ [](auto... sizes) {
+ using CT = make_unsigned_like_t<common_type_t<decltype(sizes)...>>;
+ return (CT(sizes) + ...);
},
tuple_transform(ranges::size, views_));
- }
-
+ }
};
-
-
-// begin class iterator
-
+// begin class iterator
template <input_range... Views>
- requires (view<Views> && ...) && (sizeof...(Views) > 0) &&
- concatable<Views...>
- template <bool Const>
- class concat_view<Views...>::iterator {
-
- public:
- //using iterator_category = see below; // not always present.
- using iterator_concept = _If<random_access_range<_View...>,
- random_access_iterator_tag,
- _If<bidirectional_range<_View...>,
- bidirectional_iterator_tag,
- _If<forward_range<_View...>,
- forward_iterator_tag,
- /* else */ input_iterator_tag
- >
- >
- >;
- //using value_type = concat-value-t<maybe-const<Const, Views>...>;
- //using difference_type = common_type_t<range_difference_t<maybe-const<Const, Views>>...>;
-
- private:
- using base_iter = // exposition only
+ requires(view<Views> && ...) && (sizeof...(Views) > 0) && concatable<Views...>
+template <bool Const>
+class concat_view<Views...>::iterator {
+public:
+ // using iterator_category = see below; // not always present.
+ using iterator_concept =
+ _If<random_access_range<_View...>,
+ random_access_iterator_tag,
+ _If<bidirectional_range<_View...>,
+ bidirectional_iterator_tag,
+ _If<forward_range<_View...>,
+ forward_iterator_tag,
+ /* else */ input_iterator_tag > > >;
+ // using value_type = concat-value-t<maybe-const<Const, Views>...>;
+ // using difference_type = common_type_t<range_difference_t<maybe-const<Const, Views>>...>;
+
+private:
+ using base_iter = // exposition only
variant<iterator_t<maybe_const<Const, Views>>...>;
-
- maybe_const<Const, concat_view>* parent_ = nullptr; // exposition only
- base_iter it_; // exposition only
-
- template <std::size_t N>
- constexpr void satisfy()
- {
- if constexpr (N < (sizeof...(Views) - 1)) {
- if (get<N>(it_) == ranges::end(get<N>(parent_->views_))) {
- it_.template emplace<N + 1>(ranges::begin(get<N + 1>(parent_->views_)));
- satisfy<N + 1>();
- }
- }
- }
- template <std::size_t N>
- constexpr void prev()
- {
- if constexpr (N == 0) {
- --get<0>(it_);
- } else {
- if (get<N>(it_) == ranges::begin(get<N>(parent_->views_))) {
- using prev_view = maybe-const<Const, tuple_element_t<N - 1, tuple<Views...>>>;
- if constexpr (common_range<prev_view>) {
- it_.template emplace<N - 1>(ranges::end(get<N - 1>(parent_->views_)));
- } else {
- it_.template emplace<N - 1>(
- ranges::next(ranges::begin(get<N - 1>(parent_->views_)),
- ranges::size(get<N - 1>(parent_->views_))));
- }
- prev<N - 1>();
- } else {
- --get<N>(it_);
- }
- }
- }
+ maybe_const<Const, concat_view>* parent_ = nullptr; // exposition only
+ base_iter it_; // exposition only
- template <std::size_t N>
- constexpr void advance_fwd(difference_type offset, difference_type steps)
- {
- using underlying_diff_type = iter_difference_t<variant_alternative_t<N, base-iter>>;
- if constexpr (N == sizeof...(Views) - 1) {
- get<N>(it_) += static_cast<underlying_diff_type>(steps);
- }
- else {
- auto n_size = ranges::distance(get<N>(parent_->views_));
- if (offset + steps < n_size) {
- get<N>(it_) += static_cast<underlying_diff_type>(steps);
- } else {
- it_.template emplace<N + 1>(ranges::begin(get<N + 1>(parent_->views_)));
- advance-fwd<N + 1>(0, offset + steps - n_size);
- }
- }
+ template <std::size_t N>
+ constexpr void satisfy() {
+ if constexpr (N < (sizeof...(Views) - 1)) {
+ if (get<N>(it_) == ranges::end(get<N>(parent_->views_))) {
+ it_.template emplace<N + 1>(ranges::begin(get<N + 1>(parent_->views_)));
+ satisfy<N + 1>();
+ }
}
-
- template <std::size_t N>
- constexpr void advance_bwd(difference_type offset, difference_type steps)
- {
- using underlying_diff_type = iter_difference_t<variant_alternative_t<N, base-iter>>;
- if constexpr (N == 0) {
- get<N>(it_) -= static_cast<underlying_diff_type>(steps);
+ }
+
+ template <std::size_t N>
+ constexpr void prev() {
+ if constexpr (N == 0) {
+ --get<0>(it_);
+ } else {
+ if (get<N>(it_) == ranges::begin(get<N>(parent_->views_))) {
+ using prev_view = maybe - const<Const, tuple_element_t<N - 1, tuple<Views...>>>;
+ if constexpr (common_range<prev_view>) {
+ it_.template emplace<N - 1>(ranges::end(get<N - 1>(parent_->views_)));
} else {
- if (offset >= steps) {
- get<N>(it_) -= static_cast<underlying_diff_type>(steps);
- } else {
- auto prev_size = ranges::distance(get<N - 1>(parent_->views_));
- it_.template emplace<N - 1>(ranges::begin(get<N - 1>(parent_->views_)) + prev_size);
- advance-bwd<N - 1>(prev_size, steps - offset);
- }
+ it_.template emplace<N - 1>(
+ ranges::next(ranges::begin(get<N - 1>(parent_->views_)), ranges::size(get<N - 1>(parent_->views_))));
}
+ prev<N - 1>();
+ } else {
+ --get<N>(it_);
+ }
+ }
+ }
+
+ template <std::size_t N>
+ constexpr void advance_fwd(difference_type offset, difference_type steps) {
+ using underlying_diff_type = iter_difference_t<variant_alternative_t<N, base - iter>>;
+ if constexpr (N == sizeof...(Views) - 1) {
+ get<N>(it_) += static_cast<underlying_diff_type>(steps);
+ } else {
+ auto n_size = ranges::distance(get<N>(parent_->views_));
+ if (offset + steps < n_size) {
+ get<N>(it_) += static_cast<underlying_diff_type>(steps);
+ } else {
+ it_.template emplace<N + 1>(ranges::begin(get<N + 1>(parent_->views_)));
+ advance - fwd<N + 1>(0, offset + steps - n_size);
+ }
+ }
+ }
+
+ template <std::size_t N>
+ constexpr void advance_bwd(difference_type offset, difference_type steps) {
+ using underlying_diff_type = iter_difference_t<variant_alternative_t<N, base - iter>>;
+ if constexpr (N == 0) {
+ get<N>(it_) -= static_cast<underlying_diff_type>(steps);
+ } else {
+ if (offset >= steps) {
+ get<N>(it_) -= static_cast<underlying_diff_type>(steps);
+ } else {
+ auto prev_size = ranges::distance(get<N - 1>(parent_->views_));
+ it_.template emplace<N - 1>(ranges::begin(get<N - 1>(parent_->views_)) + prev_size);
+ advance - bwd<N - 1>(prev_size, steps - offset);
+ }
}
+ }
- template <class... Args>
- explicit constexpr iterator(maybe-const<Const, concat_view>* parent, Args&&... args)
- requires constructible_from<base-iter, Args&&...>
- : it_(std::forward<Args>...), parent_(parent) {}
+ template <class... Args>
+ explicit constexpr iterator(maybe - const<Const, concat_view>* parent, Args&&... args)
+ requires constructible_from<base - iter, Args&&...>
+ : it_(std::forward<Args>...), parent_(parent) {}
- public:
+public:
+ iterator() = default;
- iterator() = default;
+ constexpr iterator(iterator<!Const> i)
+ requires Const && (convertible_to<iterator_t<Views>, iterator_t<const Views>> && ...)
+ : it_(std::move(i.it_)), parent_(i.parent_) {}
- constexpr iterator(iterator<!Const> i)
- requires Const && (convertible_to<iterator_t<Views>, iterator_t<const Views>> && ...)
- : it_(std::move(i.it_)), parent_(i.parent_) {}
+ constexpr decltype(auto) operator*() const {
+ using reference = concat_reference_t<maybe_const<Const, Views>...>;
+ return std::visit([](auto&& it) -> reference { return *it; }, it_);
+ }
- constexpr decltype(auto) operator*() const
- {
- using reference = concat_reference_t<maybe_const<Const, Views>...>;
- return std::visit([](auto&& it) -> reference { return *it; }, it_);
- }
+ constexpr iterator& operator++() {
+ constexpr auto i = it_.index();
+ ++get<i>(it_);
+ satisfy<i>();
+ return *this;
+ }
- constexpr iterator& operator++()
- {
- constexpr auto i = it_.index();
- ++get<i>(it_);
- satisfy<i>();
- return *this;
- }
+ constexpr void operator++(int) { ++*this; }
- constexpr void operator++(int)
- {
- ++*this;
- }
+ /*
- /*
+ constexpr iterator operator++(int)
+ requires all-forward<Const, Views...>;
- constexpr iterator operator++(int)
- requires all-forward<Const, Views...>;
-
- constexpr iterator& operator--()
- requires concat-is-bidirectional<Const, Views...>;
+ constexpr iterator& operator--()
+ requires concat-is-bidirectional<Const, Views...>;
- constexpr iterator operator--(int)
- requires concat-is-bidirectional<Const, Views...>;
+ constexpr iterator operator--(int)
+ requires concat-is-bidirectional<Const, Views...>;
- constexpr iterator& operator+=(difference_type n)
- requires concat-is-random-access<Const, Views...>;
+ constexpr iterator& operator+=(difference_type n)
+ requires concat-is-random-access<Const, Views...>;
- constexpr iterator& operator-=(difference_type n)
- requires concat-is-random-access<Const, Views...>;
+ constexpr iterator& operator-=(difference_type n)
+ requires concat-is-random-access<Const, Views...>;
- constexpr decltype(auto) operator[](difference_type n) const
- requires concat-is-random-access<Const, Views...>;
+ constexpr decltype(auto) operator[](difference_type n) const
+ requires concat-is-random-access<Const, Views...>;
- friend constexpr bool operator==(const iterator& x, const iterator& y)
- requires(equality_comparable<iterator_t<maybe-const<Const, Views>>>&&...);
+ friend constexpr bool operator==(const iterator& x, const iterator& y)
+ requires(equality_comparable<iterator_t<maybe-const<Const, Views>>>&&...);
- friend constexpr bool operator==(const iterator& it, default_sentinel_t);
+ friend constexpr bool operator==(const iterator& it, default_sentinel_t);
- friend constexpr bool operator<(const iterator& x, const iterator& y)
- requires all-random-access<Const, Views...>;
+ friend constexpr bool operator<(const iterator& x, const iterator& y)
+ requires all-random-access<Const, Views...>;
- friend constexpr bool operator>(const iterator& x, const iterator& y)
- requires all-random-access<Const, Views...>;
+ friend constexpr bool operator>(const iterator& x, const iterator& y)
+ requires all-random-access<Const, Views...>;
- friend constexpr bool operator<=(const iterator& x, const iterator& y)
- requires all-random-access<Const, Views...>;
+ friend constexpr bool operator<=(const iterator& x, const iterator& y)
+ requires all-random-access<Const, Views...>;
- friend constexpr bool operator>=(const iterator& x, const iterator& y)
- requires all-random-access<Const, Views...>;
+ friend constexpr bool operator>=(const iterator& x, const iterator& y)
+ requires all-random-access<Const, Views...>;
- friend constexpr auto operator<=>(const iterator& x, const iterator& y)
- requires (all-random-access<Const, Views...> &&
- (three_way_comparable<maybe-const<Const, Views>> &&...));
+ friend constexpr auto operator<=>(const iterator& x, const iterator& y)
+ requires (all-random-access<Const, Views...> &&
+ (three_way_comparable<maybe-const<Const, Views>> &&...));
- friend constexpr iterator operator+(const iterator& it, difference_type n)
- requires concat-is-random-access<Const, Views...>;
+ friend constexpr iterator operator+(const iterator& it, difference_type n)
+ requires concat-is-random-access<Const, Views...>;
- friend constexpr iterator operator+(difference_type n, const iterator& it)
- requires concat-is-random-access<Const, Views...>;
+ friend constexpr iterator operator+(difference_type n, const iterator& it)
+ requires concat-is-random-access<Const, Views...>;
- friend constexpr iterator operator-(const iterator& it, difference_type n)
- requires concat-is-random-access<Const, Views...>;
+ friend constexpr iterator operator-(const iterator& it, difference_type n)
+ requires concat-is-random-access<Const, Views...>;
- friend constexpr difference_type operator-(const iterator& x, const iterator& y)
- requires concat-is-random-access<Const, Views...>;
+ friend constexpr difference_type operator-(const iterator& x, const iterator& y)
+ requires concat-is-random-access<Const, Views...>;
- friend constexpr difference_type operator-(const iterator& x, default_sentinel_t)
- requires see below;
+ friend constexpr difference_type operator-(const iterator& x, default_sentinel_t)
+ requires see below;
- friend constexpr difference_type operator-(default_sentinel_t, const iterator& x)
- requires see below;
+ friend constexpr difference_type operator-(default_sentinel_t, const iterator& x)
+ requires see below;
- friend constexpr decltype(auto) iter_move(const iterator& it) noexcept(see below);
+ friend constexpr decltype(auto) iter_move(const iterator& it) noexcept(see below);
- friend constexpr void iter_swap(const iterator& x, const iterator& y) noexcept(see below)
- requires see below;
+ friend constexpr void iter_swap(const iterator& x, const iterator& y) noexcept(see below)
+ requires see below;
- */
+ */
};
-
-
-
} // namespace ranges
#endif // _LIBCPP_STD_VER >= 20
|
changkhothuychung
pushed a commit
that referenced
this pull request
Nov 17, 2024
… depobj construct (llvm#114221) A codegen crash is occurring when a depend object was initialized with omp_all_memory in the depobj directive. llvm#114214 The root cause of issue looks to be the improper handling of the dependency list when omp_all_memory was specified. The change introduces the use of OMPTaskDataTy to manage dependencies. The buildDependences function is called to construct the dependency list, and the list is iterated over to emit and store the dependencies. Reduced Test Case : ``` #include <omp.h> int main() { omp_depend_t obj; #pragma omp depobj(obj) depend(inout: omp_all_memory) } ``` ``` #1 0x0000000003de6623 SignalHandler(int) Signals.cpp:0:0 #2 0x00007f8e4a6b990f (/lib64/libpthread.so.0+0x1690f) #3 0x00007f8e4a117d2a raise (/lib64/libc.so.6+0x4ad2a) llvm#4 0x00007f8e4a1193e4 abort (/lib64/libc.so.6+0x4c3e4) llvm#5 0x00007f8e4a10fc69 __assert_fail_base (/lib64/libc.so.6+0x42c69) llvm#6 0x00007f8e4a10fcf1 __assert_fail (/lib64/libc.so.6+0x42cf1) llvm#7 0x0000000004114367 clang::CodeGen::CodeGenFunction::EmitOMPDepobjDirective(clang::OMPDepobjDirective const&) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x4114367) llvm#8 0x00000000040f8fac clang::CodeGen::CodeGenFunction::EmitStmt(clang::Stmt const*, llvm::ArrayRef<clang::Attr const*>) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x40f8fac) llvm#9 0x00000000040ff4fb clang::CodeGen::CodeGenFunction::EmitCompoundStmtWithoutScope(clang::CompoundStmt const&, bool, clang::CodeGen::AggValueSlot) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x40ff4fb) llvm#10 0x00000000041847b2 clang::CodeGen::CodeGenFunction::EmitFunctionBody(clang::Stmt const*) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x41847b2) llvm#11 0x0000000004199e4a clang::CodeGen::CodeGenFunction::GenerateCode(clang::GlobalDecl, llvm::Function*, clang::CodeGen::CGFunctionInfo const&) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x4199e4a) llvm#12 0x00000000041f7b9d 
clang::CodeGen::CodeGenModule::EmitGlobalFunctionDefinition(clang::GlobalDecl, llvm::GlobalValue*) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x41f7b9d) llvm#13 0x00000000041f16a3 clang::CodeGen::CodeGenModule::EmitGlobalDefinition(clang::GlobalDecl, llvm::GlobalValue*) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x41f16a3) llvm#14 0x00000000041fd954 clang::CodeGen::CodeGenModule::EmitDeferred() (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x41fd954) llvm#15 0x0000000004200277 clang::CodeGen::CodeGenModule::Release() (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x4200277) llvm#16 0x00000000046b6a49 (anonymous namespace)::CodeGeneratorImpl::HandleTranslationUnit(clang::ASTContext&) ModuleBuilder.cpp:0:0 llvm#17 0x00000000046b4cb6 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x46b4cb6) llvm#18 0x0000000006204d5c clang::ParseAST(clang::Sema&, bool, bool) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x6204d5c) llvm#19 0x000000000496b278 clang::FrontendAction::Execute() (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x496b278) llvm#20 0x00000000048dd074 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x48dd074) llvm#21 0x0000000004a38092 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0x4a38092) llvm#22 0x0000000000fd4e9c cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0xfd4e9c) llvm#23 0x0000000000fcca73 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) driver.cpp:0:0 llvm#24 0x0000000000fd140c clang_main(int, char**, llvm::ToolContext const&) (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0xfd140c) llvm#25 0x0000000000ee2ef3 main (/opt/cray/pe/cce/18.0.1/cce-clang/x86_64/bin/clang-18+0xee2ef3) llvm#26 
0x00007f8e4a10224c __libc_start_main (/lib64/libc.so.6+0x3524c) llvm#27 0x0000000000fcaae9 _start /home/abuild/rpmbuild/BUILD/glibc-2.31/csu/../sysdeps/x86_64/start.S:120:0 clang: error: unable to execute command: Aborted ``` --------- Co-authored-by: Chandra Ghale <[email protected]>
This PR simplifies the internal bitwise logic of the `flip()` function for `vector<bool>`, and creates new tests to validate the changes.
…120518) TestFirmwareCorefiles.py has a helper utility, create-empty-corefile.cpp, which creates corefiles with different metadata to specify the binary that should be loaded. It normally uses an actual binary's UUID for the metadata, and it uses the binary's cputype/cpusubtype for the corefile's mach header. There is one test where it creates a corefile with metadata for a UUID that cannot be found -- it is given no binary -- and in that case, the cputype/cpusubtype it sets in the core file mach header was uninitialized data. Through luck, on Darwin systems, the uninitialized data typically matched a CPU_TYPE from machine.h and the test would work. But when the value doesn't match one of those defines, lldb would reject the corefile entirely, and the test would fail. This has been an infrequent failure on the CI bots for a while and I couldn't ever repro it. There's a recent configuration where it was happening every time and I was able to track it down. rdar://141727563
…lvm#116462)" This reverts commit 89da344. Reason: buildbot breakages e.g., https://lab.llvm.org/buildbot/#/builders/55/builds/4556 (for which the reverted patch is the only code change)
…m#120536) This allows us to write more range based for loops because we no longer need the iterator. It also matches IR's Use class.
)" (llvm#120594) This reverts commit e0526b0. The `v_minmax/maxmin_f16`(GFX11) needs to be updated to t16 with `v_minmax/maxmin_num_f16`(GFX12) together since they share the same codegen pattern. Revert the old patch and resubmit
) This can be used with /llvmlibthin to create thin archives without an index, which is a prerequisite for porting https://reviews.llvm.org/D117284 to lld-link. Creating files like this is already possible with `llvm-ar rcS`, so this doesn't add additional problems.
Add tests for horizontal add patterns with missing/undemanded elements - which typically prevents folding to the (add (shuffle a, b),(shuffle a, b)) optimal pattern
**Note:** The register reading and writing depends on new register flavor support in thread_get_state/thread_set_state in the kernel, which will be first available in macOS 15.4. The Apple M4 line of cores includes the Scalable Matrix Extension (SME) feature. The M4s do not implement Scalable Vector Extension (SVE), although the processor is in Streaming SVE Mode when the SME is being used. The most obvious side effects of being in SSVE Mode are that (on the M4 cores) NEON instructions cannot be used, and watchpoints may get false positives because the address comparisons are done at a lowered granularity. When SSVE mode is enabled, the kernel will provide the Streaming Vector Length register, which is a maximum of 64 bytes with the M4. Also provided are SVCR (with bits indicating if SSVE mode and SME mode are enabled), TPIDR2, SVL. Then the SVE registers Z0..31 (SVL bytes long), P0..15 (SVL/8 bytes), the ZA matrix register (SVL*SVL bytes), and the M4 supports SME2, so the ZT0 register (64 bytes). When SSVE/SME are disabled, none of these registers are provided by the kernel - reads and writes of them will fail. Unlike Linux, lldb cannot modify the SVL through a thread_set_state call, or change the processor state's SSVE/SME status. There is also no way for a process to request a lowered SVL size today, so the work that David did to handle VL/SVL changing while stepping through a process is not an issue on Darwin today. But debugserver should be providing everything necessary so we can reuse all of David's work on resizing the register contexts in lldb if it happens in the future. debugserver sends svl, svcr, and tpidr2 in the expedited registers when a thread stops, if SSVE|SME mode are enabled (if the kernel allows it to read the ARM_SME_STATE register set). While the maximum SVL is 64 bytes on M4, the AArch64 maximum possible SVL is 256; this would give us a 64k ZA register. 
If debugserver sized all of its register contexts assuming the largest possible SVL, we could easily use 2MB more memory for the register contexts of all threads in a process -- and on iOS et al, processes must run within a small memory allotment and this would push us over that. Much of the work in debugserver was changing the arm64 register context from being a static compile-time array of register sets, to being initialized at runtime if debugserver is running on a machine with SME. The ZA is only created to the machine's actual maximum SVL. The size of the 32 SVE Z registers is less significant so I am statically allocating those to the architecturally largest possible SVL value today. Also, debugserver includes information about registers that share the same part of the register file. e.g. S0 and D0 are the lower parts of the NEON 128-bit V0 register. And when running on an SME machine, v0 is the lower 128 bits of the SVE Z0 register. So the register maps used when defining the VFP registers must differ depending on the capabilities of the cpu at runtime. I also changed register reading in debugserver, where formerly when debugserver was asked to read a register, and the thread_get_state read of that register failed, it would return all zeros. This is necessary when constructing a `g` packet that gets all registers - because there is no separation between register bytes, the offsets are fixed. But when we are asking for a single register (e.g. Z0) when not in SSVE/SME mode, this should return an error. This does mean that when you're running on an SME-capable machine, but not in SME mode, and do `register read -a`, lldb will report that 48 SVE registers were unavailable and 5 SME registers were unavailable. But that's only when `-a` is used. The register reading and writing depends on new register flavor support in thread_get_state/thread_set_state in the kernel, which is not yet in a release. The test case I wrote is skipped on current OSes. 
I pilfered the SME register setup from some of David's existing SME test files; there were a few Linux-specific details in those tests, so they weren't easy to reuse on Darwin. rdar://121608074
…#120488) This patch converts the profile for memprof_missing_leaf.ll to the recently introduced YAML-based text format.
Note that PointerUnion::{is,get} have been soft deprecated in PointerUnion.h: // FIXME: Replace the uses of is(), get() and dyn_cast() with // isa<T>, cast<T> and the llvm::dyn_cast<T> I'm not touching PointerUnion::dyn_cast for now because it's a bit complicated; we could blindly migrate it to dyn_cast_if_present, but we should probably use dyn_cast when the operand is known to be non-null.
Prior to this patch, we required that all users had the same VL in order to optimize. But as the FIXME said, we can use the largest VL to optimize, as long as we can determine what the largest is. This patch implements the FIXME.
…20491) Fix evaluation order problem identified in llvm#119088.
This also removes the `RangesReleased` which doesn't give much insight into whether we should adjust the heuristic of doing page release.
…no_overlap_error_icf.yaml (llvm#120330) Fixing broken test - calling `sed` in a cross-platform compatible way. Verified to pass on Mac (which uses BSD sed).
adoptRef in WebKit constructs Ref/RefPtr so treat it as such in isCtorOfRefCounted. Also removed the support for makeRef and makeRefPtr as they don't exist any more.
…120466) Rename HLSL resource-related intrinsics to be consistent with the naming conventions discussed in [wg-hlsl:0014]. This is an entirely mechanical change, consisting of the following commands and automated formatting. ```sh git grep -l handle.fromBinding | xargs perl -pi -e \ 's/(dx|spv)(.)handle.fromBinding/$1$2resource$2handlefrombinding/g' git grep -l typedBufferLoad_checkbit | xargs perl -pi -e \ 's/(dx|spv)(.)typedBufferLoad_checkbit/$1$2resource$2loadchecked$2typedbuffer/g' git grep -l typedBufferLoad | xargs perl -pi -e \ 's/(dx|spv)(.)typedBufferLoad/$1$2resource$2load$2typedbuffer/g' git grep -l typedBufferStore | xargs perl -pi -e \ 's/(dx|spv)(.)typedBufferStore/$1$2resource$2store$2typedbuffer/g' git grep -l bufferUpdateCounter | xargs perl -pi -e \ 's/(dx|spv)(.)bufferUpdateCounter/$1$2resource$2updatecounter/g' git grep -l cast_handle | xargs perl -pi -e \ 's/(dx|spv)(.)cast.handle/$1$2resource$2casthandle/g' ``` [wg-hlsl:0014]: https://github.com/llvm/wg-hlsl/blob/main/proposals/0014-consistent-naming-for-dx-intrinsics.md
- **[AMDGPU] Add new test.** - **[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions.** --------- Co-authored-by: Konstantina Mitropoulou <[email protected]>
…m#120626) Reverts llvm#119846. Introduced a failing test.
…c...` api (llvm#117635) - update `VectorUtils:isVectorIntrinsicWithScalarOpAtArg` to use TTI for all uses, to allow specification of target specific intrinsics - add TTI to the `isVectorIntrinsicWithStructReturnOverloadAtField` api - update TTI api to provide `isTargetIntrinsicWith...` functions and consistently name them - move `isTriviallyScalarizable` to VectorUtils - update all uses of the api and provide the TTI parameter Resolves llvm#117030
…pty doc (llvm#118459) Brought the class documentation in sync with the user documentation at container-size-empty.rst: https://github.com/llvm/llvm-project/blob/bfb26202e05ee2932b4368b5fca607df01e8247f/clang-tools-extra/docs/clang-tidy/checks/readability/container-size-empty.rst#L7-L14
This patch introduces IndexedCallstackIdConveter as a convenience wrapper around FrameIdConverter and CallStackIdConverter just for tests. With the new wrapper, we get to replace idioms like: FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv( MemProfData.Frames); CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv( MemProfData.CallStacks, FrameIdConv); with: IndexedCallstackIdConveter CSIdConv(MemProfData); Unfortunately, this exact pattern occurs in tests only; the combinations of the frame ID converter and call stack ID converter are diverse in production code.
These two constructs are very simple and similar, and only support 3 different clauses, two of which are already implemented. This patch adds AST nodes for both constructs, and leaves the device_num clause unimplemented, but enables the other two.
This is a very simple sema implementation, and just required AST node plus the existing diagnostics. This patch adds tests and adds the AST node required, plus enables it for 'init' and 'shutdown' (only!)
Note that PointerUnion::{is,get} have been soft deprecated in PointerUnion.h: // FIXME: Replace the uses of is(), get() and dyn_cast() with // isa<T>, cast<T> and the llvm::dyn_cast<T>
Split DerivedIV simplification off from llvm#112145 and use to remove the need for extra checks in createScalarIVSteps. Required an extra simplification run after IV transforms.
The paper is fixing a wording bug, so there's nothing to do for implementations.
…lvm#118499) Try to runtime-unroll loops with early-continues depending on loop-varying loads; this helps with branch-prediction for the early-continues and can significantly improve performance for such loops. Builds on top of llvm#118317. PR: llvm#118499.
…m#120737) The existing comparison does not insert symbols in the intended place. Closes llvm#120559. --------- Co-authored-by: Bjorn Pettersson <[email protected]>
…llvm#120899) Add const to `import*Renderer` member functions and recursively to functions called from them. I didn't do that for `import*Matcher` functions because they mutate class variables.
…`vector<bool>` in C++20 (llvm#120774)
… cost-comparison Helps with debugging to show that the fold found the match, and shows the old + new costs to indicate whether the fold was/wasn't profitable.
Add a number of tests with dereferenceable assumptions and different alignment info.
Helps with debugging to show that the fold found the match.
Interferes with constant folding of the pcmpgt node. Yet another example where topological node sorting would have helped us. Fixes llvm#120906
1d7004f
to
766d53c
Compare
766d53c
to
cda974f
Compare
changkhothuychung
pushed a commit
that referenced
this pull request
Feb 22, 2025
For function declarations (i.e. func op has no entry block), the FunctionOpInterface method `insertArgument` and `eraseArgument` will cause segfault. This PR guards against manipulation of empty entry block by checking whether func op is external. An example can be seen in google/heir#1324 The segfault trace ``` #1 0x0000560f1289d9db PrintStackTraceSignalHandler(void*) /proc/self/cwd/external/llvm-project/llvm/lib/Support/Unix/Signals.inc:874:1 #2 0x0000560f1289b116 llvm::sys::RunSignalHandlers() /proc/self/cwd/external/llvm-project/llvm/lib/Support/Signals.cpp:105:5 #3 0x0000560f1289e145 SignalHandler(int) /proc/self/cwd/external/llvm-project/llvm/lib/Support/Unix/Signals.inc:415:1 llvm#4 0x00007f829a3d9520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520) llvm#5 0x0000560f1257f8bc void __gnu_cxx::new_allocator<mlir::BlockArgument>::construct<mlir::BlockArgument, mlir::BlockArgument>(mlir::BlockArgument*, mlir::BlockArgument&&) /usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/ext/new_allocator.h:162:23 llvm#6 0x0000560f1257f84d void std::allocator_traits<std::allocator<mlir::BlockArgument> >::construct<mlir::BlockArgument, mlir::BlockArgument>(std::allocator<mlir::BlockArgument>&, mlir::BlockArgument*, mlir::BlockArgument&&) /usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/alloc_traits.h:520:2 llvm#7 0x0000560f12580498 void std::vector<mlir::BlockArgument, std::allocator<mlir::BlockArgument> >::_M_insert_aux<mlir::BlockArgument>(__gnu_cxx::__normal_iterator<mlir::BlockArgument*, std::vector<mlir::BlockArgument, std::allocator<mlir::BlockArgument> > >, mlir::BlockArgument&&) /usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/vector.tcc:405:7 llvm#8 0x0000560f1257cf7e std::vector<mlir::BlockArgument, std::allocator<mlir::BlockArgument> >::insert(__gnu_cxx::__normal_iterator<mlir::BlockArgument const*, std::vector<mlir::BlockArgument, std::allocator<mlir::BlockArgument> > >, mlir::BlockArgument const&) 
/usr/lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/vector.tcc:154:6 llvm#9 0x0000560f1257b349 mlir::Block::insertArgument(unsigned int, mlir::Type, mlir::Location) /proc/self/cwd/external/llvm-project/mlir/lib/IR/Block.cpp:178:13 llvm#10 0x0000560f123d2a1c mlir::function_interface_impl::insertFunctionArguments(mlir::FunctionOpInterface, llvm::ArrayRef<unsigned int>, mlir::TypeRange, llvm::ArrayRef<mlir::DictionaryAttr>, llvm::ArrayRef<mlir::Location>, unsigned int, mlir::Type) /proc/self/cwd/external/llvm-project/mlir/lib/Interfaces/FunctionInterfaces.cpp:232:11 llvm#11 0x0000560f0be6b727 mlir::detail::FunctionOpInterfaceTrait<mlir::func::FuncOp>::insertArguments(llvm::ArrayRef<unsigned int>, mlir::TypeRange, llvm::ArrayRef<mlir::DictionaryAttr>, llvm::ArrayRef<mlir::Location>) /proc/self/cwd/bazel-out/k8-dbg/bin/external/llvm-project/mlir/include/mlir/Interfaces/FunctionInterfaces.h.inc:809:7 llvm#12 0x0000560f0be6b536 mlir::detail::FunctionOpInterfaceTrait<mlir::func::FuncOp>::insertArgument(unsigned int, mlir::Type, mlir::DictionaryAttr, mlir::Location) /proc/self/cwd/bazel-out/k8-dbg/bin/external/llvm-project/mlir/include/mlir/Interfaces/FunctionInterfaces.h.inc:796:7 ```
changkhothuychung
pushed a commit
that referenced
this pull request
Mar 8, 2025
When compiling VLS SVE, the compiler often replaces VL-based offsets with immediate-based ones. This leads to a mismatch in the allowed addressing modes due to SVE loads/stores generally expecting immediate offsets relative to VL. For example, given: ```c svfloat64_t foo(const double *x) { svbool_t pg = svptrue_b64(); return svld1_f64(pg, x+svcntd()); } ``` When compiled with `-msve-vector-bits=128`, we currently generate: ```gas foo: ptrue p0.d mov x8, #2 ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ret ``` Instead, we could be generating: ```gas foo: ldr z0, [x0, #1, mul vl] ret ``` Likewise for other types, stores, and other VLS lengths. This patch achieves the above by extending `SelectAddrModeIndexedSVE` to let constants through when `vscale` is known.
changkhothuychung
pushed a commit
that referenced
this pull request
Mar 26, 2025
…1027) No codegen regression on either target. The two builtin_ffs implied on nvptx CSE away. ``` define internal i64 @__gpu_read_first_lane_u64(i64 noundef %__lane_mask, i64 noundef %__x) #2 { entry: %shr = lshr i64 %__x, 32 %conv = trunc nuw i64 %shr to i32 %conv1 = trunc i64 %__x to i32 %conv2 = trunc i64 %__lane_mask to i32 %0 = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %conv2, i1 true) %iszero = icmp eq i32 %conv2, 0 %sub = select i1 %iszero, i32 -1, i32 %0 %1 = tail call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %conv2, i32 %conv, i32 %sub, i32 31) %conv4 = sext i32 %1 to i64 %shl = shl nsw i64 %conv4, 32 %2 = tail call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %conv2, i32 %conv1, i32 %sub, i32 31) %conv7 = zext i32 %2 to i64 %or = or disjoint i64 %shl, %conv7 ret i64 %or } ; becomes define internal i64 @__gpu_competing_read_first_lane_u64(i64 noundef %__lane_mask, i64 noundef %__x) #2 { entry: %shr = lshr i64 %__x, 32 %conv = trunc nuw i64 %shr to i32 %conv1 = trunc i64 %__x to i32 %conv.i = trunc i64 %__lane_mask to i32 %0 = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %conv.i, i1 true) %iszero = icmp eq i32 %conv.i, 0 %sub.i = select i1 %iszero, i32 -1, i32 %0 %1 = tail call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %conv.i, i32 %conv, i32 %sub.i, i32 31) %conv4 = zext i32 %1 to i64 %shl = shl nuw i64 %conv4, 32 %2 = tail call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %conv.i, i32 %conv1, i32 %sub.i, i32 31) %conv7 = zext i32 %2 to i64 %or = or disjoint i64 %shl, %conv7 ret i64 %or } ``` The sext vs zext difference is vaguely interesting but since the bits are immediately discarded in either case it make no odds. The amdgcn one doesn't need CSE, the readfirstlane function is a single call to an intrinsic. Drive by fix to __gpu_match_all_u32, it was calling first_lane_u64 and could use first_lane_u32 instead. Added the missing call to gpuintrin.c test case and a stray missing static as well.
changkhothuychung
pushed a commit
that referenced
this pull request
Mar 26, 2025
…too. (llvm#132267) Observed in Wine when trying to intercept `ExitThread`, which forwards to `ntdll.RtlExitUserThread`. `gdb` interprets it as `xchg %ax,%ax`. `llvm-mc` outputs simply `nop`. ``` ==Asan-i386-calls-Dynamic-Test.exe==964==interception_win: unhandled instruction at 0x7be27cf0: 66 90 55 89 e5 56 50 8b ``` ``` Wine-gdb> bt #0 0x789a1766 in __interception::GetInstructionSize (address=<optimized out>, rel_offset=<optimized out>) at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/interception/interception_win.cpp:983 #1 0x789ab480 in __sanitizer::SharedPrintfCode(bool, char const*, char*) () at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp:311 #2 0x789a18e7 in __interception::OverrideFunctionWithHotPatch (old_func=2078440688, new_func=2023702608, orig_old_func=warning: (Internal error: pc 0x792f1a2c in read in CU, but not in symtab.)warning: (Error: pc 0x792f1a2c in address map, but not in symtab.)0x792f1a2c) at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/interception/interception_win.cpp:1118 #3 0x789a1f34 in __interception::OverrideFunction (old_func=2078440688, new_func=2023702608, orig_old_func=warning: (Internal error: pc 0x792f1a2c in read in CU, but not in symtab.)warning: (Error: pc 0x792f1a2c in address map, but not in symtab.)0x792f1a2c) at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/interception/interception_win.cpp:1224 llvm#4 0x789a24ce in __interception::OverrideFunction (func_name=0x78a0bc43 <vtable for __asan::AsanThreadContext+1163> "ExitThread", new_func=2023702608, orig_old_func=warning: (Internal error: pc 0x792f1a2c in read in CU, but not in symtab.)warning: (Error: pc 0x792f1a2c in address map, but not in symtab.)0x792f1a2c) at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/interception/interception_win.cpp:1369 llvm#5 0x789f40ef in __asan::InitializePlatformInterceptors () at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/asan/asan_win.cpp:190 
llvm#6 0x789e0c3c in __asan::InitializeAsanInterceptors () at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/asan/asan_interceptors.cpp:802 llvm#7 0x789ee6b5 in __asan::AsanInitInternal () at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/asan/asan_rtl.cpp:442 llvm#8 0x789eefb0 in __asan::AsanInitFromRtl () at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/asan/asan_rtl.cpp:522 llvm#9 __asan::AsanInitializer::AsanInitializer (this=<optimized out>) at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/asan/asan_rtl.cpp:542 llvm#10 __cxx_global_var_init () at C:/llvm-mingw/llvm-mingw/llvm-project/compiler-rt/lib/asan/asan_rtl.cpp:546 ... Wine-gdb> disassemble /r 2078440688,2078440688+20 Dump of assembler code from 0x7be27cf0 to 0x7be27d04: 0x7be27cf0 <_RtlExitUserThread@4+0>: 66 90 xchg %ax,%ax ... ```
changkhothuychung
pushed a commit
that referenced
this pull request
Apr 9, 2025
…#134130) This should fix failures caused by llvm#133967 Attn: @sarnex Thanks Signed-off-by: Arvind Sudarsanam <[email protected]>
changkhothuychung
pushed a commit
that referenced
this pull request
Apr 9, 2025
…d A520 (llvm#132246) Inefficient SVE codegen occurs on at least two in-order cores, those being Cortex-A510 and Cortex-A520. For example a simple vector add ``` void foo(float *a, float *b, float *dst, unsigned n) { for (unsigned i = 0; i < n; ++i) dst[i] = a[i] + b[i]; } ``` Vectorizes the inner loop into the following interleaved sequence of instructions. ``` add x12, x1, x10 ld1b { z0.b }, p0/z, [x1, x10] add x13, x2, x10 ld1b { z1.b }, p0/z, [x2, x10] ldr z2, [x12, #1, mul vl] ldr z3, [x13, #1, mul vl] dech x11 add x12, x0, x10 fadd z0.s, z1.s, z0.s fadd z1.s, z3.s, z2.s st1b { z0.b }, p0, [x0, x10] addvl x10, x10, #2 str z1, [x12, #1, mul vl] ``` By adjusting the target features to prefer fixed over scalable if the cost is equal we get the following vectorized loop. ``` ldp q0, q3, [x11, #-16] subs x13, x13, #8 ldp q1, q2, [x10, #-16] add x10, x10, #32 add x11, x11, #32 fadd v0.4s, v1.4s, v0.4s fadd v1.4s, v2.4s, v3.4s stp q0, q1, [x12, #-16] add x12, x12, #32 ``` Which is more efficient.
changkhothuychung
pushed a commit
that referenced
this pull request
Apr 9, 2025
… A510/A520 (llvm#134606) Recommit. This work was done by llvm#132246 but failed buildbots due to the test introduced needing updates. Inefficient SVE codegen occurs on at least two in-order cores, those being Cortex-A510 and Cortex-A520. For example a simple vector add ``` void foo(float *a, float *b, float *dst, unsigned n) { for (unsigned i = 0; i < n; ++i) dst[i] = a[i] + b[i]; } ``` Vectorizes the inner loop into the following interleaved sequence of instructions. ``` add x12, x1, x10 ld1b { z0.b }, p0/z, [x1, x10] add x13, x2, x10 ld1b { z1.b }, p0/z, [x2, x10] ldr z2, [x12, #1, mul vl] ldr z3, [x13, #1, mul vl] dech x11 add x12, x0, x10 fadd z0.s, z1.s, z0.s fadd z1.s, z3.s, z2.s st1b { z0.b }, p0, [x0, x10] addvl x10, x10, #2 str z1, [x12, #1, mul vl] ``` By adjusting the target features to prefer fixed over scalable if the cost is equal we get the following vectorized loop. ``` ldp q0, q3, [x11, #-16] subs x13, x13, #8 ldp q1, q2, [x10, #-16] add x10, x10, #32 add x11, x11, #32 fadd v0.4s, v1.4s, v0.4s fadd v1.4s, v2.4s, v3.4s stp q0, q1, [x12, #-16] add x12, x12, #32 ``` Which is more efficient.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
paper link: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2542r8.html