diff --git a/docs/cpp2/common.md b/docs/cpp2/common.md index b2fb28e385..e104f06a2e 100644 --- a/docs/cpp2/common.md +++ b/docs/cpp2/common.md @@ -168,6 +168,7 @@ Cpp2 supports using Cpp1 user-defined literals for compatibility, to support sea Both **`123.nm()`** and **`123.u8()`** are very similar to user-defined literal syntax, and more general. + ## Operators Operators have the same precedence and associativity as in Cpp1, but some unary operators that are prefix (always or sometimes) in Cpp1 are postfix (always) in Cpp2. @@ -188,7 +189,7 @@ if !vec.empty() { | `+` | `#!cpp +100` | `#!cpp +100` | | `-` | `#!cpp -100` | `#!cpp -100` | -The operators `.`, `*`, `&`, `~`, `++`, `--`, `()`, `[]`, and `$` are postfix. For example: +The operators `.`, `..`, `*`, `&`, `~`, `++`, `--`, `()`, `[]`, `...`, `..=`, and `$` are postfix. For example: ``` cpp title="Using postfix operators" // Cpp1 examples, from cppfront's own source code: @@ -201,7 +202,7 @@ The operators `.`, `*`, `&`, `~`, `++`, `--`, `()`, `[]`, and `$` are postfix. F Postfix notation lets the code read fluidly left-to-right, in the same order in which the operators will be applied, and lets declaration syntax be consistent with usage syntax. For more details, see [Design note: Postfix operators](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Postfix-operators). -> Note: The function call syntax `f(x)` calls a namespace-scope function, or a function object, named `f`. The function call syntax `x.f()` is a unified function call syntax (aka UFCS) that calls a type-scope function in the type of `x` if available, otherwise calls the same as `f(x)`. For details, see [Design note: UFCS](https://github.com/hsutter/cppfront/wiki/Design-note%3A-UFCS). +> Note: The function call syntax `f(x)` calls a namespace-scope function, or a function object, named `f`. 
The function call syntax `x.f()` is a unified function call syntax (aka UFCS) that calls a type-scope function in the type of `x` if available, otherwise calls the same as `f(x)`. The function call syntax `x..f()` calls a type-scope function only. For details, see [Design note: UFCS](https://github.com/hsutter/cppfront/wiki/Design-note%3A-UFCS). | Unary operator | Cpp2 example | Cpp1 equivalent | |---|---|---| @@ -213,18 +214,13 @@ Postfix notation lets the code read fluidly left-to-right, in the same order in | `#!cpp --` | `#!cpp iter--` | `#!cpp --iter` | | `(` `)` | `#!cpp f( 1, 2, 3)` | `#!cpp f( 1, 2, 3)` | | `[` `]` | `#!cpp vec[123]` | `#!cpp vec[123]` | -| `$` | `val$` | _reflection — no Cpp1 equivalent yet_ | - -> Because `++` and `--` always have in-place update semantics, we never need to remember "use prefix `++`/`--` unless you need a copy of the old value." If you do need a copy of the old value, just take the copy before calling `++`/`--`. +| `...` (half-open range operator) | `#!cpp v.begin()...v.end()` | `#!cpp std::ranges::subrange(v.begin(), v.end())` | +| `..=` (closed range operator) | `#!cpp 1..=10` | `#!cpp std::views::iota(1, 11)` | +| `$` (capture operator) | `val$` | _reflection — no Cpp1 equivalent yet_ | -Unary suffix operators must not be preceded by whitespace. When `*`, `&`, and `~` are used as binary operators they must be preceded by whitespace. For example: - -| Unary postfix operators that
are also binary operators | Cpp2 example | Cpp1 equivalent | -|---|---|---| -| `#!cpp *` | `#!cpp pobj* * 42` | `#!cpp (*pobj)*42` | -| `#!cpp &` | `#!cpp obj& & mask`

(note: allowed in unsafe code only) | `#!cpp &obj & mask` | +> Note: The `...` pack expansion syntax is also supported. The above `...` and `..=` are the Cpp2 range operators, which overlap in syntax. -For more details, see [Design note: Postfix unary operators vs binary operators](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Postfix-unary-operators-vs-binary-operators). +> Note: Because `++` and `--` always have in-place update semantics, we never need to remember "use prefix `++`/`--` unless you need a copy of the old value." If you do need a copy of the old value, just take the copy before calling `++`/`--`. ### Binary operators diff --git a/docs/cpp2/expressions.md b/docs/cpp2/expressions.md index 38d684b8b9..08350503ce 100644 --- a/docs/cpp2/expressions.md +++ b/docs/cpp2/expressions.md @@ -1,7 +1,7 @@ # Common expressions -## Calling functions: `f(x)` syntax, and `x.f()` UFCS syntax +## Calling functions: `f(x)` syntax, `x.f()` UFCS syntax, and `x..f()` members-only syntax A function argument list is a [list](common.md#lists) of arguments enclosed by `(` `)` parentheses. @@ -11,6 +11,8 @@ A function call like `x.f()` is a unified function call syntax (aka UFCS) call. An operator notation call like `#!cpp a + b` will call an overloaded operator function if one is available, as usual in C++. +A function call like `x..f()` will consider only member functions. + For example: ``` cpp title="Function calls" hl_lines="3 7 11 16 19 20" @@ -221,6 +223,41 @@ test(42); For more examples, see also the examples in the previous two sections on `is` and `as`, many of which use `inspect`. +## `...` and `..=` — range operators + +`...` and `..=` designate a range of things. In addition to using `...` for variadic parameters, variadic pack expansion, and fold expressions as in Cpp1, Cpp2 also supports using `begin...end` for a half-open range (that does not include `end`) and `first..=last` for a closed range (that does include `last`). 
+ +For example: + +``` cpp title="Using ... and ..= for ranges" hl_lines="5 11" +test: (v: std::vector) = +{ + // Print strings from "Nonesuch" (if present) onward + i1 := v.std::ranges::find("Nonesuch"); + for i1 ... v.end() do (e) { + std::cout << " (e*)$\n"; + } + + if v.ssize() > 2 { + // Print indexes 1 and 2 of v + for 1 ..= 2 do (e) { + std::cout << " (e)$ (v[e])$\n"; + } + } +} + +main: () = { + vec: std::vector = ("Beholder", "Grue", "Nonesuch", "Wumpus"); + test( vec ); +} +// Prints: +// Nonesuch +// Wumpus +// 1 Grue +// 2 Nonesuch +``` + + ## `$` — captures, including interpolations Suffix `$` is pronounced **"paste the value of"** and captures the value of an expression at the point when the expression where the capture is written is evaluated. Depending on the complexity of the capture expression `expr$` and where it is used, parentheses `(expr)$` may be required for precedence or to show the boundaries of the expression. diff --git a/include/cpp2util.h b/include/cpp2util.h index bd00817752..03d7ba66b7 100644 --- a/include/cpp2util.h +++ b/include/cpp2util.h @@ -2046,12 +2046,15 @@ constexpr auto unsafe_narrow( X&& x ) noexcept -> decltype(auto) // // Does not perform any dynamic memory allocation - each string_view // is directly bound to the string provided by the host environment +// +// Note: These string_views happen to be null-terminated. We ought +// to also have a std::zstring_view to express that... 
// //----------------------------------------------------------------------- // -struct args_t +struct args { - args_t(int c, char** v) : argc{c}, argv{v} {} + args(int c, char** v) : argc{c}, argv{v} {} class iterator { public: @@ -2084,24 +2087,93 @@ struct args_t auto end() const -> iterator { return iterator{ argc, argv, argc }; } auto cbegin() const -> iterator { return begin(); } auto cend() const -> iterator { return end(); } - auto size() const -> std::size_t { return cpp2::unsafe_narrow(argc); } + auto size() const -> std::size_t { return cpp2::unsafe_narrow(ssize()); } auto ssize() const -> int { return argc; } auto operator[](int i) const { - if (0 <= i && i < argc) { return std::string_view{ argv[i] }; } - else { return std::string_view{}; } + if (0 <= i && i < ssize()) { return std::string_view{ argv[i] }; } + else { return std::string_view{}; } } mutable int argc = 0; // mutable for compatibility with frameworks that take 'int& argc' char** argv = nullptr; }; -inline auto make_args(int argc, char** argv) -> args_t +inline auto make_args(int argc, char** argv) -> args { - return args_t{argc, argv}; + return args{argc, argv}; } +//----------------------------------------------------------------------- +// +// range: a range of [begin, end) or [first, last] +// +//----------------------------------------------------------------------- +// +template +struct range +{ + range( + T const& f, + T const& l, + bool include_last = false + ) + : first{ f } + , last{ l } + { + if (include_last) { + ++last; + } + } + + class iterator { + public: + iterator(T const& f, T const& l, T start) : first{ f }, last{ l }, curr{ start } {} + + auto operator*() const { + if (curr != last) { return curr; } + else { return T{}; } + } + + auto operator+(int i) -> iterator { + if (i > 0) { return { first, last, std::min(curr + i, last) }; } + else { return { first, last, std::max(curr + i, first) }; } + } + auto operator-(int i) -> iterator { return operator+(-i); } + auto 
operator++() -> iterator& { if (curr != last ) { ++curr; } return *this; } + auto operator--() -> iterator& { if (curr != first) { --curr; } return *this; } + auto operator++(int) -> iterator { auto old = *this; ++*this; return old; } + auto operator--(int) -> iterator { auto old = *this; --*this; return old; } + + auto operator<=>(iterator const&) const = default; + + private: + T first; + T last; + T curr; + }; + + auto begin() const -> iterator { return iterator{ first, last, first }; } + auto end() const -> iterator { return iterator{ first, last, last }; } + auto cbegin() const -> iterator { return begin(); } + auto cend() const -> iterator { return end(); } + auto size() const -> std::size_t { return cpp2::unsafe_narrow(ssize()); } + auto ssize() const -> int { return last - first; } + + auto operator[](int i) const { + if (0 <= i && i < ssize()) { return first + i; } + else { return T{}; } + } + + T first; + T last; +}; + +template +range(T, U, bool = false) -> range>; + + //----------------------------------------------------------------------- // // alien_memory: memory typed as T but that is outside C++ and that the diff --git a/regression-tests/pure2-range-operators.cpp2 b/regression-tests/pure2-range-operators.cpp2 new file mode 100644 index 0000000000..12384c8713 --- /dev/null +++ b/regression-tests/pure2-range-operators.cpp2 @@ -0,0 +1,24 @@ +main: () = { + + v: std::vector = + ( "Aardvark", "Baboon", "Cat", "Dolphin", "Elephant", "Flicker", "Grue", "Wumpus" ); + + std::cout << "We have some alpabetical animals:\n"; + for v.begin()...v.end() do (e) { + std::cout << " (e*)$\n"; + } + + std::cout << "\nAnd from indexes 1..=5 they are:\n"; + for 1..=5 do (e) { + std::cout << " (e)$ (v[e])$\n"; + } + + all_about: std::list = + ( "Hokey", "Pokey" ); + + std::cout << "\nMake sure non-random-access iterators work:\n"; + for all_about.begin()...all_about.end() do (e) { + std::cout << " (e*)$\n"; + } + +} diff --git 
a/regression-tests/test-results/clang-12-c++20/pure2-range-operators.cpp.execution b/regression-tests/test-results/clang-12-c++20/pure2-range-operators.cpp.execution new file mode 100644 index 0000000000..5d6a1a5428 --- /dev/null +++ b/regression-tests/test-results/clang-12-c++20/pure2-range-operators.cpp.execution @@ -0,0 +1,20 @@ +We have some alpabetical animals: + Aardvark + Baboon + Cat + Dolphin + Elephant + Flicker + Grue + Wumpus + +And from indexes 1..=5 they are: + 1 Baboon + 2 Cat + 3 Dolphin + 4 Elephant + 5 Flicker + +Make sure non-random-access iterators work: + Hokey + Pokey diff --git a/regression-tests/test-results/clang-12-c++20/pure2-range-operators.cpp.output b/regression-tests/test-results/clang-12-c++20/pure2-range-operators.cpp.output new file mode 100644 index 0000000000..e69de29bb2 diff --git a/regression-tests/test-results/gcc-10-c++20/pure2-range-operators.cpp.execution b/regression-tests/test-results/gcc-10-c++20/pure2-range-operators.cpp.execution new file mode 100644 index 0000000000..5d6a1a5428 --- /dev/null +++ b/regression-tests/test-results/gcc-10-c++20/pure2-range-operators.cpp.execution @@ -0,0 +1,20 @@ +We have some alpabetical animals: + Aardvark + Baboon + Cat + Dolphin + Elephant + Flicker + Grue + Wumpus + +And from indexes 1..=5 they are: + 1 Baboon + 2 Cat + 3 Dolphin + 4 Elephant + 5 Flicker + +Make sure non-random-access iterators work: + Hokey + Pokey diff --git a/regression-tests/test-results/gcc-10-c++20/pure2-range-operators.cpp.output b/regression-tests/test-results/gcc-10-c++20/pure2-range-operators.cpp.output new file mode 100644 index 0000000000..e69de29bb2 diff --git a/regression-tests/test-results/gcc-14-c++2b/pure2-range-operators.cpp.execution b/regression-tests/test-results/gcc-14-c++2b/pure2-range-operators.cpp.execution new file mode 100644 index 0000000000..5d6a1a5428 --- /dev/null +++ b/regression-tests/test-results/gcc-14-c++2b/pure2-range-operators.cpp.execution @@ -0,0 +1,20 @@ +We have some 
alpabetical animals: + Aardvark + Baboon + Cat + Dolphin + Elephant + Flicker + Grue + Wumpus + +And from indexes 1..=5 they are: + 1 Baboon + 2 Cat + 3 Dolphin + 4 Elephant + 5 Flicker + +Make sure non-random-access iterators work: + Hokey + Pokey diff --git a/regression-tests/test-results/gcc-14-c++2b/pure2-range-operators.cpp.output b/regression-tests/test-results/gcc-14-c++2b/pure2-range-operators.cpp.output new file mode 100644 index 0000000000..e69de29bb2 diff --git a/regression-tests/test-results/msvc-2022-c++latest/pure2-range-operators.cpp.execution b/regression-tests/test-results/msvc-2022-c++latest/pure2-range-operators.cpp.execution new file mode 100644 index 0000000000..5d6a1a5428 --- /dev/null +++ b/regression-tests/test-results/msvc-2022-c++latest/pure2-range-operators.cpp.execution @@ -0,0 +1,20 @@ +We have some alpabetical animals: + Aardvark + Baboon + Cat + Dolphin + Elephant + Flicker + Grue + Wumpus + +And from indexes 1..=5 they are: + 1 Baboon + 2 Cat + 3 Dolphin + 4 Elephant + 5 Flicker + +Make sure non-random-access iterators work: + Hokey + Pokey diff --git a/regression-tests/test-results/msvc-2022-c++latest/pure2-range-operators.cpp.output b/regression-tests/test-results/msvc-2022-c++latest/pure2-range-operators.cpp.output new file mode 100644 index 0000000000..45f5d69f5d --- /dev/null +++ b/regression-tests/test-results/msvc-2022-c++latest/pure2-range-operators.cpp.output @@ -0,0 +1 @@ +pure2-range-operators.cpp diff --git a/regression-tests/test-results/pure2-range-operators.cpp b/regression-tests/test-results/pure2-range-operators.cpp new file mode 100644 index 0000000000..9f0d2a62ba --- /dev/null +++ b/regression-tests/test-results/pure2-range-operators.cpp @@ -0,0 +1,45 @@ + +#define CPP2_IMPORT_STD Yes + +//=== Cpp2 type declarations ==================================================== + + +#include "cpp2util.h" + +#line 1 "pure2-range-operators.cpp2" + + +//=== Cpp2 type definitions and function declarations 
=========================== + +#line 1 "pure2-range-operators.cpp2" +auto main() -> int; + +//=== Cpp2 function definitions ================================================= + +#line 1 "pure2-range-operators.cpp2" +auto main() -> int{ + +#line 3 "pure2-range-operators.cpp2" + std::vector v { + "Aardvark", "Baboon", "Cat", "Dolphin", "Elephant", "Flicker", "Grue", "Wumpus"}; + + std::cout << "We have some alpabetical animals:\n"; + for ( auto const& e : cpp2::range(CPP2_UFCS(begin)(v),CPP2_UFCS(end)(v)) ) { + std::cout << " " + cpp2::to_string(*cpp2::impl::assert_not_null(e)) + "\n"; + } + + std::cout << "\nAnd from indexes 1..=5 they are:\n"; + for ( auto const& e : cpp2::range(1,5,true) ) { + std::cout << " " + cpp2::to_string(e) + " " + cpp2::to_string(CPP2_ASSERT_IN_BOUNDS(v, e)) + "\n"; + } + + std::list all_about { + "Hokey", "Pokey"}; + + std::cout << "\nMake sure non-random-access iterators work:\n"; + for ( auto const& e : cpp2::range(CPP2_UFCS(begin)(all_about),CPP2_UFCS(end)(cpp2::move(all_about))) ) { + std::cout << " " + cpp2::to_string(*cpp2::impl::assert_not_null(e)) + "\n"; + } + +} + diff --git a/regression-tests/test-results/pure2-range-operators.cpp2.output b/regression-tests/test-results/pure2-range-operators.cpp2.output new file mode 100644 index 0000000000..f8710df705 --- /dev/null +++ b/regression-tests/test-results/pure2-range-operators.cpp2.output @@ -0,0 +1,2 @@ +pure2-range-operators.cpp2... 
ok (all Cpp2, passes safety checks) + diff --git a/source/lex.h b/source/lex.h index 348633b465..bf48449cf6 100644 --- a/source/lex.h +++ b/source/lex.h @@ -85,6 +85,7 @@ enum class lexeme : std::int8_t { Dot, DotDot, Ellipsis, + EllipsisEq, QuestionMark, At, Dollar, @@ -183,6 +184,7 @@ auto _as(lexeme l) break;case lexeme::Dot: return "Dot"; break;case lexeme::DotDot: return "DotDot"; break;case lexeme::Ellipsis: return "Ellipsis"; + break;case lexeme::EllipsisEq: return "EllipsisEq"; break;case lexeme::QuestionMark: return "QuestionMark"; break;case lexeme::At: return "At"; break;case lexeme::Dollar: return "Dollar"; @@ -1438,9 +1440,10 @@ auto lex_line( //G //G punctuator: one of - //G '.' '..' '...' + //G '.' '..' '...' '..=' break;case '.': if (peek1 == '.' && peek2 == '.') { store(3, lexeme::Ellipsis); } + else if (peek1 == '.' && peek2 == '=') { store(3, lexeme::EllipsisEq); } else if (peek1 == '.') { store(2, lexeme::DotDot); } else { store(1, lexeme::Dot); } diff --git a/source/parse.h b/source/parse.h index e7bd2b20e4..e00a9b11fb 100644 --- a/source/parse.h +++ b/source/parse.h @@ -61,7 +61,8 @@ auto is_postfix_operator(lexeme l) case lexeme::Tilde: case lexeme::Dollar: case lexeme::Ellipsis: - return true; + case lexeme::EllipsisEq: + return true; break;default: return false; } @@ -902,6 +903,9 @@ struct postfix_expression_node // These are used if *op is [ or ( - can be null std::unique_ptr expr_list = {}; token const* op_close = {}; + + // This is used if *op is ... 
or ..= to hold the 'last' expression + std::unique_ptr last_expr= {}; }; std::vector ops; capture_group* cap_grp = {}; @@ -1804,6 +1808,9 @@ auto postfix_expression_node::visit(auto& v, int depth) if (x.expr_list) { x.expr_list->visit(v, depth+1); } + if (x.last_expr) { + x.last_expr->visit(v, depth+1); + } } v.end(*this, depth); } @@ -4766,6 +4773,9 @@ auto pretty_print_visualize(postfix_expression_node const& n, int indent) if (op.id_expr) { ret += pretty_print_visualize(*op.id_expr, indent); } + if (op.last_expr) { + ret += pretty_print_visualize(*op.last_expr, indent); + } } } @@ -5928,6 +5938,7 @@ class parser //G postfix-expression '(' expression-list? ','? ')' //G postfix-expression '.' id-expression //G postfix-expression '..' id-expression + //G postfix-expression '...' primary-expression //G auto postfix_expression() -> std::unique_ptr @@ -5946,6 +5957,8 @@ class parser || curr().type() == lexeme::LeftParen || curr().type() == lexeme::Dot || curr().type() == lexeme::DotDot + || curr().type() == lexeme::Ellipsis + || curr().type() == lexeme::EllipsisEq ) ) { @@ -6038,6 +6051,16 @@ class parser return {}; } } + else if ( + ( + term.op->type() == lexeme::Ellipsis + || term.op->type() == lexeme::EllipsisEq + ) + && n->expr->to_string() != "sizeof" + ) + { + term.last_expr = expression(); + } n->ops.push_back( std::move(term) ); } @@ -6067,10 +6090,10 @@ class parser //G prefix-expression: //G postfix-expression //G prefix-operator prefix-expression - //GTODO await-expression + //G 'sizeof' '...' '(' identifier ')' //GTODO 'sizeof' '(' type-id ')' - //GTODO 'sizeof' '...' 
( identifier ')' //GTODO 'alignof' '(' type-id ')' + //GTODO await-expression //GTODO throws-expression //G auto prefix_expression() diff --git a/source/to_cpp1.h b/source/to_cpp1.h index f882d0c821..6f508dd348 100644 --- a/source/to_cpp1.h +++ b/source/to_cpp1.h @@ -3383,8 +3383,13 @@ class cppfront last_was_prefixed = true; } - // Handle the other Cpp2 postfix operators that stay postfix in Cpp1 (currently: '...') - else if (is_postfix_operator(i->op->type())) { + // Handle the other Cpp2 postfix operators that stay postfix in Cpp1 + // (currently '...' for expansion, not when used as a range operator) + else if ( + is_postfix_operator(i->op->type()) + && !i->last_expr // not being used as a range operator + ) + { flush_args(); suffix.emplace_back( i->op->to_string(), i->op->position()); } @@ -3438,6 +3443,16 @@ class cppfront } } + if (i->last_expr) + { + prefix.emplace_back( "cpp2::range(", i->op->position() ); + auto print = print_to_string( *i->last_expr ); + if (i->op->type() == lexeme::EllipsisEq) { + print += ",true"; + } + suffix.emplace_back( "," + print + ")", i->last_expr->position()); + } + // Enable subscript bounds checks if ( flag_safe_subscripts @@ -3458,10 +3473,13 @@ class cppfront } suffix.emplace_back( ", ", i->op->position() ); } + // If this is a .., emit . instead else if( i->op->type() == lexeme::DotDot) { suffix.emplace_back(".", i->op->position()); } - else { + // If this is a range expression, suppress emitting the .../..= operator + // Otherwise emit the postfix operator here + else if (!i->last_expr) { suffix.emplace_back( i->op->to_string(), i->op->position() ); } }