diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py
index 75e2d2f46da..1003c63f671 100755
--- a/scripts/gen-s-parser.py
+++ b/scripts/gen-s-parser.py
@@ -619,6 +619,8 @@
     ("string.const", "makeStringConst(s)"),
     ("string.measure_wtf8", "makeStringMeasure(s, StringMeasureWTF8)"),
     ("string.measure_wtf16", "makeStringMeasure(s, StringMeasureWTF16)"),
+    ("string.encode_wtf8", "makeStringEncode(s, StringEncodeWTF8)"),
+    ("string.encode_wtf16", "makeStringEncode(s, StringEncodeWTF16)"),
 ]
 
 
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc
index 3020cf97710..c3b6bcec1d0 100644
--- a/src/gen-s-parser.inc
+++ b/src/gen-s-parser.inc
@@ -3132,6 +3132,17 @@ switch (op[0]) {
             case 'c':
               if (strcmp(op, "string.const") == 0) { return makeStringConst(s); }
               goto parse_error;
+            case 'e': {
+              switch (op[17]) {
+                case '1':
+                  if (strcmp(op, "string.encode_wtf16") == 0) { return makeStringEncode(s, StringEncodeWTF16); }
+                  goto parse_error;
+                case '8':
+                  if (strcmp(op, "string.encode_wtf8") == 0) { return makeStringEncode(s, StringEncodeWTF8); }
+                  goto parse_error;
+                default: goto parse_error;
+              }
+            }
             case 'm': {
               switch (op[18]) {
                 case '1':
diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp
index 59b994ec1b4..8acffaa342b 100644
--- a/src/ir/ReFinalize.cpp
+++ b/src/ir/ReFinalize.cpp
@@ -175,6 +175,7 @@ void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); }
 void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); }
 void ReFinalize::visitStringConst(StringConst* curr) { curr->finalize(); }
 void ReFinalize::visitStringMeasure(StringMeasure* curr) { curr->finalize(); }
+void ReFinalize::visitStringEncode(StringEncode* curr) { curr->finalize(); }
 
 void ReFinalize::visitFunction(Function* curr) {
   // we may have changed the body from unreachable to none, which might be bad
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 372c597a593..afc091ffbd6 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -678,6 +678,9 @@ struct CostAnalyzer : public OverriddenVisitor {
   CostType visitStringMeasure(StringMeasure* curr) {
     return 6 + visit(curr->ref);
   }
+  CostType visitStringEncode(StringEncode* curr) {
+    return 6 + visit(curr->ref) + visit(curr->ptr);
+  }
 
 private:
   CostType nullCheckCost(Expression* ref) {
diff --git a/src/ir/effects.h b/src/ir/effects.h
index b8007cfbae2..8e33f65bf90 100644
--- a/src/ir/effects.h
+++ b/src/ir/effects.h
@@ -734,7 +734,14 @@ class EffectAnalyzer {
     }
     void visitStringNew(StringNew* curr) {}
     void visitStringConst(StringConst* curr) {}
-    void visitStringMeasure(StringMeasure* curr) {}
+    void visitStringMeasure(StringMeasure* curr) {
+      // traps when ref is null.
+      parent.implicitTrap = true;
+    }
+    void visitStringEncode(StringEncode* curr) {
+      // traps when ref is null or we write out of bounds.
+      parent.implicitTrap = true;
+    }
   };
 
 public:
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index 3b5c19f1536..c865585f1a6 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -685,6 +685,10 @@ struct InfoCollector
     // TODO: optimize when possible
     addRoot(curr);
   }
+  void visitStringEncode(StringEncode* curr) {
+    // TODO: optimize when possible
+    addRoot(curr);
+  }
 
   // TODO: Model which throws can go to which catches. For now, anything thrown
   // is sent to the location of that tag, and any catch of that tag can
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 26140c030b3..49cade313f2 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -2257,6 +2257,21 @@ struct PrintExpressionContents
         WASM_UNREACHABLE("invalid string.measure*");
     }
   }
+  void visitStringEncode(StringEncode* curr) {
+    switch (curr->op) {
+      case StringEncodeUTF8:
+        printMedium(o, "string.encode_wtf8 utf8");
+        break;
+      case StringEncodeWTF8:
+        printMedium(o, "string.encode_wtf8 wtf8");
+        break;
+      case StringEncodeWTF16:
+        printMedium(o, "string.encode_wtf16");
+        break;
+      default:
+        WASM_UNREACHABLE("invalid string.encode*");
+    }
+  }
 };
 
 // Prints an expression in s-expr format, including both the
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index cb881301ff1..14658890e3d 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1142,6 +1142,8 @@ enum ASTNodes {
   StringConst = 0x82,
   StringMeasureWTF8 = 0x84,
   StringMeasureWTF16 = 0x85,
+  StringEncodeWTF8 = 0x86,
+  StringEncodeWTF16 = 0x87,
 };
 
 enum MemoryAccess {
@@ -1725,6 +1727,7 @@ class WasmBinaryBuilder {
   bool maybeVisitStringNew(Expression*& out, uint32_t code);
   bool maybeVisitStringConst(Expression*& out, uint32_t code);
   bool maybeVisitStringMeasure(Expression*& out, uint32_t code);
+  bool maybeVisitStringEncode(Expression*& out, uint32_t code);
   void visitSelect(Select* curr, uint8_t code);
   void visitReturn(Return* curr);
   void visitMemorySize(MemorySize* curr);
diff --git a/src/wasm-builder.h b/src/wasm-builder.h
index 0829719edfb..75ebcd0a306 100644
--- a/src/wasm-builder.h
+++ b/src/wasm-builder.h
@@ -1011,6 +1011,15 @@ class Builder {
     ret->finalize();
     return ret;
   }
+  StringEncode*
+  makeStringEncode(StringEncodeOp op, Expression* ref, Expression* ptr) {
+    auto* ret = wasm.allocator.alloc<StringEncode>();
+    ret->op = op;
+    ret->ref = ref;
+    ret->ptr = ptr;
+    ret->finalize();
+    return ret;
+  }
 
   // Additional helpers
 
diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def
index a08a887a4b9..7d9ffc37d5e 100644
--- a/src/wasm-delegations-fields.def
+++ b/src/wasm-delegations-fields.def
@@ -734,6 +734,14 @@ switch (DELEGATE_ID) {
     DELEGATE_END(StringMeasure);
     break;
   }
+  case Expression::Id::StringEncodeId: {
+    DELEGATE_START(StringEncode);
+    DELEGATE_FIELD_INT(StringEncode, op);
+    DELEGATE_FIELD_CHILD(StringEncode, ptr);
+    DELEGATE_FIELD_CHILD(StringEncode, ref);
+    DELEGATE_END(StringEncode);
+    break;
+  }
 }
 
 #undef DELEGATE_ID
diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def
index e0a2ff13e7b..5ee2c0a672a 100644
--- a/src/wasm-delegations.def
+++ b/src/wasm-delegations.def
@@ -88,5 +88,6 @@ DELEGATE(RefAs);
 DELEGATE(StringNew);
 DELEGATE(StringConst);
 DELEGATE(StringMeasure);
+DELEGATE(StringEncode);
 
 #undef DELEGATE
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index e9f79df82b2..c1bd06e1cc9 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1964,6 +1964,9 @@ class ExpressionRunner : public OverriddenVisitor {
   Flow visitStringMeasure(StringMeasure* curr) {
     WASM_UNREACHABLE("unimplemented string.measure");
   }
+  Flow visitStringEncode(StringEncode* curr) {
+    WASM_UNREACHABLE("unimplemented string.encode");
+  }
 
   virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); }
 
diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h
index b21b865054c..3ecde76330d 100644
--- a/src/wasm-s-parser.h
+++ b/src/wasm-s-parser.h
@@ -306,6 +306,7 @@ class SExpressionWasmBuilder {
   Expression* makeStringNew(Element& s, StringNewOp op);
   Expression* makeStringConst(Element& s);
   Expression* makeStringMeasure(Element& s, StringMeasureOp op);
+  Expression* makeStringEncode(Element& s, StringEncodeOp op);
 
   // Helper functions
   Type parseOptionalResultType(Element& s, Index& i);
diff --git a/src/wasm.h b/src/wasm.h
index 092ffb7defa..1cb0b381bce 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -596,6 +596,12 @@ enum StringMeasureOp {
   StringMeasureWTF16,
 };
 
+enum StringEncodeOp {
+  StringEncodeUTF8,
+  StringEncodeWTF8,
+  StringEncodeWTF16,
+};
+
 //
 // Expressions
 //
@@ -694,6 +700,7 @@ class Expression {
     StringNewId,
     StringConstId,
     StringMeasureId,
+    StringEncodeId,
     NumExpressionIds
   };
   Id _id;
@@ -1695,6 +1702,18 @@ class StringMeasure : public SpecificExpression {
   void finalize();
 };
 
+class StringEncode : public SpecificExpression<Expression::StringEncodeId> {
+public:
+  StringEncode(MixedArena& allocator) {}
+
+  StringEncodeOp op;
+
+  Expression* ref;
+  Expression* ptr;
+
+  void finalize();
+};
+
 // Globals
 
 struct Named {
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index d81aa0ca4c5..429ff7bae34 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -3927,6 +3927,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
       if (maybeVisitStringMeasure(curr, opcode)) {
         break;
       }
+      if (maybeVisitStringEncode(curr, opcode)) {
+        break;
+      }
       if (opcode == BinaryConsts::RefIsFunc ||
           opcode == BinaryConsts::RefIsData ||
           opcode == BinaryConsts::RefIsI31) {
@@ -7190,6 +7193,33 @@ bool WasmBinaryBuilder::maybeVisitStringMeasure(Expression*& out,
   return true;
 }
 
+bool WasmBinaryBuilder::maybeVisitStringEncode(Expression*& out,
+                                               uint32_t code) {
+  StringEncodeOp op;
+  // TODO: share this code with string.measure?
+  if (code == BinaryConsts::StringEncodeWTF8) {
+    auto policy = getU32LEB();
+    switch (policy) {
+      case BinaryConsts::StringPolicy::UTF8:
+        op = StringEncodeUTF8;
+        break;
+      case BinaryConsts::StringPolicy::WTF8:
+        op = StringEncodeWTF8;
+        break;
+      default:
+        throwError("bad policy for string.encode");
+    }
+  } else if (code == BinaryConsts::StringEncodeWTF16) {
+    op = StringEncodeWTF16;
+  } else {
+    return false;
+  }
+  auto* ptr = popNonVoidExpression();
+  auto* ref = popNonVoidExpression();
+  out = Builder(wasm).makeStringEncode(op, ref, ptr);
+  return true;
+}
+
 void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) {
   BYN_TRACE("zz node: RefAs\n");
   switch (code) {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index 98ed64c2d17..68bd691042c 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -2973,6 +2973,25 @@ Expression* SExpressionWasmBuilder::makeStringMeasure(Element& s,
   return Builder(wasm).makeStringMeasure(op, parseExpression(s[i]));
 }
 
+Expression* SExpressionWasmBuilder::makeStringEncode(Element& s,
+                                                     StringEncodeOp op) {
+  size_t i = 1;
+  // string.encode_wtf8 carries a policy keyword (utf8 or wtf8) that picks the
+  // concrete op; string.encode_wtf16 goes straight to its two operands.
+  if (op == StringEncodeWTF8) {
+    const char* str = s[i++]->c_str();
+    if (strncmp(str, "utf8", 4) == 0) {
+      op = StringEncodeUTF8;
+    } else if (strncmp(str, "wtf8", 4) == 0) {
+      op = StringEncodeWTF8;
+    } else {
+      throw ParseException("bad string.encode op", s.line, s.col);
+    }
+  }
+  return Builder(wasm).makeStringEncode(
+    op, parseExpression(s[i]), parseExpression(s[i + 1]));
+}
+
 // converts an s-expression string representing binary data into an output
 // sequence of raw bytes this appends to data, which may already contain
 // content.
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index e3740ecee8d..a8dfd16e689 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -2281,6 +2281,27 @@ void BinaryInstWriter::visitStringMeasure(StringMeasure* curr) {
   }
 }
 
+void BinaryInstWriter::visitStringEncode(StringEncode* curr) {
+  o << int8_t(BinaryConsts::GCPrefix);
+  // The UTF8 and WTF8 ops share the StringEncodeWTF8 opcode and are told
+  // apart by the policy immediate that follows it; WTF16 has no immediate.
+  switch (curr->op) {
+    case StringEncodeUTF8:
+      o << U32LEB(BinaryConsts::StringEncodeWTF8)
+        << U32LEB(BinaryConsts::StringPolicy::UTF8);
+      break;
+    case StringEncodeWTF8:
+      o << U32LEB(BinaryConsts::StringEncodeWTF8)
+        << U32LEB(BinaryConsts::StringPolicy::WTF8);
+      break;
+    case StringEncodeWTF16:
+      o << U32LEB(BinaryConsts::StringEncodeWTF16);
+      break;
+    default:
+      WASM_UNREACHABLE("invalid string.encode*");
+  }
+}
+
 void BinaryInstWriter::emitScopeEnd(Expression* curr) {
   assert(!breakStack.empty());
   breakStack.pop_back();
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 749a4433905..d4337a87d66 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -1192,6 +1192,14 @@ void StringMeasure::finalize() {
   }
 }
 
+void StringEncode::finalize() {
+  if (ref->type == Type::unreachable || ptr->type == Type::unreachable) {
+    type = Type::unreachable;
+  } else {
+    type = Type::i32;
+  }
+}
+
 size_t Function::getNumParams() { return getParams().size(); }
 
 size_t Function::getNumVars() { return vars.size(); }
diff --git a/src/wasm2js.h b/src/wasm2js.h
index a6782ac045e..6042e2f5bc5 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -2315,6 +2315,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
       unimplemented(curr);
       WASM_UNREACHABLE("unimp");
     }
+    Ref visitStringEncode(StringEncode* curr) {
+      unimplemented(curr);
+      WASM_UNREACHABLE("unimp");
+    }
     Ref visitRefAs(RefAs* curr) {
       unimplemented(curr);
       WASM_UNREACHABLE("unimp");
diff --git a/test/lit/strings.wast b/test/lit/strings.wast
index 9776b569f6d..5afdfdbcbe2 100644
--- a/test/lit/strings.wast
+++ b/test/lit/strings.wast
@@ -5,12 +5,12 @@
 ;; RUN: foreach %s %t wasm-opt --enable-strings --enable-reference-types --roundtrip -S -o - | filecheck %s
 
 (module
+  ;; CHECK:      (type $ref?|string|_=>_none (func (param stringref)))
+
   ;; CHECK:      (type $ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref|string|_ref|stringview_wtf8|_ref|stringview_wtf16|_ref|stringview_iter|_=>_none (func (param stringref stringview_wtf8 stringview_wtf16 stringview_iter stringref stringview_wtf8 stringview_wtf16 stringview_iter (ref string) (ref stringview_wtf8) (ref stringview_wtf16) (ref stringview_iter))))
 
   ;; CHECK:      (type $none_=>_none (func))
 
-  ;; CHECK:      (type $ref?|string|_=>_none (func (param stringref)))
-
   ;; CHECK:      (global $string-const stringref (string.const "string in a global"))
   (global $string-const stringref (string.const "string in a global"))
 
@@ -140,4 +140,49 @@
       )
     )
   )
+
+  ;; CHECK:      (func $string.encode (param $ref stringref)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.eqz
+  ;; CHECK-NEXT:    (string.encode_wtf8 wtf8
+  ;; CHECK-NEXT:     (local.get $ref)
+  ;; CHECK-NEXT:     (i32.const 10)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (string.encode_wtf8 utf8
+  ;; CHECK-NEXT:    (local.get $ref)
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (string.encode_wtf16
+  ;; CHECK-NEXT:    (local.get $ref)
+  ;; CHECK-NEXT:    (i32.const 30)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $string.encode (param $ref stringref)
+    (drop
+      (i32.eqz ;; validate the output is i32
+        (string.encode_wtf8 wtf8
+          (local.get $ref)
+          (i32.const 10)
+        )
+      )
+    )
+    (drop
+      (string.encode_wtf8 utf8
+        (local.get $ref)
+        (i32.const 20)
+      )
+    )
+    (drop
+      (string.encode_wtf16
+        (local.get $ref)
+        (i32.const 30)
+      )
+    )
+  )
 )