Skip to content

[Strings] string.encode #4776

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions scripts/gen-s-parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,8 @@
("string.const", "makeStringConst(s)"),
("string.measure_wtf8", "makeStringMeasure(s, StringMeasureWTF8)"),
("string.measure_wtf16", "makeStringMeasure(s, StringMeasureWTF16)"),
("string.encode_wtf8", "makeStringEncode(s, StringEncodeWTF8)"),
("string.encode_wtf16", "makeStringEncode(s, StringEncodeWTF16)"),
]


Expand Down
11 changes: 11 additions & 0 deletions src/gen-s-parser.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3132,6 +3132,17 @@ switch (op[0]) {
case 'c':
if (strcmp(op, "string.const") == 0) { return makeStringConst(s); }
goto parse_error;
case 'e': {
switch (op[17]) {
case '1':
if (strcmp(op, "string.encode_wtf16") == 0) { return makeStringEncode(s, StringEncodeWTF16); }
goto parse_error;
case '8':
if (strcmp(op, "string.encode_wtf8") == 0) { return makeStringEncode(s, StringEncodeWTF8); }
goto parse_error;
default: goto parse_error;
}
}
case 'm': {
switch (op[18]) {
case '1':
Expand Down
1 change: 1 addition & 0 deletions src/ir/ReFinalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); }
void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); }
void ReFinalize::visitStringConst(StringConst* curr) { curr->finalize(); }
void ReFinalize::visitStringMeasure(StringMeasure* curr) { curr->finalize(); }
void ReFinalize::visitStringEncode(StringEncode* curr) { curr->finalize(); }

void ReFinalize::visitFunction(Function* curr) {
// we may have changed the body from unreachable to none, which might be bad
Expand Down
3 changes: 3 additions & 0 deletions src/ir/cost.h
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,9 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
CostType visitStringMeasure(StringMeasure* curr) {
return 6 + visit(curr->ref);
}
CostType visitStringEncode(StringEncode* curr) {
return 6 + visit(curr->ref) + visit(curr->ptr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess cost for encoding should be bigger than visitArrayCopy has. Also it should depends on format. encode_wtf16 probably will have slightly less cost than encode_wtf8

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True. I'm not sure how much it matters though, or how we can pick a better number here. "6" is just what we use for an operation that performs a loop, which we can't really predict. If we wanted to be really precise perhaps we should emit "cannot predict" here somehow. But in practice these numbers mainly matter when comparing equivalent code, and there isn't really equivalent code to StringEncode (that is, there is no other sequence that is identical to it, like say eqz(eqz(x)) == (x != 0)).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it!

}

private:
CostType nullCheckCost(Expression* ref) {
Expand Down
9 changes: 8 additions & 1 deletion src/ir/effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,14 @@ class EffectAnalyzer {
}
void visitStringNew(StringNew* curr) {}
void visitStringConst(StringConst* curr) {}
void visitStringMeasure(StringMeasure* curr) {}
void visitStringMeasure(StringMeasure* curr) {
// traps when ref is null.
parent.implicitTrap = true;
}
void visitStringEncode(StringEncode* curr) {
// traps when ref is null or we write out of bounds.
parent.implicitTrap = true;
}
};

public:
Expand Down
4 changes: 4 additions & 0 deletions src/ir/possible-contents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,10 @@ struct InfoCollector
// TODO: optimize when possible
addRoot(curr);
}
void visitStringEncode(StringEncode* curr) {
// TODO: optimize when possible
addRoot(curr);
}

// TODO: Model which throws can go to which catches. For now, anything thrown
// is sent to the location of that tag, and any catch of that tag can
Expand Down
15 changes: 15 additions & 0 deletions src/passes/Print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2257,6 +2257,21 @@ struct PrintExpressionContents
WASM_UNREACHABLE("invalid string.measure*");
}
}
void visitStringEncode(StringEncode* curr) {
switch (curr->op) {
case StringEncodeUTF8:
printMedium(o, "string.encode_wtf8 utf8");
break;
case StringEncodeWTF8:
printMedium(o, "string.encode_wtf8 wtf8");
break;
case StringEncodeWTF16:
printMedium(o, "string.encode_wtf16");
break;
default:
WASM_UNREACHABLE("invalid string.encode*");
}
}
};

// Prints an expression in s-expr format, including both the
Expand Down
3 changes: 3 additions & 0 deletions src/wasm-binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,8 @@ enum ASTNodes {
StringConst = 0x82,
StringMeasureWTF8 = 0x84,
StringMeasureWTF16 = 0x85,
StringEncodeWTF8 = 0x86,
StringEncodeWTF16 = 0x87,
};

enum MemoryAccess {
Expand Down Expand Up @@ -1725,6 +1727,7 @@ class WasmBinaryBuilder {
bool maybeVisitStringNew(Expression*& out, uint32_t code);
bool maybeVisitStringConst(Expression*& out, uint32_t code);
bool maybeVisitStringMeasure(Expression*& out, uint32_t code);
bool maybeVisitStringEncode(Expression*& out, uint32_t code);
void visitSelect(Select* curr, uint8_t code);
void visitReturn(Return* curr);
void visitMemorySize(MemorySize* curr);
Expand Down
9 changes: 9 additions & 0 deletions src/wasm-builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1011,6 +1011,15 @@ class Builder {
ret->finalize();
return ret;
}
StringEncode*
makeStringEncode(StringEncodeOp op, Expression* ref, Expression* ptr) {
auto* ret = wasm.allocator.alloc<StringEncode>();
ret->op = op;
ret->ref = ref;
ret->ptr = ptr;
ret->finalize();
return ret;
}

// Additional helpers

Expand Down
8 changes: 8 additions & 0 deletions src/wasm-delegations-fields.def
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,14 @@ switch (DELEGATE_ID) {
DELEGATE_END(StringMeasure);
break;
}
case Expression::Id::StringEncodeId: {
DELEGATE_START(StringEncode);
DELEGATE_FIELD_INT(StringEncode, op);
DELEGATE_FIELD_CHILD(StringEncode, ptr);
DELEGATE_FIELD_CHILD(StringEncode, ref);
DELEGATE_END(StringEncode);
break;
}
}

#undef DELEGATE_ID
Expand Down
1 change: 1 addition & 0 deletions src/wasm-delegations.def
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,6 @@ DELEGATE(RefAs);
DELEGATE(StringNew);
DELEGATE(StringConst);
DELEGATE(StringMeasure);
DELEGATE(StringEncode);

#undef DELEGATE
3 changes: 3 additions & 0 deletions src/wasm-interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,9 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
Flow visitStringMeasure(StringMeasure* curr) {
WASM_UNREACHABLE("unimplemented string.measure");
}
Flow visitStringEncode(StringEncode* curr) {
WASM_UNREACHABLE("unimplemented string.encode");
}

virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); }

Expand Down
1 change: 1 addition & 0 deletions src/wasm-s-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ class SExpressionWasmBuilder {
Expression* makeStringNew(Element& s, StringNewOp op);
Expression* makeStringConst(Element& s);
Expression* makeStringMeasure(Element& s, StringMeasureOp op);
Expression* makeStringEncode(Element& s, StringEncodeOp op);

// Helper functions
Type parseOptionalResultType(Element& s, Index& i);
Expand Down
19 changes: 19 additions & 0 deletions src/wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,12 @@ enum StringMeasureOp {
StringMeasureWTF16,
};

enum StringEncodeOp {
StringEncodeUTF8,
StringEncodeWTF8,
StringEncodeWTF16,
};

//
// Expressions
//
Expand Down Expand Up @@ -694,6 +700,7 @@ class Expression {
StringNewId,
StringConstId,
StringMeasureId,
StringEncodeId,
NumExpressionIds
};
Id _id;
Expand Down Expand Up @@ -1695,6 +1702,18 @@ class StringMeasure : public SpecificExpression<Expression::StringMeasureId> {
void finalize();
};

class StringEncode : public SpecificExpression<Expression::StringEncodeId> {
public:
StringEncode(MixedArena& allocator) {}

StringEncodeOp op;

Expression* ref;
Expression* ptr;

void finalize();
};

// Globals

struct Named {
Expand Down
30 changes: 30 additions & 0 deletions src/wasm/wasm-binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3927,6 +3927,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
if (maybeVisitStringMeasure(curr, opcode)) {
break;
}
if (maybeVisitStringEncode(curr, opcode)) {
break;
}
if (opcode == BinaryConsts::RefIsFunc ||
opcode == BinaryConsts::RefIsData ||
opcode == BinaryConsts::RefIsI31) {
Expand Down Expand Up @@ -7190,6 +7193,33 @@ bool WasmBinaryBuilder::maybeVisitStringMeasure(Expression*& out,
return true;
}

bool WasmBinaryBuilder::maybeVisitStringEncode(Expression*& out,
uint32_t code) {
StringEncodeOp op;
// TODO: share this code with string.measure?
if (code == BinaryConsts::StringEncodeWTF8) {
auto policy = getU32LEB();
switch (policy) {
case BinaryConsts::StringPolicy::UTF8:
op = StringEncodeUTF8;
break;
case BinaryConsts::StringPolicy::WTF8:
op = StringEncodeWTF8;
break;
default:
throwError("bad policy for string.encode");
}
} else if (code == BinaryConsts::StringEncodeWTF16) {
op = StringEncodeWTF16;
} else {
return false;
}
auto* ptr = popNonVoidExpression();
auto* ref = popNonVoidExpression();
out = Builder(wasm).makeStringEncode(op, ref, ptr);
return true;
}

void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) {
BYN_TRACE("zz node: RefAs\n");
switch (code) {
Expand Down
17 changes: 17 additions & 0 deletions src/wasm/wasm-s-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2973,6 +2973,23 @@ Expression* SExpressionWasmBuilder::makeStringMeasure(Element& s,
return Builder(wasm).makeStringMeasure(op, parseExpression(s[i]));
}

Expression* SExpressionWasmBuilder::makeStringEncode(Element& s,
StringEncodeOp op) {
size_t i = 1;
if (op == StringEncodeWTF8) {
const char* str = s[i++]->c_str();
if (strncmp(str, "utf8", 4) == 0) {
op = StringEncodeUTF8;
} else if (strncmp(str, "wtf8", 4) == 0) {
op = StringEncodeWTF8;
} else {
throw ParseException("bad string.new op", s.line, s.col);
}
}
return Builder(wasm).makeStringEncode(
op, parseExpression(s[i]), parseExpression(s[i + 1]));
}

// converts an s-expression string representing binary data into an output
// sequence of raw bytes this appends to data, which may already contain
// content.
Expand Down
19 changes: 19 additions & 0 deletions src/wasm/wasm-stack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2281,6 +2281,25 @@ void BinaryInstWriter::visitStringMeasure(StringMeasure* curr) {
}
}

void BinaryInstWriter::visitStringEncode(StringEncode* curr) {
o << int8_t(BinaryConsts::GCPrefix);
switch (curr->op) {
case StringEncodeUTF8:
o << U32LEB(BinaryConsts::StringEncodeWTF8)
<< U32LEB(BinaryConsts::StringPolicy::UTF8);
break;
case StringEncodeWTF8:
o << U32LEB(BinaryConsts::StringEncodeWTF8)
<< U32LEB(BinaryConsts::StringPolicy::WTF8);
break;
case StringEncodeWTF16:
o << U32LEB(BinaryConsts::StringEncodeWTF16);
break;
default:
WASM_UNREACHABLE("invalid string.new*");
}
}

void BinaryInstWriter::emitScopeEnd(Expression* curr) {
assert(!breakStack.empty());
breakStack.pop_back();
Expand Down
8 changes: 8 additions & 0 deletions src/wasm/wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,14 @@ void StringMeasure::finalize() {
}
}

void StringEncode::finalize() {
if (ref->type == Type::unreachable || ptr->type == Type::unreachable) {
type = Type::unreachable;
} else {
type = Type::i32;
}
}

size_t Function::getNumParams() { return getParams().size(); }

size_t Function::getNumVars() { return vars.size(); }
Expand Down
4 changes: 4 additions & 0 deletions src/wasm2js.h
Original file line number Diff line number Diff line change
Expand Up @@ -2315,6 +2315,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
unimplemented(curr);
WASM_UNREACHABLE("unimp");
}
Ref visitStringEncode(StringEncode* curr) {
unimplemented(curr);
WASM_UNREACHABLE("unimp");
}
Ref visitRefAs(RefAs* curr) {
unimplemented(curr);
WASM_UNREACHABLE("unimp");
Expand Down
49 changes: 47 additions & 2 deletions test/lit/strings.wast
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
;; RUN: foreach %s %t wasm-opt --enable-strings --enable-reference-types --roundtrip -S -o - | filecheck %s

(module
;; CHECK: (type $ref?|string|_=>_none (func (param stringref)))

;; CHECK: (type $ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref|string|_ref|stringview_wtf8|_ref|stringview_wtf16|_ref|stringview_iter|_=>_none (func (param stringref stringview_wtf8 stringview_wtf16 stringview_iter stringref stringview_wtf8 stringview_wtf16 stringview_iter (ref string) (ref stringview_wtf8) (ref stringview_wtf16) (ref stringview_iter))))

;; CHECK: (type $none_=>_none (func))

;; CHECK: (type $ref?|string|_=>_none (func (param stringref)))

;; CHECK: (global $string-const stringref (string.const "string in a global"))
(global $string-const stringref (string.const "string in a global"))

Expand Down Expand Up @@ -140,4 +140,49 @@
)
)
)

;; CHECK: (func $string.encode (param $ref stringref)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (i32.eqz
;; CHECK-NEXT: (string.encode_wtf8 wtf8
;; CHECK-NEXT: (local.get $ref)
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (string.encode_wtf8 utf8
;; CHECK-NEXT: (local.get $ref)
;; CHECK-NEXT: (i32.const 20)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (string.encode_wtf16
;; CHECK-NEXT: (local.get $ref)
;; CHECK-NEXT: (i32.const 30)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $string.encode (param $ref stringref)
(drop
(i32.eqz ;; validate the output is i32
(string.encode_wtf8 wtf8
(local.get $ref)
(i32.const 10)
)
)
)
(drop
(string.encode_wtf8 utf8
(local.get $ref)
(i32.const 20)
)
)
(drop
(string.encode_wtf16
(local.get $ref)
(i32.const 30)
)
)
)
)