Skip to content

Commit ee33371

Browse files
Scanner: Generate error on inbalanced RLO/LRO/PDF override markers.
1 parent 8d315ee commit ee33371

17 files changed

+268
-11
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
### 0.7.6 (unreleased)
22

3+
* Scanner: Generates a parser error when comments do contain an unbalanced or underflowing set of unicode direction override markers (LRO, RLO, PDF)
34

45
### 0.7.5 (2020-11-18)
56

liblangutil/CharStream.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,19 @@ class CharStream
8585
/// @returns The character of the current location after update is returned.
8686
char setPosition(size_t _location);
8787

88+
/// Tests whether or not given octet sequence is present at the current reading position.
89+
/// @returns true if the sequence could be found, false otherwise.
90+
bool prefixMatch(std::string_view _sequence) const
91+
{
92+
if (m_position + _sequence.size() >= m_source.size())
93+
return false;
94+
95+
for (size_t i = 0; i < _sequence.size(); ++i)
96+
if (_sequence[i] != get(i))
97+
return false;
98+
return true;
99+
}
100+
88101
void reset() { m_position = 0; }
89102

90103
std::string const& source() const noexcept { return m_source; }

liblangutil/Scanner.cpp

Lines changed: 79 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ string to_string(ScannerError _errorCode)
7979
case ScannerError::IllegalExponent: return "Invalid exponent.";
8080
case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
8181
case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
82+
case ScannerError::DirectionalOverrideUnderflowInComment: return "Unicode direction override underflow in comment.";
83+
case ScannerError::DirectionalOverrideMismatchInComment: return "Mismatching directional override markers in comment.";
8284
default:
8385
solAssert(false, "Unhandled case in to_string(ScannerError)");
8486
return "";
@@ -271,12 +273,67 @@ bool Scanner::skipWhitespaceExceptUnicodeLinebreak()
271273
return sourcePos() != startPosition;
272274
}
273275

276+
struct Scanner::UnicodeDirectionOverrideProcessor
277+
{
278+
Scanner& scanner;
279+
int rtlOverrideDepth = 0;
280+
281+
pair<bool, ScannerError> operator()()
282+
{
283+
// U+202D (LRO - Left-to-Right Override)
284+
// U+202E (RLO - Right-to-Left Override)
285+
if (
286+
scanner.tryScanByteSequence("\xE2\x80\xAD") ||
287+
scanner.tryScanByteSequence("\xE2\x80\xAE")
288+
)
289+
{
290+
rtlOverrideDepth++;
291+
return pair{true, ScannerError::NoError};
292+
}
293+
else if (scanner.tryScanByteSequence("\xE2\x80\xAC")) // U+202C (PDF - Pop Directional Formatting)
294+
{
295+
rtlOverrideDepth--;
296+
if (rtlOverrideDepth < 0)
297+
return pair{true, ScannerError::DirectionalOverrideUnderflowInComment};
298+
else
299+
return pair{true, ScannerError::NoError};
300+
}
301+
else
302+
return pair{false, ScannerError::NoError};
303+
}
304+
305+
ScannerError error() const noexcept
306+
{
307+
if (rtlOverrideDepth < 0)
308+
return ScannerError::DirectionalOverrideUnderflowInComment;
309+
else if (rtlOverrideDepth > 0)
310+
return ScannerError::DirectionalOverrideMismatchInComment;
311+
else
312+
return ScannerError::NoError;
313+
}
314+
};
315+
274316
Token Scanner::skipSingleLineComment()
275317
{
318+
UnicodeDirectionOverrideProcessor unicodeDirectionalOverride{*this};
319+
276320
// Line terminator is not part of the comment. If it is a
277321
// non-ascii line terminator, it will result in a parser error.
278322
while (!isUnicodeLinebreak())
279-
if (!advance()) break;
323+
{
324+
auto const [processed, errorCode] = unicodeDirectionalOverride();
325+
if (processed && errorCode != ScannerError::NoError)
326+
return setError(errorCode);
327+
else if (!processed)
328+
{
329+
if (!advance())
330+
break;
331+
}
332+
}
333+
334+
if (unicodeDirectionalOverride.error() != ScannerError::NoError)
335+
// Unbalanced RLO/LRO/PDF codepoint sequences in comment.
336+
return setError(unicodeDirectionalOverride.error());
280337

281338
return Token::Whitespace;
282339
}
@@ -349,18 +406,30 @@ size_t Scanner::scanSingleLineDocComment()
349406

350407
Token Scanner::skipMultiLineComment()
351408
{
409+
UnicodeDirectionOverrideProcessor unicodeDirectionalOverride{*this};
410+
352411
while (!isSourcePastEndOfInput())
353412
{
354-
char ch = m_char;
355-
advance();
356-
357-
// If we have reached the end of the multi-line comment, we
358-
// consume the '/' and insert a whitespace. This way all
359-
// multi-line comments are treated as whitespace.
360-
if (ch == '*' && m_char == '/')
413+
auto const [processed, errorCode] = unicodeDirectionalOverride();
414+
if (processed && errorCode != ScannerError::NoError)
415+
return setError(errorCode);
416+
else if (!processed)
361417
{
362-
m_char = ' ';
363-
return Token::Whitespace;
418+
char ch = m_char;
419+
advance();
420+
421+
// If we have reached the end of the multi-line comment, we
422+
// consume the '/' and insert a whitespace. This way all
423+
// multi-line comments are treated as whitespace.
424+
if (ch == '*' && m_char == '/')
425+
{
426+
if (unicodeDirectionalOverride.error() != ScannerError::NoError)
427+
// Unbalanced RLO/LRO/PDF codepoint sequences in comment.
428+
return setError(unicodeDirectionalOverride.error());
429+
430+
m_char = ' ';
431+
return Token::Whitespace;
432+
}
364433
}
365434
}
366435
// Unterminated multi-line comment.

liblangutil/Scanner.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ enum class ScannerError
8989
IllegalExponent,
9090
IllegalNumberEnd,
9191

92+
DirectionalOverrideUnderflowInComment,
93+
DirectionalOverrideMismatchInComment,
94+
9295
OctalNotAllowed,
9396
};
9497

@@ -183,6 +186,8 @@ class Scanner
183186
///@}
184187

185188
private:
189+
struct UnicodeDirectionOverrideProcessor;
190+
186191
inline Token setError(ScannerError _error) noexcept
187192
{
188193
m_tokens[NextNext].error = _error;
@@ -248,6 +253,19 @@ class Scanner
248253
/// Scans a slash '/' and depending on the characters returns the appropriate token
249254
Token scanSlash();
250255

256+
/// Tries scanning given octet sequence and advances reading position respectively iff found.
257+
/// @returns true if it could be scanned, false otherwise.
258+
bool tryScanByteSequence(std::string_view _sequence)
259+
{
260+
if (!m_source->prefixMatch(_sequence))
261+
return false;
262+
263+
for (size_t i = 0; i < _sequence.size(); ++i)
264+
advance();
265+
266+
return true;
267+
}
268+
251269
/// Scans an escape-sequence which is part of a string and adds the
252270
/// decoded character to the current literal. Returns true if a pattern
253271
/// is scanned.

scripts/test_antlr_grammar.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,9 @@ done < <(
116116
grep -riL -E \
117117
"^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \
118118
"${ROOT_DIR}/test/libsolidity/syntaxTests" \
119-
"${ROOT_DIR}/test/libsolidity/semanticTests" \
119+
"${ROOT_DIR}/test/libsolidity/semanticTests" |
120+
grep -v -E 'comments/.*_unicode.*.sol'
121+
# Skipping the unicode tests as I couldn't adapt the lexical grammar to recursively counting RLO/LRO/PDF's.
120122
)
121123

122124
YUL_FILES=()
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// pop 1
8+
/*underflow ‬*/
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (137-152): Unicode direction override underflow in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// pop 2
8+
/*underflow ‬‬*/
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (137-152): Unicode direction override underflow in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// push 1
8+
/*overflow ‮*/
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (138-153): Mismatching directional override markers in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// push 2
8+
/*overflow ‮‮*/
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (138-156): Mismatching directional override markers in comment.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// first push 1, then pop 1
8+
/*ok ‮‬*/
9+
10+
// first push 2, then pop 2
11+
/*ok ‮‮‬‬*/
12+
13+
// first push 3, then pop 3
14+
/*ok ‮‮‮‬‬‬*/
15+
}
16+
}
17+
// ----
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// first pop, then push
8+
/*overflow ‬‮*/
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (152-166): Unicode direction override underflow in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// pop 1
8+
// underflow ‬
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (137-153): Unicode direction override underflow in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// pop 2
8+
// underflow ‬‬
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (137-153): Unicode direction override underflow in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// push 1
8+
// overflow ‮
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (138-153): Mismatching directional override markers in comment.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// push 2
8+
// overflow ‮‮
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (138-156): Mismatching directional override markers in comment.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// first push 1, then pop 1
8+
// ok ‮‬
9+
10+
// first push 2, then pop 2
11+
// ok ‮‮‬‬
12+
13+
// first push 3, then pop 3
14+
// ok ‮‮‮‬‬‬
15+
}
16+
}
17+
// ----
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: UNLICENSED
2+
pragma solidity ^0.7.0;
3+
4+
contract C {
5+
function f() public pure
6+
{
7+
// first pop, then push
8+
// underflow ‬‮
9+
}
10+
}
11+
// ----
12+
// ParserError 8936: (152-168): Unicode direction override underflow in comment.

0 commit comments

Comments
 (0)