From f32891402b9b4b52903a19c7ec9cebc3cf66b8b7 Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Thu, 5 Sep 2019 18:56:31 +0200 Subject: [PATCH 1/9] Add Utf8 --- .gitignore | 2 ++ lib/ffi.dart | 7 +++++++ lib/src/allocator.dart | 14 ++++++++++++++ lib/src/utf8.dart | 43 ++++++++++++++++++++++++++++++++++++++++++ pubspec.yaml | 1 + test/utf8_test.dart | 14 ++++++++++++++ 6 files changed, 81 insertions(+) create mode 100644 .gitignore create mode 100644 lib/ffi.dart create mode 100644 lib/src/allocator.dart create mode 100644 lib/src/utf8.dart create mode 100644 test/utf8_test.dart diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..93d2a1e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.packages +pubspec.lock diff --git a/lib/ffi.dart b/lib/ffi.dart new file mode 100644 index 0000000..511dc5e --- /dev/null +++ b/lib/ffi.dart @@ -0,0 +1,7 @@ +// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +library ffi; + +export 'src/utf8.dart'; \ No newline at end of file diff --git a/lib/src/allocator.dart b/lib/src/allocator.dart new file mode 100644 index 0000000..98084c2 --- /dev/null +++ b/lib/src/allocator.dart @@ -0,0 +1,14 @@ +// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'dart:ffi'; + +abstract class Allocator { + Pointer allocate(int bytes); +} + +class MallocAllocator implements Allocator { + Pointer allocate(int bytes) => + Pointer.allocate(count: bytes).cast(); +} diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart new file mode 100644 index 0000000..ead4b4b --- /dev/null +++ b/lib/src/utf8.dart @@ -0,0 +1,43 @@ +// Copyright (c) 2019, the Dart project authors. 
Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +library utf8; + +import 'dart:convert'; +import 'dart:ffi'; +import 'dart:typed_data'; + +/// [Utf8] implements conversion between Dart strings and null-termianted +/// Utf8-encoded "char*" strings in C. +class Utf8 extends Struct { + @Uint8() + int char; + + static String fromUtf8(Pointer str) { + final Pointer array = str.cast(); + int count = 0x1000; + Uint8List string = array.asExternalTypedData(count: count); + int i = 0; + for (; string[i] != 0; ++i) { + if (i == count) { + count *= 2; + string = array.asExternalTypedData(count: count); + } + } + return Utf8Decoder().convert(Uint8List.view(string.buffer, 0, i)); + } + + static Pointer toUtf8(String s) { + final List units = Utf8Encoder().convert(s); + final Pointer result = + Pointer.allocate(count: units.length + 1); + final Uint8List string = + result.asExternalTypedData(count: units.length + 1); + string.setAll(0, units); + string[units.length] = 0; + return result.cast(); + } + + String toString() => fromUtf8(addressOf); +} diff --git a/pubspec.yaml b/pubspec.yaml index 0baeb71..b16bad2 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -12,3 +12,4 @@ dependencies: dev_dependencies: pedantic: ^1.0.0 + test: ^1.6.8 diff --git a/test/utf8_test.dart b/test/utf8_test.dart new file mode 100644 index 0000000..4bc736f --- /dev/null +++ b/test/utf8_test.dart @@ -0,0 +1,14 @@ +// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:test/test.dart'; +import 'package:ffi/ffi.dart'; + +main() { + test("fromUtf8 . 
toUtf8 is identity", () { + final String start = "Hello World!\n"; + final String end = Utf8.fromUtf8(Utf8.toUtf8(start)); + expect(end, equals(start)); + }); +} From 7199de77154a028dc2b3f074a29cc391f6cd811b Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Thu, 5 Sep 2019 18:57:42 +0200 Subject: [PATCH 2/9] newline --- lib/ffi.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ffi.dart b/lib/ffi.dart index 511dc5e..68486ac 100644 --- a/lib/ffi.dart +++ b/lib/ffi.dart @@ -4,4 +4,4 @@ library ffi; -export 'src/utf8.dart'; \ No newline at end of file +export 'src/utf8.dart'; From 79acb9dc3552a2215eba0896b7576d0c49e2562d Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Thu, 5 Sep 2019 18:58:21 +0200 Subject: [PATCH 3/9] .dart_tool --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 93d2a1e..12901d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .packages pubspec.lock +.dart_tool From 8fd8f51569230b81a3d9e7b1a31b83a448011788 Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Thu, 5 Sep 2019 18:59:34 +0200 Subject: [PATCH 4/9] remove accidental file --- lib/src/allocator.dart | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 lib/src/allocator.dart diff --git a/lib/src/allocator.dart b/lib/src/allocator.dart deleted file mode 100644 index 98084c2..0000000 --- a/lib/src/allocator.dart +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. 
- -import 'dart:ffi'; - -abstract class Allocator { - Pointer allocate(int bytes); -} - -class MallocAllocator implements Allocator { - Pointer allocate(int bytes) => - Pointer.allocate(count: bytes).cast(); -} From 5c49a3e4224aefc10f9359a841d88382edd833fb Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Thu, 5 Sep 2019 19:00:23 +0200 Subject: [PATCH 5/9] Simplify definition --- lib/src/utf8.dart | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart index ead4b4b..d506f95 100644 --- a/lib/src/utf8.dart +++ b/lib/src/utf8.dart @@ -11,9 +11,6 @@ import 'dart:typed_data'; /// [Utf8] implements conversion between Dart strings and null-termianted /// Utf8-encoded "char*" strings in C. class Utf8 extends Struct { - @Uint8() - int char; - static String fromUtf8(Pointer str) { final Pointer array = str.cast(); int count = 0x1000; From 1c6d08d73b92eaa4ceb673db36566a10ca407380 Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Fri, 6 Sep 2019 14:43:48 +0200 Subject: [PATCH 6/9] Review comments --- .gitignore | 2 +- lib/ffi.dart | 2 -- lib/src/utf8.dart | 42 ++++++++++++++++++++++++++++-------------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 12901d3..79f51c3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ +.dart_tool .packages pubspec.lock -.dart_tool diff --git a/lib/ffi.dart b/lib/ffi.dart index 68486ac..b43c3e9 100644 --- a/lib/ffi.dart +++ b/lib/ffi.dart @@ -2,6 +2,4 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. -library ffi; - export 'src/utf8.dart'; diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart index d506f95..079e1df 100644 --- a/lib/src/utf8.dart +++ b/lib/src/utf8.dart @@ -2,37 +2,51 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. 
-library utf8; - import 'dart:convert'; import 'dart:ffi'; import 'dart:typed_data'; /// [Utf8] implements conversion between Dart strings and null-termianted /// Utf8-encoded "char*" strings in C. +// +// TODO(https://github.com/dart-lang/sdk/issues/38172): No need to use +// 'asExternalTypedData' when Pointer operations are performant. class Utf8 extends Struct { - static String fromUtf8(Pointer str) { - final Pointer array = str.cast(); + /// Creates a [String] containing the characters UTF-8 encoded in [string]. + /// + /// The [string] must be a zero-terminated byte sequence of valid UTF-8 + /// encodings of Unicode code points. It may also contain UTF-8 encodings of + /// unpaired surrogate code points, which is not otherwise valid UTF-8, but + /// which may be created when encoding a Dart string containing an unpaired + /// surrogate. + /// + /// Returns a Dart string containing the decoded code points. + static String fromUtf8(Pointer string) { + final Pointer array = string.cast(); int count = 0x1000; - Uint8List string = array.asExternalTypedData(count: count); + Uint8List nativeString = array.asExternalTypedData(count: count); int i = 0; - for (; string[i] != 0; ++i) { - if (i == count) { + while (nativeString[i] != 0) { + if (++i == count) { count *= 2; - string = array.asExternalTypedData(count: count); + nativeString = array.asExternalTypedData(count: count); } } - return Utf8Decoder().convert(Uint8List.view(string.buffer, 0, i)); + return utf8.decode(Uint8List.view(nativeString.buffer, 0, i)); } - static Pointer toUtf8(String s) { - final List units = Utf8Encoder().convert(s); + /// Convert a [String] to a Utf8-encoded null-terminated C string. + /// + /// If 'string' contains NULL bytes, the converted string will be truncated + /// prematurely. 
+ static Pointer toUtf8(String string) { + final units = utf8.encode(string); final Pointer result = Pointer.allocate(count: units.length + 1); - final Uint8List string = + final Uint8List nativeString = result.asExternalTypedData(count: units.length + 1); - string.setAll(0, units); - string[units.length] = 0; + nativeString.setAll(0, units); + nativeString[units.length] = 0; return result.cast(); } From d9a79dec141dbd29aca9b6be80230f5321049167 Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Fri, 6 Sep 2019 15:07:05 +0200 Subject: [PATCH 7/9] Improve documentation --- lib/src/utf8.dart | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart index 079e1df..a4edfa0 100644 --- a/lib/src/utf8.dart +++ b/lib/src/utf8.dart @@ -18,7 +18,7 @@ class Utf8 extends Struct { /// encodings of Unicode code points. It may also contain UTF-8 encodings of /// unpaired surrogate code points, which is not otherwise valid UTF-8, but /// which may be created when encoding a Dart string containing an unpaired - /// surrogate. + /// surrogate. See [Utf8Decoder] for details on decoding. /// /// Returns a Dart string containing the decoded code points. static String fromUtf8(Pointer string) { @@ -38,7 +38,10 @@ class Utf8 extends Struct { /// Convert a [String] to a Utf8-encoded null-terminated C string. /// /// If 'string' contains NULL bytes, the converted string will be truncated - /// prematurely. + /// prematurely. Unpaired surrogate code points in [string] will be preserved + /// in the UTF-8 encoded result. See [Utf8Encoder] for details on encoding + /// + /// Returns a malloc-allocated pointer to the result. 
static Pointer toUtf8(String string) { final units = utf8.encode(string); final Pointer result = From 2335e7f87cb40c553f15696619fe9656fc75d62a Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Mon, 9 Sep 2019 12:59:17 +0200 Subject: [PATCH 8/9] tests --- lib/src/utf8.dart | 36 ++++++++++++++--------- test/utf8_test.dart | 69 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 16 deletions(-) diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart index a4edfa0..0304fec 100644 --- a/lib/src/utf8.dart +++ b/lib/src/utf8.dart @@ -6,22 +6,15 @@ import 'dart:convert'; import 'dart:ffi'; import 'dart:typed_data'; -/// [Utf8] implements conversion between Dart strings and null-termianted +/// [Utf8] implements conversion between Dart strings and null-terminated /// Utf8-encoded "char*" strings in C. // -// TODO(https://github.com/dart-lang/sdk/issues/38172): No need to use +// TODO(https://github.com/dart-lang/ffi/issues/4): No need to use // 'asExternalTypedData' when Pointer operations are performant. class Utf8 extends Struct { - /// Creates a [String] containing the characters UTF-8 encoded in [string]. - /// - /// The [string] must be a zero-terminated byte sequence of valid UTF-8 - /// encodings of Unicode code points. It may also contain UTF-8 encodings of - /// unpaired surrogate code points, which is not otherwise valid UTF-8, but - /// which may be created when encoding a Dart string containing an unpaired - /// surrogate. See [Utf8Decoder] for details on decoding. - /// - /// Returns a Dart string containing the decoded code points. - static String fromUtf8(Pointer string) { + /// Returns the length of a null-terminated string -- the number of (one-byte) + /// characters before the first null byte. 
+ static int strlen(Pointer string) { final Pointer array = string.cast(); int count = 0x1000; Uint8List nativeString = array.asExternalTypedData(count: count); @@ -32,7 +25,24 @@ class Utf8 extends Struct { nativeString = array.asExternalTypedData(count: count); } } - return utf8.decode(Uint8List.view(nativeString.buffer, 0, i)); + return i; + } + + /// Creates a [String] containing the characters UTF-8 encoded in [string]. + /// + /// The [string] must be a zero-terminated byte sequence of valid UTF-8 + /// encodings of Unicode code points. It may also contain UTF-8 encodings of + /// unpaired surrogate code points, which is not otherwise valid UTF-8, but + /// which may be created when encoding a Dart string containing an unpaired + /// surrogate. See [Utf8Decoder] for details on decoding. + /// + /// Returns a Dart string containing the decoded code points. + static String fromUtf8(Pointer string) { + final int length = strlen(string); + return utf8.decode(Uint8List.view( + string.cast().asExternalTypedData(count: length).buffer, + 0, + length)); } /// Convert a [String] to a Utf8-encoded null-terminated C string. diff --git a/test/utf8_test.dart b/test/utf8_test.dart index 4bc736f..dd048a0 100644 --- a/test/utf8_test.dart +++ b/test/utf8_test.dart @@ -2,13 +2,76 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'dart:ffi'; +import 'dart:typed_data'; + import 'package:test/test.dart'; import 'package:ffi/ffi.dart'; +Pointer _bytesFromList(List ints) { + Pointer ptr = Pointer.allocate(count: ints.length); + Uint8List list = ptr.asExternalTypedData(count: ints.length); + list.setAll(0, ints); + return ptr; +} + main() { - test("fromUtf8 . 
toUtf8 is identity", () { + test("toUtf8 ASCII", () { final String start = "Hello World!\n"; - final String end = Utf8.fromUtf8(Utf8.toUtf8(start)); - expect(end, equals(start)); + final Pointer converted = Utf8.toUtf8(start).cast(); + final Uint8List end = + converted.asExternalTypedData(count: start.length + 1); + final matcher = + equals([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]); + expect(end, matcher); + converted.free(); + }); + + test("fromUtf8 ASCII", () { + final Pointer utf8 = _bytesFromList( + [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]).cast(); + final String end = Utf8.fromUtf8(utf8); + expect(end, "Hello World!\n"); + }); + + test("toUtf8 emoji", () { + final String start = "😎👿💬"; + final Pointer converted = Utf8.toUtf8(start).cast(); + final int length = Utf8.strlen(converted); + final Uint8List end = + converted.cast().asExternalTypedData(count: length + 1); + final matcher = + equals([240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]); + expect(end, matcher); + converted.free(); + }); + + test("fromUtf8 emoji", () { + final Pointer utf8 = _bytesFromList( + [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]).cast(); + final String end = Utf8.fromUtf8(utf8); + expect(end, "😎👿💬"); + }); + + test("toUtf8 unpaired surrogate", () { + final String start = String.fromCharCodes([0xD800, 0x1000]); + final Pointer converted = Utf8.toUtf8(start).cast(); + final int length = Utf8.strlen(converted); + final Uint8List end = + converted.cast().asExternalTypedData(count: length + 1); + expect(end, equals([237, 160, 128, 225, 128, 128, 0])); + converted.free(); + }); + + test("fromUtf8 unpaired surrogate", () { + final Pointer utf8 = + _bytesFromList([237, 160, 128, 225, 128, 128, 0]).cast(); + final String end = Utf8.fromUtf8(utf8); + expect(end, equals(String.fromCharCodes([0xD800, 0x1000]))); + }); + + test("fromUtf8 invalid", () { + final Pointer utf8 = _bytesFromList([0x80,
0x00]).cast(); + expect(() => Utf8.fromUtf8(utf8), throwsA(isFormatException)); }); } From 6551a80c62031b2ddf2236e1850c531df7908be9 Mon Sep 17 00:00:00 2001 From: Samir Jindel Date: Mon, 9 Sep 2019 16:19:25 +0200 Subject: [PATCH 9/9] comments --- lib/src/utf8.dart | 21 ++++++++++----------- test/utf8_test.dart | 4 ++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart index 0304fec..52d0e95 100644 --- a/lib/src/utf8.dart +++ b/lib/src/utf8.dart @@ -6,8 +6,15 @@ import 'dart:convert'; import 'dart:ffi'; import 'dart:typed_data'; +const int _kMaxSmi64 = (1 << 62) - 1; +const int _kMaxSmi32 = (1 << 30) - 1; +final int _maxSize = sizeOf() == 8 ? _kMaxSmi64 : _kMaxSmi32; + /// [Utf8] implements conversion between Dart strings and null-terminated /// Utf8-encoded "char*" strings in C. +/// +/// [Utf8] is represented as a struct so that `Pointer` can be used in +/// native function signatures. // // TODO(https://github.com/dart-lang/ffi/issues/4): No need to use // 'asExternalTypedData' when Pointer operations are performant. @@ -16,16 +23,8 @@ class Utf8 extends Struct { /// characters before the first null byte. static int strlen(Pointer string) { final Pointer array = string.cast(); - int count = 0x1000; - Uint8List nativeString = array.asExternalTypedData(count: count); - int i = 0; - while (nativeString[i] != 0) { - if (++i == count) { - count *= 2; - nativeString = array.asExternalTypedData(count: count); - } - } - return i; + final Uint8List nativeString = array.asExternalTypedData(count: _maxSize); + return nativeString.indexWhere((char) => char == 0); } /// Creates a [String] containing the characters UTF-8 encoded in [string]. @@ -49,7 +48,7 @@ class Utf8 extends Struct { /// /// If 'string' contains NULL bytes, the converted string will be truncated /// prematurely. Unpaired surrogate code points in [string] will be preserved - /// in the UTF-8 encoded result.
See [Utf8Encoder] for details on encoding + /// in the UTF-8 encoded result. See [Utf8Encoder] for details on encoding. /// /// Returns a malloc-allocated pointer to the result. static Pointer toUtf8(String string) { diff --git a/test/utf8_test.dart b/test/utf8_test.dart index dd048a0..cf4e97e 100644 --- a/test/utf8_test.dart +++ b/test/utf8_test.dart @@ -9,8 +9,8 @@ import 'package:test/test.dart'; import 'package:ffi/ffi.dart'; Pointer _bytesFromList(List ints) { - Pointer ptr = Pointer.allocate(count: ints.length); - Uint8List list = ptr.asExternalTypedData(count: ints.length); + final Pointer ptr = Pointer.allocate(count: ints.length); + final Uint8List list = ptr.asExternalTypedData(count: ints.length); list.setAll(0, ints); return ptr; }