Skip to content

Commit efc9acb

Browse files
[ffi] Helpers for null-terminated Utf8 (dart-archive/ffi#3)
1 parent 9263c71 commit efc9acb

File tree

5 files changed

+152
-0
lines changed

5 files changed

+152
-0
lines changed

pkgs/ffi/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.dart_tool
2+
.packages
3+
pubspec.lock

pkgs/ffi/lib/ffi.dart

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
export 'src/utf8.dart';

pkgs/ffi/lib/src/utf8.dart

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
import 'dart:ffi';
7+
import 'dart:typed_data';
8+
9+
/// Upper bound used on 64-bit targets: 2^62 - 1 (per the name, the largest
/// "Smi" value there).
const int _kMaxSmi64 = 0x3FFFFFFFFFFFFFFF;

/// Upper bound used on 32-bit targets: 2^30 - 1 (per the name, the largest
/// "Smi" value there).
const int _kMaxSmi32 = 0x3FFFFFFF;

/// Element count passed to `asExternalTypedData` when the real length of the
/// native memory is not yet known.
///
/// Chosen from the native pointer width: anything other than 8 bytes selects
/// the 32-bit bound.
final int _maxSize = sizeOf<IntPtr>() != 8 ? _kMaxSmi32 : _kMaxSmi64;
12+
13+
/// [Utf8] implements conversion between Dart strings and null-terminated
14+
/// Utf8-encoded "char*" strings in C.
15+
///
16+
/// [Utf8] is represented as a struct so that `Pointer<Utf8>` can be used in
17+
/// native function signatures.
18+
//
19+
// TODO(https://github.com/dart-lang/ffi/issues/4): No need to use
20+
// 'asExternalTypedData' when Pointer operations are performant.
21+
class Utf8 extends Struct<Utf8> {
  /// Returns the length of a null-terminated string -- the number of (one-byte)
  /// characters before the first null byte.
  ///
  /// [string] must point at memory that actually contains a terminating zero
  /// byte: the scan is bounded only by [_maxSize], not by the size of the
  /// underlying allocation.
  static int strlen(Pointer<Utf8> string) {
    final Pointer<Uint8> array = string.cast<Uint8>();
    // Expose the memory as a typed list of (practically) unbounded length so
    // the search runs over typed data rather than per-element Pointer loads.
    final Uint8List nativeString = array.asExternalTypedData(count: _maxSize);
    return nativeString.indexWhere((char) => char == 0);
  }

  /// Creates a [String] containing the characters UTF-8 encoded in [string].
  ///
  /// The [string] must be a zero-terminated byte sequence of valid UTF-8
  /// encodings of Unicode code points. It may also contain UTF-8 encodings of
  /// unpaired surrogate code points, which is not otherwise valid UTF-8, but
  /// which may be created when encoding a Dart string containing an unpaired
  /// surrogate. See [Utf8Decoder] for details on decoding.
  ///
  /// Returns a Dart string containing the decoded code points. The terminating
  /// zero byte is not part of the result.
  static String fromUtf8(Pointer<Utf8> string) {
    // Measure first, then decode exactly `length` bytes (terminator excluded).
    final int length = strlen(string);
    return utf8.decode(Uint8List.view(
        string.cast<Uint8>().asExternalTypedData(count: length).buffer,
        0,
        length));
  }

  /// Converts a [String] to a UTF-8 encoded, null-terminated C string.
  ///
  /// If [string] contains NUL characters, C code reading the result will see
  /// it end at the first one. Unpaired surrogate code points in [string] will
  /// be preserved in the UTF-8 encoded result. See [Utf8Encoder] for details
  /// on encoding.
  ///
  /// Returns a malloc-allocated pointer to the result. Nothing in this class
  /// releases that memory; callers free it themselves (e.g. with `free()`).
  static Pointer<Utf8> toUtf8(String string) {
    final units = utf8.encode(string);
    // One extra byte for the trailing null terminator.
    final Pointer<Uint8> result =
        Pointer<Uint8>.allocate(count: units.length + 1);
    final Uint8List nativeString =
        result.asExternalTypedData(count: units.length + 1);
    nativeString.setAll(0, units);
    nativeString[units.length] = 0;
    return result.cast();
  }

  /// Decodes the null-terminated string located at this struct's address.
  @override
  String toString() => fromUtf8(addressOf);
}

pkgs/ffi/pubspec.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ dependencies:
1212

1313
dev_dependencies:
1414
pedantic: ^1.0.0
15+
test: ^1.6.8

pkgs/ffi/test/utf8_test.dart

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:ffi';
6+
import 'dart:typed_data';
7+
8+
import 'package:test/test.dart';
9+
import 'package:ffi/ffi.dart';
10+
11+
/// Allocates `ints.length` native bytes and fills them with [ints], in order.
///
/// The returned pointer addresses the first byte. This helper does not free
/// the allocation.
Pointer<Uint8> _bytesFromList(List<int> ints) {
  final int byteCount = ints.length;
  final Pointer<Uint8> native = Pointer<Uint8>.allocate(count: byteCount);
  // Write through a typed-data view of the freshly allocated memory.
  final Uint8List view = native.asExternalTypedData(count: byteCount);
  view.setRange(0, byteCount, ints);
  return native;
}
17+
18+
/// Round-trip tests for [Utf8.toUtf8], [Utf8.fromUtf8] and [Utf8.strlen].
///
/// Every test releases the native memory it allocates, whether it came from
/// [Utf8.toUtf8] or from [_bytesFromList].
void main() {
  test("toUtf8 ASCII", () {
    final String start = "Hello World!\n";
    final Pointer<Uint8> converted = Utf8.toUtf8(start).cast();
    // ASCII only: one byte per code unit, plus the null terminator.
    final Uint8List end =
        converted.asExternalTypedData(count: start.length + 1);
    final matcher =
        equals([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]);
    expect(end, matcher);
    converted.free();
  });

  test("fromUtf8 ASCII", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]).cast();
    final String end = Utf8.fromUtf8(utf8);
    expect(end, "Hello World!\n");
    utf8.free();
  });

  test("toUtf8 emoji", () {
    final String start = "😎👿💬";
    final Pointer<Utf8> converted = Utf8.toUtf8(start);
    final int length = Utf8.strlen(converted);
    // Include the terminator in the view so it is checked as well.
    final Uint8List end =
        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
    final matcher =
        equals([240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]);
    expect(end, matcher);
    converted.free();
  });

  test("fromUtf8 emoji", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]).cast();
    final String end = Utf8.fromUtf8(utf8);
    expect(end, "😎👿💬");
    utf8.free();
  });

  test("toUtf8 unpaired surrogate", () {
    final String start = String.fromCharCodes([0xD800, 0x1000]);
    final Pointer<Utf8> converted = Utf8.toUtf8(start);
    final int length = Utf8.strlen(converted);
    final Uint8List end =
        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
    expect(end, equals([237, 160, 128, 225, 128, 128, 0]));
    converted.free();
  });

  test("fromUtf8 unpaired surrogate", () {
    final Pointer<Utf8> utf8 =
        _bytesFromList([237, 160, 128, 225, 128, 128, 0]).cast();
    final String end = Utf8.fromUtf8(utf8);
    expect(end, equals(String.fromCharCodes([0xD800, 0x1000])));
    utf8.free();
  });

  test("fromUtf8 invalid", () {
    // 0x80 is a lone continuation byte, which is not valid UTF-8.
    final Pointer<Utf8> utf8 = _bytesFromList([0x80, 0x00]).cast();
    // The closure is invoked synchronously by the matcher, so the buffer can
    // be released right after the expectation.
    expect(() => Utf8.fromUtf8(utf8), throwsA(isFormatException));
    utf8.free();
  });
}

0 commit comments

Comments
 (0)