Skip to content
This repository was archived by the owner on Jan 17, 2024. It is now read-only.

Helpers for null-terminated Utf8 #3

Merged
merged 9 commits into from
Sep 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.dart_tool
.packages
pubspec.lock
5 changes: 5 additions & 0 deletions lib/ffi.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

export 'src/utf8.dart';
66 changes: 66 additions & 0 deletions lib/src/utf8.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';
import 'dart:ffi';
import 'dart:typed_data';

// Upper bounds used when viewing native memory of unknown length.
// NOTE(review): the names suggest these are the VM's small-integer ("Smi")
// limits on 64-bit and 32-bit targets -- confirm against the VM sources.

/// 2^62 - 1.
const int _kMaxSmi64 = 0x3FFFFFFFFFFFFFFF;

/// 2^30 - 1.
const int _kMaxSmi32 = 0x3FFFFFFF;

/// The bound matching the native pointer width: [_kMaxSmi64] when an
/// [IntPtr] occupies 8 bytes, [_kMaxSmi32] otherwise.
final int _maxSize = sizeOf<IntPtr>() != 8 ? _kMaxSmi32 : _kMaxSmi64;

/// [Utf8] implements conversion between Dart strings and null-terminated
/// Utf8-encoded "char*" strings in C.
///
/// [Utf8] is represented as a struct so that `Pointer<Utf8>` can be used in
/// native function signatures.
//
// TODO(https://github.com/dart-lang/ffi/issues/4): No need to use
// 'asExternalTypedData' when Pointer operations are performant.
class Utf8 extends Struct<Utf8> {
  /// Returns the length of a null-terminated string -- the number of (one-byte)
  /// characters before the first null byte.
  ///
  /// The [string] must be null-terminated: the scan for the terminator is not
  /// bounded by any known allocation size.
  static int strlen(Pointer<Utf8> string) {
    final Pointer<Uint8> array = string.cast<Uint8>();
    // The real length is unknown up front, so expose the memory as a byte list
    // capped only by `_maxSize` and search it for the first zero byte.
    final Uint8List nativeString = array.asExternalTypedData(count: _maxSize);
    return nativeString.indexWhere((char) => char == 0);
  }

  /// Creates a [String] containing the characters UTF-8 encoded in [string].
  ///
  /// The [string] must be a zero-terminated byte sequence of valid UTF-8
  /// encodings of Unicode code points. It may also contain UTF-8 encodings of
  /// unpaired surrogate code points, which is not otherwise valid UTF-8, but
  /// which may be created when encoding a Dart string containing an unpaired
  /// surrogate. See [Utf8Decoder] for details on decoding.
  ///
  /// Returns a Dart string containing the decoded code points. The native
  /// memory behind [string] is only read; it is not freed by this method.
  static String fromUtf8(Pointer<Utf8> string) {
    // Measure first so that the terminating zero byte is excluded from the
    // bytes handed to the decoder.
    final int length = strlen(string);
    return utf8.decode(Uint8List.view(
        string.cast<Uint8>().asExternalTypedData(count: length).buffer,
        0,
        length));
  }

  /// Converts a [String] to a Utf8-encoded null-terminated C string.
  ///
  /// If [string] contains NULL bytes, the converted string will be truncated
  /// prematurely. Unpaired surrogate code points in [string] will be preserved
  /// in the UTF-8 encoded result. See [Utf8Encoder] for details on encoding.
  ///
  /// Returns a pointer to the newly allocated result. This method does not
  /// free it, so releasing the memory is the caller's responsibility.
  static Pointer<Utf8> toUtf8(String string) {
    final units = utf8.encode(string);
    // Reserve one extra byte for the terminating zero.
    final Pointer<Uint8> result =
        Pointer<Uint8>.allocate(count: units.length + 1);
    final Uint8List nativeString =
        result.asExternalTypedData(count: units.length + 1);
    nativeString.setAll(0, units);
    nativeString[units.length] = 0;
    return result.cast();
  }

  /// Decodes the null-terminated string located at this struct's address.
  @override
  String toString() => fromUtf8(addressOf);
}
1 change: 1 addition & 0 deletions pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ dependencies:

dev_dependencies:
pedantic: ^1.0.0
test: ^1.6.8
77 changes: 77 additions & 0 deletions test/utf8_test.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:ffi';
import 'dart:typed_data';

import 'package:test/test.dart';
import 'package:ffi/ffi.dart';

/// Allocates `ints.length` bytes of native memory, fills them with [ints],
/// and returns a pointer to the first byte.
///
/// The allocation is not freed here.
Pointer<Uint8> _bytesFromList(List<int> ints) {
  final int byteCount = ints.length;
  final Pointer<Uint8> buffer = Pointer<Uint8>.allocate(count: byteCount);
  final Uint8List nativeView = buffer.asExternalTypedData(count: byteCount);
  nativeView.setRange(0, byteCount, ints);
  return buffer;
}

/// Exercises [Utf8.toUtf8], [Utf8.fromUtf8] and [Utf8.strlen] with ASCII,
/// multi-byte (emoji), unpaired-surrogate and invalid input.
///
/// Every test frees the native memory it allocates, whether it came from
/// [Utf8.toUtf8] or from `_bytesFromList`.
void main() {
  test("toUtf8 ASCII", () {
    final String start = "Hello World!\n";
    final Pointer<Uint8> converted = Utf8.toUtf8(start).cast();
    // ASCII encodes to one byte per character; +1 covers the terminator.
    final Uint8List end =
        converted.asExternalTypedData(count: start.length + 1);
    final matcher =
        equals([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]);
    expect(end, matcher);
    converted.free();
  });

  test("fromUtf8 ASCII", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]).cast();
    final String end = Utf8.fromUtf8(utf8);
    expect(end, "Hello World!\n");
    utf8.free();
  });

  test("toUtf8 emoji", () {
    final String start = "😎👿💬";
    final Pointer<Utf8> converted = Utf8.toUtf8(start).cast();
    // The encoded byte count differs from `start.length` for non-ASCII text,
    // so measure the native string instead.
    final int length = Utf8.strlen(converted);
    final Uint8List end =
        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
    final matcher =
        equals([240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]);
    expect(end, matcher);
    converted.free();
  });

  test("fromUtf8 emoji", () {
    final Pointer<Utf8> utf8 = _bytesFromList(
        [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]).cast();
    final String end = Utf8.fromUtf8(utf8);
    expect(end, "😎👿💬");
    utf8.free();
  });

  test("toUtf8 unpaired surrogate", () {
    final String start = String.fromCharCodes([0xD800, 0x1000]);
    final Pointer<Utf8> converted = Utf8.toUtf8(start).cast();
    final int length = Utf8.strlen(converted);
    final Uint8List end =
        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
    expect(end, equals([237, 160, 128, 225, 128, 128, 0]));
    converted.free();
  });

  test("fromUtf8 unpaired surrogate", () {
    final Pointer<Utf8> utf8 =
        _bytesFromList([237, 160, 128, 225, 128, 128, 0]).cast();
    final String end = Utf8.fromUtf8(utf8);
    expect(end, equals(String.fromCharCodes([0xD800, 0x1000])));
    utf8.free();
  });

  test("fromUtf8 invalid", () {
    // 0x80 is a continuation byte with no lead byte, so decoding must fail.
    final Pointer<Utf8> utf8 = _bytesFromList([0x80, 0x00]).cast();
    expect(() => Utf8.fromUtf8(utf8), throwsA(isFormatException));
    utf8.free();
  });
}