Skip to content

Type Independent String Functions #891

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 45 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
40f016f
String Utilities
BraedonWooding Apr 4, 2018
54b5c00
string utilities
BraedonWooding Apr 4, 2018
ae1b7e4
remove weird files
BraedonWooding Apr 4, 2018
3f20233
Changed higher to upper
BraedonWooding Apr 4, 2018
39d92e8
Added unicode encoding
BraedonWooding Apr 4, 2018
ec3194a
Fixes an overflow bug (wow Zig is actually good at protecting against…
BraedonWooding Apr 5, 2018
714b89c
Basics of locale
BraedonWooding Apr 5, 2018
4793904
Fixed up array
BraedonWooding Apr 5, 2018
23d424c
allowing slice size to be larger than 4
BraedonWooding Apr 5, 2018
1f47341
Fixed bugs in unicode mostly type issues, and then generalised the it…
BraedonWooding Apr 5, 2018
48028ce
Finished ascii example
BraedonWooding Apr 5, 2018
ebca31a
Updated init to check the characters
BraedonWooding Apr 5, 2018
66a0559
Removed change in git ignore
BraedonWooding Apr 5, 2018
932eb6f
Mem.Split doesn't care about your type now
BraedonWooding Apr 6, 2018
1648192
Locale can figure out if a codepoint is a number, letter, or whitespa…
BraedonWooding Apr 7, 2018
1a75cdf
Unicode encoding now throws
BraedonWooding Apr 7, 2018
15ccce7
Added Utf8 Locale
BraedonWooding Apr 8, 2018
6868c71
Implemented join
BraedonWooding Apr 9, 2018
a307291
Remove Locale, fix split bug presuming length 1
BraedonWooding Apr 12, 2018
9e4206c
Fixed bugs in join
BraedonWooding Apr 12, 2018
2ace38b
Merge branch 'master' into StringUtils
BraedonWooding Apr 12, 2018
f570f18
Fixed spawn process error
BraedonWooding Apr 12, 2018
dfdfde2
Merge branch 'StringUtils' of github.com:BraedonWooding/zig into Stri…
BraedonWooding Apr 12, 2018
0999ea8
Fixed more compile time errors
BraedonWooding Apr 12, 2018
fb24ebc
Should be the last of fixes for path
BraedonWooding Apr 12, 2018
8b9db75
Generic trim
BraedonWooding Apr 12, 2018
2f027b7
Merge branch 'master' of github.com:zig-lang/zig into StringUtils
BraedonWooding Apr 13, 2018
3639e7a
Cleaned up and added docs
BraedonWooding Apr 13, 2018
dc3cfda
Name changes stuffed compile
BraedonWooding Apr 13, 2018
dd349fb
Tests both windows and posix joins
BraedonWooding Apr 13, 2018
5b8012b
Fixed up term color error
BraedonWooding Apr 13, 2018
f3bf4f6
This 'should' fix the issues with appveyor
BraedonWooding Apr 13, 2018
b0ea58a
removed const qualifier
BraedonWooding Apr 13, 2018
6fcbf6e
Merge branch 'master' into StringUtils
BraedonWooding Apr 14, 2018
2d2477a
Fixup merge
BraedonWooding Apr 14, 2018
b2cbcb5
Made requested changes
BraedonWooding Apr 14, 2018
ded5e5d
Fixed commit
BraedonWooding Apr 14, 2018
8453786
Removed 'std.string' from main.zig
BraedonWooding Apr 14, 2018
c6ff5e4
Fixed up wrong call
BraedonWooding Apr 14, 2018
a2269f9
Restructured it nicer, and removed the workaround with a proper solut…
BraedonWooding Apr 14, 2018
6e61be7
Things use utf8 for splitting rather than ascii call
BraedonWooding Apr 14, 2018
6294dd4
Added error to possible errors in spawnError
BraedonWooding Apr 14, 2018
4dc263e
typo
BraedonWooding Apr 14, 2018
17dc853
Fixed up a bug I've been meaning to fix for a while
BraedonWooding Apr 14, 2018
b8d1995
Made requested changes
BraedonWooding Apr 20, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,10 @@ set(ZIG_STD_FILES
"index.zig"
"io.zig"
"linked_list.zig"
"string/string_utils.zig"
"string/utf8.zig"
"string/index.zig"
"string/ascii.zig"
"macho.zig"
"math/acos.zig"
"math/acosh.zig"
Expand Down Expand Up @@ -543,7 +547,6 @@ set(ZIG_STD_FILES
"special/compiler_rt/umodti3.zig"
"special/panic.zig"
"special/test_runner.zig"
"unicode.zig"
"zig/ast.zig"
"zig/index.zig"
"zig/parser.zig"
Expand Down
27 changes: 14 additions & 13 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const mem = std.mem;
const ArrayList = std.ArrayList;
const Buffer = std.Buffer;
const io = std.io;
const string = std.string;

pub fn build(b: &Builder) !void {
const mode = b.standardReleaseOptions();
Expand Down Expand Up @@ -55,8 +56,8 @@ pub fn build(b: &Builder) !void {
addCppLib(b, exe, cmake_binary_dir, "zig_cpp");
if (lld_include_dir.len != 0) {
exe.addIncludeDir(lld_include_dir);
var it = mem.split(lld_libraries, ";");
while (it.next()) |lib| {
var it = try string.utf8Split(lld_libraries, ";");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't changed my mind about this since the last PR comments

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need more clarification about what specifically you dislike :). Is it the call name? Is it just because it is not called string.split, or is it the fact that it uses generics behind the scenes?

Again, I'm happy to remove the type-independent stuff if need be :)

while (it.nextBytes()) |lib| {
exe.addObjectFile(lib);
}
} else {
Expand All @@ -68,7 +69,7 @@ pub fn build(b: &Builder) !void {

if (exe.target.getOs() == builtin.Os.linux) {
const libstdcxx_path_padded = try b.exec([][]const u8{cxx_compiler, "-print-file-name=libstdc++.a"});
const libstdcxx_path = ??mem.split(libstdcxx_path_padded, "\r\n").next();
const libstdcxx_path = ??(try string.utf8Split(libstdcxx_path_padded, "\r\n")).nextBytes();
if (mem.eql(u8, libstdcxx_path, "libstdc++.a")) {
warn(
\\Unable to determine path to libstdc++.a
Expand Down Expand Up @@ -172,8 +173,8 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
.libdirs = ArrayList([]const u8).init(b.allocator),
};
{
var it = mem.split(libs_output, " \r\n");
while (it.next()) |lib_arg| {
var it = try string.utf8Split(libs_output, " \r\n");
while (it.nextBytes()) |lib_arg| {
if (mem.startsWith(u8, lib_arg, "-l")) {
try result.system_libs.append(lib_arg[2..]);
} else {
Expand All @@ -186,8 +187,8 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
}
}
{
var it = mem.split(includes_output, " \r\n");
while (it.next()) |include_arg| {
var it = try string.utf8Split(includes_output, " \r\n");
while (it.nextBytes()) |include_arg| {
if (mem.startsWith(u8, include_arg, "-I")) {
try result.includes.append(include_arg[2..]);
} else {
Expand All @@ -196,8 +197,8 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
}
}
{
var it = mem.split(libdir_output, " \r\n");
while (it.next()) |libdir| {
var it = try string.utf8Split(libdir_output, " \r\n");
while (it.nextBytes()) |libdir| {
if (mem.startsWith(u8, libdir, "-L")) {
try result.libdirs.append(libdir[2..]);
} else {
Expand All @@ -209,17 +210,17 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
}

pub fn installStdLib(b: &Builder, stdlib_files: []const u8) void {
var it = mem.split(stdlib_files, ";");
while (it.next()) |stdlib_file| {
var it = string.utf8Split(stdlib_files, ";") catch unreachable;
while (it.nextBytes()) |stdlib_file| {
const src_path = os.path.join(b.allocator, "std", stdlib_file) catch unreachable;
const dest_path = os.path.join(b.allocator, "lib", "zig", "std", stdlib_file) catch unreachable;
b.installFile(src_path, dest_path);
}
}

pub fn installCHeaders(b: &Builder, c_header_files: []const u8) void {
var it = mem.split(c_header_files, ";");
while (it.next()) |c_header_file| {
var it = string.utf8Split(c_header_files, ";") catch unreachable;
while (it.nextBytes()) |c_header_file| {
const src_path = os.path.join(b.allocator, "c_headers", c_header_file) catch unreachable;
const dest_path = os.path.join(b.allocator, "lib", "zig", "include", c_header_file) catch unreachable;
b.installFile(src_path, dest_path);
Expand Down
9 changes: 5 additions & 4 deletions doc/docgen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const os = std.os;
const warn = std.debug.warn;
const mem = std.mem;
const assert = std.debug.assert;
const string = std.string;

const max_doc_file_size = 10 * 1024 * 1024;

Expand Down Expand Up @@ -309,7 +310,7 @@ const Node = union(enum) {
const Toc = struct {
nodes: []Node,
toc: []u8,
urls: std.HashMap([]const u8, Token, mem.hash_slice_u8, mem.eql_slice_u8),
urls: std.HashMap([]const u8, Token, string.hashStr, string.strEql),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This hash map is hashing bytes, not strings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this shows a problem with the way it's done: hash_slice_u8 and hashStr, for example, would be identical, and that leads to a single name meaning two different things. How would you approach this? In situations where it is hashing strings rather than bytes, it leads to the same problem. I would either declare aliases or just live with one of them.

};

const Action = enum {
Expand All @@ -318,7 +319,7 @@ const Action = enum {
};

fn genToc(allocator: &mem.Allocator, tokenizer: &Tokenizer) !Toc {
var urls = std.HashMap([]const u8, Token, mem.hash_slice_u8, mem.eql_slice_u8).init(allocator);
var urls = std.HashMap([]const u8, Token, string.hashStr, string.strEql).init(allocator);
errdefer urls.deinit();

var header_stack_size: usize = 0;
Expand Down Expand Up @@ -600,7 +601,7 @@ const TermState = enum {
test "term color" {
const input_bytes = "A\x1b[32;1mgreen\x1b[0mB";
const result = try termColor(std.debug.global_allocator, input_bytes);
assert(mem.eql(u8, result, "A<span class=\"t32\">green</span>B"));
assert(mem.eql(u8, result, "A<span class=\"t32_1\">green</span>B"));
}

fn termColor(allocator: &mem.Allocator, input: []const u8) ![]u8 {
Expand Down Expand Up @@ -718,7 +719,7 @@ fn genHtml(allocator: &mem.Allocator, tokenizer: &Tokenizer, toc: &Toc, out: var
warn("docgen example code {}/{}...", code_progress_index, tokenizer.code_node_count);

const raw_source = tokenizer.buffer[code.source_token.start..code.source_token.end];
const trimmed_raw_source = mem.trim(u8, raw_source, " \n");
const trimmed_raw_source = mem.trim(u8, raw_source, " \n", mem.Side.BOTH);
const escaped_source = try escapeHtml(allocator, trimmed_raw_source);
if (!code.is_inline) {
try out.print("<p class=\"file\">{}.zig</p>", code.name);
Expand Down
2 changes: 1 addition & 1 deletion src-self-hosted/arg.zig
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ fn readFlagArguments(allocator: &Allocator, args: []const []const u8, required:
}
}

const HashMapFlags = HashMap([]const u8, FlagArg, std.hash.Fnv1a_32.hash, mem.eql_slice_u8);
const HashMapFlags = HashMap([]const u8, FlagArg, std.hash.Fnv1a_32.hash, std.string.strEql);

// A store for querying found flags and positional arguments.
pub const Args = struct {
Expand Down
3 changes: 2 additions & 1 deletion std/buf_map.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ const std = @import("index.zig");
const HashMap = std.HashMap;
const mem = std.mem;
const Allocator = mem.Allocator;
const string = std.string;
const assert = std.debug.assert;

/// BufMap copies keys and values before they go into the map, and
/// frees them when they get removed.
pub const BufMap = struct {
hash_map: BufMapHashMap,

const BufMapHashMap = HashMap([]const u8, []const u8, mem.hash_slice_u8, mem.eql_slice_u8);
const BufMapHashMap = HashMap([]const u8, []const u8, string.hashStr, string.strEql);

pub fn init(allocator: &Allocator) BufMap {
var self = BufMap {
Expand Down
8 changes: 5 additions & 3 deletions std/buf_set.zig
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
const HashMap = @import("hash_map.zig").HashMap;
const mem = @import("mem.zig");
const std = @import("index.zig");
const HashMap = std.HashMap;
const mem = std.mem;
const Allocator = mem.Allocator;
const string = std.string;

pub const BufSet = struct {
hash_map: BufSetHashMap,

const BufSetHashMap = HashMap([]const u8, void, mem.hash_slice_u8, mem.eql_slice_u8);
const BufSetHashMap = HashMap([]const u8, void, string.hashStr, string.strEql);

pub fn init(a: &Allocator) BufSet {
var self = BufSet {
Expand Down
21 changes: 11 additions & 10 deletions std/build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const Term = os.ChildProcess.Term;
const BufSet = std.BufSet;
const BufMap = std.BufMap;
const fmt_lib = std.fmt;
const string = std.string;

pub const Builder = struct {
uninstall_tls: TopLevelStep,
Expand Down Expand Up @@ -48,8 +49,8 @@ pub const Builder = struct {
cache_root: []const u8,
release_mode: ?builtin.Mode,

const UserInputOptionsMap = HashMap([]const u8, UserInputOption, mem.hash_slice_u8, mem.eql_slice_u8);
const AvailableOptionsMap = HashMap([]const u8, AvailableOption, mem.hash_slice_u8, mem.eql_slice_u8);
const UserInputOptionsMap = HashMap([]const u8, UserInputOption, string.hashStr, string.strEql);
const AvailableOptionsMap = HashMap([]const u8, AvailableOption, string.hashStr, string.strEql);

const AvailableOption = struct {
name: []const u8,
Expand Down Expand Up @@ -318,11 +319,11 @@ pub const Builder = struct {

fn processNixOSEnvVars(self: &Builder) void {
if (os.getEnvVarOwned(self.allocator, "NIX_CFLAGS_COMPILE")) |nix_cflags_compile| {
var it = mem.split(nix_cflags_compile, " ");
var it = string.utf8Split(nix_cflags_compile, " ") catch unreachable;
while (true) {
const word = it.next() ?? break;
const word = it.nextBytes() ?? break;
if (mem.eql(u8, word, "-isystem")) {
const include_path = it.next() ?? {
const include_path = it.nextBytes() ?? {
warn("Expected argument after -isystem in NIX_CFLAGS_COMPILE\n");
break;
};
Expand All @@ -336,11 +337,11 @@ pub const Builder = struct {
assert(err == error.EnvironmentVariableNotFound);
}
if (os.getEnvVarOwned(self.allocator, "NIX_LDFLAGS")) |nix_ldflags| {
var it = mem.split(nix_ldflags, " ");
var it = string.utf8Split(nix_ldflags, " ") catch unreachable;
while (true) {
const word = it.next() ?? break;
const word = it.nextBytes() ?? break;
if (mem.eql(u8, word, "-rpath")) {
const rpath = it.next() ?? {
const rpath = it.nextBytes() ?? {
warn("Expected argument after -rpath in NIX_LDFLAGS\n");
break;
};
Expand Down Expand Up @@ -687,8 +688,8 @@ pub const Builder = struct {
if (os.path.isAbsolute(name)) {
return name;
}
var it = mem.split(PATH, []u8{os.path.delimiter});
while (it.next()) |path| {
var it = try string.utf8Split(PATH, []u8{os.path.delimiter});
while (it.nextBytes()) |path| {
const full_path = try os.path.join(self.allocator, path, self.fmt("{}{}", name, exe_extension));
if (os.path.real(self.allocator, full_path)) |real_path| {
return real_path;
Expand Down
4 changes: 2 additions & 2 deletions std/index.zig
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub const net = @import("net.zig");
pub const os = @import("os/index.zig");
pub const rand = @import("rand/index.zig");
pub const sort = @import("sort.zig");
pub const unicode = @import("unicode.zig");
pub const string = @import("string/index.zig");
pub const zig = @import("zig/index.zig");

test "std" {
Expand Down Expand Up @@ -62,6 +62,6 @@ test "std" {
_ = @import("os/index.zig");
_ = @import("rand/index.zig");
_ = @import("sort.zig");
_ = @import("unicode.zig");
_ = @import("string/index.zig");
_ = @import("zig/index.zig");
}
2 changes: 1 addition & 1 deletion std/macho.zig
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ pub fn loadSymbols(allocator: &mem.Allocator, in: &io.FileInStream) !SymbolTable
for (syms) |sym| {
if (!isSymbol(sym)) continue;
const start = sym.n_strx;
const end = ??mem.indexOfScalarPos(u8, strings, start, 0);
const end = ??mem.indexOfScalarPos(u8, strings, start, 0, false);
const name = strings[start..end];
const address = sym.n_value;
symbols[nsym] = Symbol { .name = name, .address = address };
Expand Down
Loading