ziglang · BraedonWooding · Apr 4, 2018 · Apr 4, 2018 · Apr 4, 2018 · Apr 4, 2018
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -452,6 +452,10 @@ set(ZIG_STD_FILES
     "index.zig"
     "io.zig"
     "linked_list.zig"
+    "string/string_utils.zig"
+    "string/utf8.zig"
+    "string/index.zig"
+    "string/ascii.zig"
     "macho.zig"
     "math/acos.zig"
     "math/acosh.zig"
@@ -543,7 +547,6 @@ set(ZIG_STD_FILES
     "special/compiler_rt/umodti3.zig"
     "special/panic.zig"
     "special/test_runner.zig"
-    "unicode.zig"
     "zig/ast.zig"
     "zig/index.zig"
     "zig/parser.zig"

diff --git a/build.zig b/build.zig
@@ -9,6 +9,7 @@ const mem = std.mem;
 const ArrayList = std.ArrayList;
 const Buffer = std.Buffer;
 const io = std.io;
+const string = std.string;
 
 pub fn build(b: &Builder) !void {
     const mode = b.standardReleaseOptions();
@@ -55,8 +56,8 @@ pub fn build(b: &Builder) !void {
     addCppLib(b, exe, cmake_binary_dir, "zig_cpp");
     if (lld_include_dir.len != 0) {
         exe.addIncludeDir(lld_include_dir);
-        var it = mem.split(lld_libraries, ";");
-        while (it.next()) |lib| {
+        var it = try string.utf8Split(lld_libraries, ";");
+        while (it.nextBytes()) |lib| {
             exe.addObjectFile(lib);
         }
     } else {
@@ -68,7 +69,7 @@ pub fn build(b: &Builder) !void {
 
     if (exe.target.getOs() == builtin.Os.linux) {
         const libstdcxx_path_padded = try b.exec([][]const u8{cxx_compiler, "-print-file-name=libstdc++.a"});
-        const libstdcxx_path = ??mem.split(libstdcxx_path_padded, "\r\n").next();
+        const libstdcxx_path = ??(try string.utf8Split(libstdcxx_path_padded, "\r\n")).nextBytes();
         if (mem.eql(u8, libstdcxx_path, "libstdc++.a")) {
             warn(
                 \\Unable to determine path to libstdc++.a
@@ -172,8 +173,8 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
         .libdirs = ArrayList([]const u8).init(b.allocator),
     };
     {
-        var it = mem.split(libs_output, " \r\n");
-        while (it.next()) |lib_arg| {
+        var it = try string.utf8Split(libs_output, " \r\n");
+        while (it.nextBytes()) |lib_arg| {
             if (mem.startsWith(u8, lib_arg, "-l")) {
                 try result.system_libs.append(lib_arg[2..]);
             } else {
@@ -186,8 +187,8 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
         }
     }
     {
-        var it = mem.split(includes_output, " \r\n");
-        while (it.next()) |include_arg| {
+        var it = try string.utf8Split(includes_output, " \r\n");
+        while (it.nextBytes()) |include_arg| {
             if (mem.startsWith(u8, include_arg, "-I")) {
                 try result.includes.append(include_arg[2..]);
             } else {
@@ -196,8 +197,8 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
         }
     }
     {
-        var it = mem.split(libdir_output, " \r\n");
-        while (it.next()) |libdir| {
+        var it = try string.utf8Split(libdir_output, " \r\n");
+        while (it.nextBytes()) |libdir| {
             if (mem.startsWith(u8, libdir, "-L")) {
                 try result.libdirs.append(libdir[2..]);
             } else {
@@ -209,17 +210,17 @@ fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
 }
 
 pub fn installStdLib(b: &Builder, stdlib_files: []const u8) void {
-    var it = mem.split(stdlib_files, ";");
-    while (it.next()) |stdlib_file| {
+    var it = string.utf8Split(stdlib_files, ";") catch unreachable;
+    while (it.nextBytes()) |stdlib_file| {
         const src_path = os.path.join(b.allocator, "std", stdlib_file) catch unreachable;
         const dest_path = os.path.join(b.allocator, "lib", "zig", "std", stdlib_file) catch unreachable;
         b.installFile(src_path, dest_path);
     }
 }
 
 pub fn installCHeaders(b: &Builder, c_header_files: []const u8) void {
-    var it = mem.split(c_header_files, ";");
-    while (it.next()) |c_header_file| {
+    var it = string.utf8Split(c_header_files, ";") catch unreachable;
+    while (it.nextBytes()) |c_header_file| {
         const src_path = os.path.join(b.allocator, "c_headers", c_header_file) catch unreachable;
         const dest_path = os.path.join(b.allocator, "lib", "zig", "include", c_header_file) catch unreachable;
         b.installFile(src_path, dest_path);

diff --git a/doc/docgen.zig b/doc/docgen.zig
@@ -5,6 +5,7 @@ const os = std.os;
 const warn = std.debug.warn;
 const mem = std.mem;
 const assert = std.debug.assert;
+const string = std.string;
 
 const max_doc_file_size = 10 * 1024 * 1024;
 
@@ -309,7 +310,7 @@ const Node = union(enum) {
 const Toc = struct {
     nodes: []Node,
     toc: []u8,
-    urls: std.HashMap([]const u8, Token, mem.hash_slice_u8, mem.eql_slice_u8),
+    urls: std.HashMap([]const u8, Token, string.hashStr, string.strEql),
 };
 
 const Action = enum {
@@ -318,7 +319,7 @@ const Action = enum {
 };
 
 fn genToc(allocator: &mem.Allocator, tokenizer: &Tokenizer) !Toc {
-    var urls = std.HashMap([]const u8, Token, mem.hash_slice_u8, mem.eql_slice_u8).init(allocator);
+    var urls = std.HashMap([]const u8, Token, string.hashStr, string.strEql).init(allocator);
     errdefer urls.deinit();
 
     var header_stack_size: usize = 0;
@@ -600,7 +601,7 @@ const TermState = enum {
 test "term color" {
     const input_bytes = "A\x1b[32;1mgreen\x1b[0mB";
     const result = try termColor(std.debug.global_allocator, input_bytes);
-    assert(mem.eql(u8, result, "A<span class=\"t32\">green</span>B"));
+    assert(mem.eql(u8, result, "A<span class=\"t32_1\">green</span>B"));
 }
 
 fn termColor(allocator: &mem.Allocator, input: []const u8) ![]u8 {
@@ -718,7 +719,7 @@ fn genHtml(allocator: &mem.Allocator, tokenizer: &Tokenizer, toc: &Toc, out: var
                 warn("docgen example code {}/{}...", code_progress_index, tokenizer.code_node_count);
 
                 const raw_source = tokenizer.buffer[code.source_token.start..code.source_token.end];
-                const trimmed_raw_source = mem.trim(u8, raw_source, " \n");
+                const trimmed_raw_source = mem.trim(u8, raw_source, " \n", mem.Side.BOTH);
                 const escaped_source = try escapeHtml(allocator, trimmed_raw_source);
                 if (!code.is_inline) {
                     try out.print("<p class=\"file\">{}.zig</p>", code.name);

diff --git a/src-self-hosted/arg.zig b/src-self-hosted/arg.zig
@@ -74,7 +74,7 @@ fn readFlagArguments(allocator: &Allocator, args: []const []const u8, required:
     }
 }
 
-const HashMapFlags = HashMap([]const u8, FlagArg, std.hash.Fnv1a_32.hash, mem.eql_slice_u8);
+const HashMapFlags = HashMap([]const u8, FlagArg, std.hash.Fnv1a_32.hash, std.string.strEql);
 
 // A store for querying found flags and positional arguments.
 pub const Args = struct {

diff --git a/std/buf_map.zig b/std/buf_map.zig
@@ -2,14 +2,15 @@ const std = @import("index.zig");
 const HashMap = std.HashMap;
 const mem = std.mem;
 const Allocator = mem.Allocator;
+const string = std.string;
 const assert = std.debug.assert;
 
 /// BufMap copies keys and values before they go into the map, and
 /// frees them when they get removed.
 pub const BufMap = struct {
     hash_map: BufMapHashMap,
 
-    const BufMapHashMap = HashMap([]const u8, []const u8, mem.hash_slice_u8, mem.eql_slice_u8);
+    const BufMapHashMap = HashMap([]const u8, []const u8, string.hashStr, string.strEql);
 
     pub fn init(allocator: &Allocator) BufMap {
         var self = BufMap {

diff --git a/std/buf_set.zig b/std/buf_set.zig
@@ -1,11 +1,13 @@
-const HashMap = @import("hash_map.zig").HashMap;
-const mem = @import("mem.zig");
+const std = @import("index.zig");
+const HashMap = std.HashMap;
+const mem = std.mem;
 const Allocator = mem.Allocator;
+const string = std.string;
 
 pub const BufSet = struct {
     hash_map: BufSetHashMap,
 
-    const BufSetHashMap = HashMap([]const u8, void, mem.hash_slice_u8, mem.eql_slice_u8);
+    const BufSetHashMap = HashMap([]const u8, void, string.hashStr, string.strEql);
 
     pub fn init(a: &Allocator) BufSet {
         var self = BufSet {

diff --git a/std/build.zig b/std/build.zig
@@ -14,6 +14,7 @@ const Term = os.ChildProcess.Term;
 const BufSet = std.BufSet;
 const BufMap = std.BufMap;
 const fmt_lib = std.fmt;
+const string = std.string;
 
 pub const Builder = struct {
     uninstall_tls: TopLevelStep,
@@ -48,8 +49,8 @@ pub const Builder = struct {
     cache_root: []const u8,
     release_mode: ?builtin.Mode,
 
-    const UserInputOptionsMap = HashMap([]const u8, UserInputOption, mem.hash_slice_u8, mem.eql_slice_u8);
-    const AvailableOptionsMap = HashMap([]const u8, AvailableOption, mem.hash_slice_u8, mem.eql_slice_u8);
+    const UserInputOptionsMap = HashMap([]const u8, UserInputOption, string.hashStr, string.strEql);
+    const AvailableOptionsMap = HashMap([]const u8, AvailableOption, string.hashStr, string.strEql);
 
     const AvailableOption = struct {
         name: []const u8,
@@ -318,11 +319,11 @@ pub const Builder = struct {
 
     fn processNixOSEnvVars(self: &Builder) void {
         if (os.getEnvVarOwned(self.allocator, "NIX_CFLAGS_COMPILE")) |nix_cflags_compile| {
-            var it = mem.split(nix_cflags_compile, " ");
+            var it = string.utf8Split(nix_cflags_compile, " ") catch unreachable;
             while (true) {
-                const word = it.next() ?? break;
+                const word = it.nextBytes() ?? break;
                 if (mem.eql(u8, word, "-isystem")) {
-                    const include_path = it.next() ?? {
+                    const include_path = it.nextBytes() ?? {
                         warn("Expected argument after -isystem in NIX_CFLAGS_COMPILE\n");
                         break;
                     };
@@ -336,11 +337,11 @@ pub const Builder = struct {
             assert(err == error.EnvironmentVariableNotFound);
         }
         if (os.getEnvVarOwned(self.allocator, "NIX_LDFLAGS")) |nix_ldflags| {
-            var it = mem.split(nix_ldflags, " ");
+            var it = string.utf8Split(nix_ldflags, " ") catch unreachable;
             while (true) {
-                const word = it.next() ?? break;
+                const word = it.nextBytes() ?? break;
                 if (mem.eql(u8, word, "-rpath")) {
-                    const rpath = it.next() ?? {
+                    const rpath = it.nextBytes() ?? {
                         warn("Expected argument after -rpath in NIX_LDFLAGS\n");
                         break;
                     };
@@ -687,8 +688,8 @@ pub const Builder = struct {
                 if (os.path.isAbsolute(name)) {
                     return name;
                 }
-                var it = mem.split(PATH, []u8{os.path.delimiter});
-                while (it.next()) |path| {
+                var it = try string.utf8Split(PATH, []u8{os.path.delimiter});
+                while (it.nextBytes()) |path| {
                     const full_path = try os.path.join(self.allocator, path, self.fmt("{}{}", name, exe_extension));
                     if (os.path.real(self.allocator, full_path)) |real_path| {
                         return real_path;

diff --git a/std/index.zig b/std/index.zig
@@ -29,7 +29,7 @@ pub const net = @import("net.zig");
 pub const os = @import("os/index.zig");
 pub const rand = @import("rand/index.zig");
 pub const sort = @import("sort.zig");
-pub const unicode = @import("unicode.zig");
+pub const string = @import("string/index.zig");
 pub const zig = @import("zig/index.zig");
 
 test "std" {
@@ -62,6 +62,6 @@ test "std" {
     _ = @import("os/index.zig");
     _ = @import("rand/index.zig");
     _ = @import("sort.zig");
-    _ = @import("unicode.zig");
+    _ = @import("string/index.zig");
     _ = @import("zig/index.zig");
 }
diff --git a/std/macho.zig b/std/macho.zig
@@ -129,7 +129,7 @@ pub fn loadSymbols(allocator: &mem.Allocator, in: &io.FileInStream) !SymbolTable
     for (syms) |sym| {
         if (!isSymbol(sym)) continue;
         const start = sym.n_strx;
-        const end = ??mem.indexOfScalarPos(u8, strings, start, 0);
+        const end = ??mem.indexOfScalarPos(u8, strings, start, 0, false);
         const name = strings[start..end];
         const address = sym.n_value;
         symbols[nsym] = Symbol { .name = name, .address = address };