From 444edd9aed84ebfd9153817259e2a4e1228b7120 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Fri, 10 Aug 2018 19:01:37 -0700 Subject: [PATCH 1/2] std.crypto: add chaCha20 v3 --- CMakeLists.txt | 1 + std/crypto/chacha20.zig | 431 ++++++++++++++++++++++++++++++++++++++++ std/crypto/index.zig | 5 + 3 files changed, 437 insertions(+) create mode 100644 std/crypto/chacha20.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index d7487ce905a2..8e753b5137c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -451,6 +451,7 @@ set(ZIG_STD_FILES "crypto/sha1.zig" "crypto/sha2.zig" "crypto/sha3.zig" + "crypto/chacha20.zig" "cstr.zig" "debug/failing_allocator.zig" "debug/index.zig" diff --git a/std/crypto/chacha20.zig b/std/crypto/chacha20.zig new file mode 100644 index 000000000000..3030569ae6fe --- /dev/null +++ b/std/crypto/chacha20.zig @@ -0,0 +1,431 @@ +// Based on public domain Supercop by Daniel J. Bernstein + +const std = @import("../index.zig"); +const mem = std.mem; +const endian = std.endian; +const assert = std.debug.assert; +const builtin = @import("builtin"); + +const QuarterRound = struct { + a: usize, + b: usize, + c: usize, + d: usize, +}; + +fn Rp(a: usize, b: usize, c: usize, d: usize) QuarterRound { + return QuarterRound{ + .a = a, + .b = b, + .c = c, + .d = d, + }; +} + +fn rotate(a: u32, b: u5) u32 { + return ((a << b) | + (a >> @intCast(u5, (32 - @intCast(u6, b)))) + ); +} + +// The chacha family of ciphers are based on the salsa family. 
+fn salsa20_wordtobyte(input: [16]u32) [64]u8 { + var x: [16]u32 = undefined; + var out: [64]u8 = undefined; + + for (x) |_, i| + x[i] = input[i]; + const rounds = comptime []QuarterRound{ + Rp( 0, 4, 8,12), + Rp( 1, 5, 9,13), + Rp( 2, 6,10,14), + Rp( 3, 7,11,15), + Rp( 0, 5,10,15), + Rp( 1, 6,11,12), + Rp( 2, 7, 8,13), + Rp( 3, 4, 9,14), + }; + comptime var j: usize = 20; + inline while (j > 0) : (j -=2) { + for (rounds) |r| { + x[r.a] +%= x[r.b]; x[r.d] = rotate(x[r.d] ^ x[r.a], 16); + x[r.c] +%= x[r.d]; x[r.b] = rotate(x[r.b] ^ x[r.c], 12); + x[r.a] +%= x[r.b]; x[r.d] = rotate(x[r.d] ^ x[r.a], 8); + x[r.c] +%= x[r.d]; x[r.b] = rotate(x[r.b] ^ x[r.c], 7); + } + } + for (x) |_, i| + x[i] +%= input[i]; + for (x) |_, i| + mem.writeInt(out[4 * i .. 4 * i + 4], x[i], builtin.Endian.Little); + return out; +} + +fn chaCha20_internal(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void { + var ctx: [16]u32 = undefined; + var remaining: usize = if (in.len < out.len) in.len else out.len; + var cursor: usize = 0; + + const c = "expand 32-byte k"; + const constant_le = []u32{ + mem.readIntLE(u32, c[0..4]), + mem.readIntLE(u32, c[4..8]), + mem.readIntLE(u32, c[8..12]), + mem.readIntLE(u32, c[12..16]), + }; + + mem.copy(u32, ctx[0..], constant_le[0..4]); + mem.copy(u32, ctx[4..12], key[0..8]); + mem.copy(u32, ctx[12..16], counter[0..4]); + + while (true) { + var buf = salsa20_wordtobyte(ctx); + + if (remaining < 64) { + var i: usize = 0; + while (i < remaining) : (i += 1) + out[cursor + i] = in[cursor + i] ^ buf[i]; + return; + } + + comptime var i: usize = 0; + inline while (i < 64) : (i += 1) + out[cursor + i] = in[cursor + i] ^ buf[i]; + + cursor += 64; + remaining -= 64; + + ctx[12] += 1; + } +} + +/// ChaCha20 avoids the possibility of timing attacks, as there are no branches +/// on secret key data. +/// +/// in and out should be the same length. +/// counter should generally be 0 or 1 +/// +/// ChaCha20 is self-reversing. 
To decrypt just run the cipher with the same +/// counter, nonce, and key. +pub fn chaCha20IETF(out: []u8, in: []const u8, counter: u32, key: [32]u8, nonce: [12]u8) void { + assert(in.len >= out.len); + assert((in.len >> 6) + counter <= @maxValue(u32)); + + var k: [8]u32 = undefined; + var c: [4]u32 = undefined; + + k[0] = mem.readIntLE(u32, key[0..4]); + k[1] = mem.readIntLE(u32, key[4..8]); + k[2] = mem.readIntLE(u32, key[8..12]); + k[3] = mem.readIntLE(u32, key[12..16]); + k[4] = mem.readIntLE(u32, key[16..20]); + k[5] = mem.readIntLE(u32, key[20..24]); + k[6] = mem.readIntLE(u32, key[24..28]); + k[7] = mem.readIntLE(u32, key[28..32]); + + c[0] = counter; + c[1] = mem.readIntLE(u32, nonce[0..4]); + c[2] = mem.readIntLE(u32, nonce[4..8]); + c[3] = mem.readIntLE(u32, nonce[8..12]); + chaCha20_internal(out, in, k, c); +} + +/// This is the original ChaCha20 before RFC 7539, which recommends using the +/// original version for applications such as disk or file encryption that might +/// exceed the 256 GiB limit of the 96-bit nonce version. 
+pub fn chaCha20With64BitNonce(out: []u8, in: []const u8, counter: u64, key: [32]u8, nonce: [8]u8) void { + assert(in.len >= out.len); + assert(counter +% (in.len >> 6) >= counter); + + var cursor: u64 = 0; + var k: [8]u32 = undefined; + var c: [4]u32 = undefined; + + k[0] = mem.readIntLE(u32, key[0..4]); + k[1] = mem.readIntLE(u32, key[4..8]); + k[2] = mem.readIntLE(u32, key[8..12]); + k[3] = mem.readIntLE(u32, key[12..16]); + k[4] = mem.readIntLE(u32, key[16..20]); + k[5] = mem.readIntLE(u32, key[20..24]); + k[6] = mem.readIntLE(u32, key[24..28]); + k[7] = mem.readIntLE(u32, key[28..32]); + + c[0] = @truncate(u32, counter); + c[1] = @truncate(u32, counter >> 32); + c[2] = mem.readIntLE(u32, nonce[0..4]); + c[3] = mem.readIntLE(u32, nonce[4..8]); + + const block_size = (1 << 6); + const big_block = (block_size << 32); + + // first partial big block + if (((@intCast(u64, @maxValue(u32) - @truncate(u32, counter)) + 1) << 6) < in.len) { + chaCha20_internal(out[cursor..big_block], in[cursor..big_block], k, c); + cursor = big_block - cursor; + c[1] += 1; + if (comptime @sizeOf(usize) > 4) { + // A big block is giant: 256 GiB, but we can avoid this limitation + var remaining_blocks: u32 = @intCast(u32, (in.len / big_block)); + var i: u32 = 0; + while (remaining_blocks > 0) : (remaining_blocks -= 1) { + chaCha20_internal(out[cursor..cursor + big_block], in[cursor..cursor + big_block], k, c); + c[1] += 1; // upper 32-bit of counter, generic chaCha20_internal() doesn't + // know about this. 
+ cursor += big_block; + } + } + } + + chaCha20_internal(out[cursor..], in[cursor..], k, c); +} + +// https://tools.ietf.org/html/rfc7539#section-2.4.2 +test "crypto.chacha20 test vector sunscreen" { + const expected_result = []u8{ + 0x6e, 0x2e, 0x35, 0x9a, 0x25, 0x68, 0xf9, 0x80, + 0x41, 0xba, 0x07, 0x28, 0xdd, 0x0d, 0x69, 0x81, + 0xe9, 0x7e, 0x7a, 0xec, 0x1d, 0x43, 0x60, 0xc2, + 0x0a, 0x27, 0xaf, 0xcc, 0xfd, 0x9f, 0xae, 0x0b, + 0xf9, 0x1b, 0x65, 0xc5, 0x52, 0x47, 0x33, 0xab, + 0x8f, 0x59, 0x3d, 0xab, 0xcd, 0x62, 0xb3, 0x57, + 0x16, 0x39, 0xd6, 0x24, 0xe6, 0x51, 0x52, 0xab, + 0x8f, 0x53, 0x0c, 0x35, 0x9f, 0x08, 0x61, 0xd8, + 0x07, 0xca, 0x0d, 0xbf, 0x50, 0x0d, 0x6a, 0x61, + 0x56, 0xa3, 0x8e, 0x08, 0x8a, 0x22, 0xb6, 0x5e, + 0x52, 0xbc, 0x51, 0x4d, 0x16, 0xcc, 0xf8, 0x06, + 0x81, 0x8c, 0xe9, 0x1a, 0xb7, 0x79, 0x37, 0x36, + 0x5a, 0xf9, 0x0b, 0xbf, 0x74, 0xa3, 0x5b, 0xe6, + 0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42, + 0x87, 0x4d, + }; + const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + var result: [114]u8 = undefined; + const key = []u8{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23, + 24,25,26,27,28,29,30,31, + }; + const nonce = []u8{ + 0, 0, 0, 0, + 0, 0, 0, 0x4a, + 0, 0, 0, 0, + }; + + chaCha20IETF(result[0..], input[0..], 1, key, nonce); + assert(mem.eql(u8, expected_result, result)); + + // Chacha20 is self-reversing. 
+ var plaintext: [114]u8 = undefined; + chaCha20IETF(plaintext[0..], result[0..], 1, key, nonce); + assert(mem.compare(u8, input, plaintext) == mem.Compare.Equal); +} + +// https://tools.ietf.org/html/draft-agl-tls-chacha20poly1305-04#section-7 +test "crypto.chacha20 test vector 1" { + const expected_result = []u8{ + 0x76, 0xb8, 0xe0, 0xad, 0xa0, 0xf1, 0x3d, 0x90, + 0x40, 0x5d, 0x6a, 0xe5, 0x53, 0x86, 0xbd, 0x28, + 0xbd, 0xd2, 0x19, 0xb8, 0xa0, 0x8d, 0xed, 0x1a, + 0xa8, 0x36, 0xef, 0xcc, 0x8b, 0x77, 0x0d, 0xc7, + 0xda, 0x41, 0x59, 0x7c, 0x51, 0x57, 0x48, 0x8d, + 0x77, 0x24, 0xe0, 0x3f, 0xb8, 0xd8, 0x4a, 0x37, + 0x6a, 0x43, 0xb8, 0xf4, 0x15, 0x18, 0xa1, 0x1c, + 0xc3, 0x87, 0xb6, 0x69, 0xb2, 0xee, 0x65, 0x86, + }; + const input = []u8{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + var result: [64]u8 = undefined; + const key = []u8{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + const nonce = []u8{0, 0, 0, 0, 0, 0, 0, 0}; + + chaCha20With64BitNonce(result[0..], input[0..], 0, key, nonce); + assert(mem.eql(u8, expected_result, result)); +} + +test "crypto.chacha20 test vector 2" { + const expected_result = []u8{ + 0x45, 0x40, 0xf0, 0x5a, 0x9f, 0x1f, 0xb2, 0x96, + 0xd7, 0x73, 0x6e, 0x7b, 0x20, 0x8e, 0x3c, 0x96, + 0xeb, 0x4f, 0xe1, 0x83, 0x46, 0x88, 0xd2, 0x60, + 0x4f, 0x45, 0x09, 0x52, 0xed, 0x43, 0x2d, 0x41, + 0xbb, 0xe2, 0xa0, 0xb6, 0xea, 0x75, 0x66, 0xd2, + 0xa5, 0xd1, 0xe7, 0xe2, 0x0d, 0x42, 0xaf, 0x2c, + 0x53, 0xd7, 0x92, 0xb1, 0xc4, 0x3f, 0xea, 0x81, + 0x7e, 0x9a, 0xd2, 0x75, 0xae, 0x54, 0x69, 0x63, + }; + const input = []u8{ + 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + var result: [64]u8 = undefined; + const key = []u8{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + }; + const nonce = []u8{0, 0, 0, 0, 0, 0, 0, 0}; + + chaCha20With64BitNonce(result[0..], input[0..], 0, key, nonce); + assert(mem.eql(u8, expected_result, result)); +} + +test "crypto.chacha20 test vector 3" { + const expected_result = []u8{ + 0xde, 0x9c, 0xba, 0x7b, 0xf3, 0xd6, 0x9e, 0xf5, + 0xe7, 0x86, 0xdc, 0x63, 0x97, 0x3f, 0x65, 0x3a, + 0x0b, 0x49, 0xe0, 0x15, 0xad, 0xbf, 0xf7, 0x13, + 0x4f, 0xcb, 0x7d, 0xf1, 0x37, 0x82, 0x10, 0x31, + 0xe8, 0x5a, 0x05, 0x02, 0x78, 0xa7, 0x08, 0x45, + 0x27, 0x21, 0x4f, 0x73, 0xef, 0xc7, 0xfa, 0x5b, + 0x52, 0x77, 0x06, 0x2e, 0xb7, 0xa0, 0x43, 0x3e, + 0x44, 0x5f, 0x41, 0xe3, + }; + const input = []u8{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + }; + var result: [60]u8 = undefined; + const key = []u8{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + const nonce = []u8{0, 0, 0, 0, 0, 0, 0, 1}; + + chaCha20With64BitNonce(result[0..], input[0..], 0, key, nonce); + assert(mem.eql(u8, expected_result, result)); +} + +test "crypto.chacha20 test vector 4" { + const expected_result = []u8{ + 0xef, 0x3f, 0xdf, 0xd6, 
0xc6, 0x15, 0x78, 0xfb, + 0xf5, 0xcf, 0x35, 0xbd, 0x3d, 0xd3, 0x3b, 0x80, + 0x09, 0x63, 0x16, 0x34, 0xd2, 0x1e, 0x42, 0xac, + 0x33, 0x96, 0x0b, 0xd1, 0x38, 0xe5, 0x0d, 0x32, + 0x11, 0x1e, 0x4c, 0xaf, 0x23, 0x7e, 0xe5, 0x3c, + 0xa8, 0xad, 0x64, 0x26, 0x19, 0x4a, 0x88, 0x54, + 0x5d, 0xdc, 0x49, 0x7a, 0x0b, 0x46, 0x6e, 0x7d, + 0x6b, 0xbd, 0xb0, 0x04, 0x1b, 0x2f, 0x58, 0x6b, + }; + const input = []u8{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + var result: [64]u8 = undefined; + const key = []u8{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + const nonce = []u8{1, 0, 0, 0, 0, 0, 0, 0}; + + chaCha20With64BitNonce(result[0..], input[0..], 0, key, nonce); + assert(mem.eql(u8, expected_result, result)); +} + +test "crypto.chacha20 test vector 5" { + const expected_result = []u8{ + 0xf7, 0x98, 0xa1, 0x89, 0xf1, 0x95, 0xe6, 0x69, + 0x82, 0x10, 0x5f, 0xfb, 0x64, 0x0b, 0xb7, 0x75, + 0x7f, 0x57, 0x9d, 0xa3, 0x16, 0x02, 0xfc, 0x93, + 0xec, 0x01, 0xac, 0x56, 0xf8, 0x5a, 0xc3, 0xc1, + 0x34, 0xa4, 0x54, 0x7b, 0x73, 0x3b, 0x46, 0x41, + 0x30, 0x42, 0xc9, 0x44, 0x00, 0x49, 0x17, 0x69, + 0x05, 0xd3, 0xbe, 0x59, 0xea, 0x1c, 0x53, 0xf1, + 0x59, 0x16, 0x15, 0x5c, 0x2b, 0xe8, 0x24, 0x1a, + + 0x38, 0x00, 0x8b, 0x9a, 0x26, 0xbc, 0x35, 0x94, + 0x1e, 0x24, 0x44, 0x17, 0x7c, 0x8a, 0xde, 0x66, + 0x89, 0xde, 0x95, 0x26, 0x49, 0x86, 0xd9, 0x58, + 0x89, 0xfb, 0x60, 0xe8, 0x46, 0x29, 0xc9, 0xbd, + 0x9a, 0x5a, 0xcb, 0x1c, 0xc1, 0x18, 0xbe, 0x56, + 0x3e, 0xb9, 0xb3, 0xa4, 0xa4, 0x72, 0xf8, 0x2e, + 0x09, 0xa7, 0xe7, 0x78, 0x49, 0x2b, 0x56, 0x2e, + 0xf7, 0x13, 0x0e, 0x88, 0xdf, 0xe0, 
0x31, 0xc7, + + 0x9d, 0xb9, 0xd4, 0xf7, 0xc7, 0xa8, 0x99, 0x15, + 0x1b, 0x9a, 0x47, 0x50, 0x32, 0xb6, 0x3f, 0xc3, + 0x85, 0x24, 0x5f, 0xe0, 0x54, 0xe3, 0xdd, 0x5a, + 0x97, 0xa5, 0xf5, 0x76, 0xfe, 0x06, 0x40, 0x25, + 0xd3, 0xce, 0x04, 0x2c, 0x56, 0x6a, 0xb2, 0xc5, + 0x07, 0xb1, 0x38, 0xdb, 0x85, 0x3e, 0x3d, 0x69, + 0x59, 0x66, 0x09, 0x96, 0x54, 0x6c, 0xc9, 0xc4, + 0xa6, 0xea, 0xfd, 0xc7, 0x77, 0xc0, 0x40, 0xd7, + + 0x0e, 0xaf, 0x46, 0xf7, 0x6d, 0xad, 0x39, 0x79, + 0xe5, 0xc5, 0x36, 0x0c, 0x33, 0x17, 0x16, 0x6a, + 0x1c, 0x89, 0x4c, 0x94, 0xa3, 0x71, 0x87, 0x6a, + 0x94, 0xdf, 0x76, 0x28, 0xfe, 0x4e, 0xaa, 0xf2, + 0xcc, 0xb2, 0x7d, 0x5a, 0xaa, 0xe0, 0xad, 0x7a, + 0xd0, 0xf9, 0xd4, 0xb6, 0xad, 0x3b, 0x54, 0x09, + 0x87, 0x46, 0xd4, 0x52, 0x4d, 0x38, 0x40, 0x7a, + 0x6d, 0xeb, 0x3a, 0xb7, 0x8f, 0xab, 0x78, 0xc9, + }; + const input = []u8{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + var result: [256]u8 = undefined; + const key = []u8{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + }; + const nonce = []u8{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + }; + + 
chaCha20With64BitNonce(result[0..], input[0..], 0, key, nonce); + assert(mem.eql(u8, expected_result, result)); +} diff --git a/std/crypto/index.zig b/std/crypto/index.zig index 2f39020228db..1b18a5dc6fab 100644 --- a/std/crypto/index.zig +++ b/std/crypto/index.zig @@ -24,6 +24,10 @@ pub const HmacMd5 = hmac.HmacMd5; pub const HmacSha1 = hmac.Sha1; pub const HmacSha256 = hmac.Sha256; +const import_chaCha20 = @import("chacha20.zig"); +pub const chaCha20IETF = import_chaCha20.chaCha20IETF; +pub const chaCha20With64BitNonce = import_chaCha20.chaCha20With64BitNonce; + test "crypto" { _ = @import("md5.zig"); _ = @import("sha1.zig"); @@ -31,4 +35,5 @@ test "crypto" { _ = @import("sha3.zig"); _ = @import("blake2.zig"); _ = @import("hmac.zig"); + _ = @import("chacha20.zig"); } From 87eb95f816b01c0133de47eb3c94ac470f9d8bf2 Mon Sep 17 00:00:00 2001 From: Marc Tiehuis Date: Mon, 27 Aug 2018 22:55:53 -0700 Subject: [PATCH 2/2] speed up chacha20 The main changes are: Unrolling the inner rounds of salsa20_wordtobyte which doubles the speed. Passing the slice explicitly instead of returning the array saves a copy (can optimize out in future with copy elision) and gives ~10% improvement. Inlining the outer loop gives ~15-20% improvement but it costs an extra 4Kb of code space. I think the tradeoff is worthwhile here. The other inline loops are small and can be done by the compiler if it is worthwhile. The rotate function replacement doesn't alter the performance from the former. The modified throughput test I've used to benchmark is as follows. Interestingly we need to allocate memory instead of using a fixed buffer else Zig optimizes the whole thing out. 
https://github.com/ziglang/zig/pull/1369#issuecomment-416456628 --- std/crypto/chacha20.zig | 46 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/std/crypto/chacha20.zig b/std/crypto/chacha20.zig index 3030569ae6fe..836c8c8878b3 100644 --- a/std/crypto/chacha20.zig +++ b/std/crypto/chacha20.zig @@ -22,19 +22,15 @@ fn Rp(a: usize, b: usize, c: usize, d: usize) QuarterRound { }; } -fn rotate(a: u32, b: u5) u32 { - return ((a << b) | - (a >> @intCast(u5, (32 - @intCast(u6, b)))) - ); -} - // The chacha family of ciphers are based on the salsa family. -fn salsa20_wordtobyte(input: [16]u32) [64]u8 { +fn salsa20_wordtobyte(out: []u8, input: [16]u32) void { + assert(out.len >= 64); + var x: [16]u32 = undefined; - var out: [64]u8 = undefined; for (x) |_, i| x[i] = input[i]; + const rounds = comptime []QuarterRound{ Rp( 0, 4, 8,12), Rp( 1, 5, 9,13), @@ -45,20 +41,21 @@ fn salsa20_wordtobyte(input: [16]u32) [64]u8 { Rp( 2, 7, 8,13), Rp( 3, 4, 9,14), }; - comptime var j: usize = 20; - inline while (j > 0) : (j -=2) { - for (rounds) |r| { - x[r.a] +%= x[r.b]; x[r.d] = rotate(x[r.d] ^ x[r.a], 16); - x[r.c] +%= x[r.d]; x[r.b] = rotate(x[r.b] ^ x[r.c], 12); - x[r.a] +%= x[r.b]; x[r.d] = rotate(x[r.d] ^ x[r.a], 8); - x[r.c] +%= x[r.d]; x[r.b] = rotate(x[r.b] ^ x[r.c], 7); + + comptime var j: usize = 0; + inline while (j < 20) : (j += 2) { + // two-round cycles + inline for (rounds) |r| { + x[r.a] +%= x[r.b]; x[r.d] = std.math.rotl(u32, x[r.d] ^ x[r.a], u32(16)); + x[r.c] +%= x[r.d]; x[r.b] = std.math.rotl(u32, x[r.b] ^ x[r.c], u32(12)); + x[r.a] +%= x[r.b]; x[r.d] = std.math.rotl(u32, x[r.d] ^ x[r.a], u32(8)); + x[r.c] +%= x[r.d]; x[r.b] = std.math.rotl(u32, x[r.b] ^ x[r.c], u32(7)); } } - for (x) |_, i| - x[i] +%= input[i]; - for (x) |_, i| - mem.writeInt(out[4 * i .. 4 * i + 4], x[i], builtin.Endian.Little); - return out; + + for (x) |_, i| { + mem.writeInt(out[4 * i .. 
4 * i + 4], x[i] +% input[i], builtin.Endian.Little); + } } fn chaCha20_internal(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void { @@ -73,13 +70,14 @@ fn chaCha20_internal(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) vo mem.readIntLE(u32, c[8..12]), mem.readIntLE(u32, c[12..16]), }; - + mem.copy(u32, ctx[0..], constant_le[0..4]); mem.copy(u32, ctx[4..12], key[0..8]); mem.copy(u32, ctx[12..16], counter[0..4]); while (true) { - var buf = salsa20_wordtobyte(ctx); + var buf: [64]u8 = undefined; + salsa20_wordtobyte(buf[0..], ctx); if (remaining < 64) { var i: usize = 0; @@ -88,8 +86,8 @@ fn chaCha20_internal(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) vo return; } - comptime var i: usize = 0; - inline while (i < 64) : (i += 1) + var i: usize = 0; + while (i < 64) : (i += 1) out[cursor + i] = in[cursor + i] ^ buf[i]; cursor += 64;