Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
c48b932
First building commit with sample matchfinder
embg Nov 10, 2022
006eecb
Set up ZSTD_externalMatchCtx struct
embg Nov 21, 2022
297de1c
move seqBuffer to ZSTD_Sequence*
embg Nov 21, 2022
3d3cc4e
support non-contiguous dictionary
embg Nov 21, 2022
46d3dc8
clean up parens
embg Nov 21, 2022
6ed7d05
add clearExternalMatchfinder, handle allocation errors
embg Nov 21, 2022
d4ecd79
Add useExternalMatchfinder cParam
embg Nov 23, 2022
06abdf8
validate useExternalMatchfinder cParam
embg Nov 23, 2022
875c3ae
Disable LDM + external matchfinder
embg Nov 23, 2022
a3580e8
Check for static CCtx
embg Nov 23, 2022
c660b00
Validate mState and mStateDestructor
embg Nov 23, 2022
d574df8
Improve LDM check to cover both branches
embg Nov 24, 2022
bd249d4
Error API with optional fallback
embg Nov 24, 2022
2a00787
handle RLE properly for external matchfinder
embg Nov 27, 2022
bb48c16
nit
embg Nov 27, 2022
06b3607
Move to a CDict-like model for resource ownership
embg Nov 29, 2022
182dbf8
Add hidden useExternalMatchfinder bool to CCtx_params_s
embg Nov 30, 2022
39a467f
Eliminate malloc, move to cwksp allocation
embg Nov 30, 2022
9f8aedf
Handle CCtx reset properly
embg Nov 30, 2022
59cc838
Ensure seqStore has enough space for external sequences
embg Nov 30, 2022
fd9c467
fix capitalization
embg Dec 1, 2022
c79ac43
Add DEBUGLOG statements
embg Dec 1, 2022
14d18ae
Add compressionLevel param to matchfinder API
embg Dec 1, 2022
a84ec58
fix c99 issues and add a param combination error code
embg Dec 1, 2022
807314c
nits
embg Dec 1, 2022
c58a56d
Test external matchfinder API
embg Dec 5, 2022
78a9e4a
C90 compat for simpleExternalMatchFinder
embg Dec 6, 2022
0de43b9
Fix some @nocommits and an ASAN bug
embg Dec 6, 2022
99215cf
nit
embg Dec 7, 2022
08d6c39
nit
embg Dec 7, 2022
321500f
nits
embg Dec 7, 2022
785c3dd
forward declare copySequencesToSeqStore functions in zstd_compress_in…
embg Dec 7, 2022
ef7fa30
nit
embg Dec 7, 2022
8d4f42d
nit
embg Dec 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions contrib/externalMatchfinder/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# ################################################################
# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################

# Paths into the zstd source tree, relative to this directory.
# PROGDIR is defined but not referenced by any rule in this Makefile.
PROGDIR = ../../programs
LIBDIR = ../../lib

# Static library that the example binary is linked against.
LIBZSTD = $(LIBDIR)/libzstd.a

# Header search path: lib/ itself plus its compress/ and common/ subdirectories.
CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common

# -O3 is only a default (?=), so a CFLAGS supplied by the caller replaces it;
# -std=gnu99 is appended unconditionally.
CFLAGS ?= -O3
CFLAGS += -std=gnu99
# Warning set appended to CFLAGS below, together with the user-supplied MOREFLAGS.
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum \
-Wstrict-prototypes -Wundef -Wpointer-arith \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)

# These targets never produce a file of the same name; declaring them phony
# keeps a stray file called "clean", "all" or "default" from disabling them.
.PHONY: default all clean

default: externalMatchfinder

all: externalMatchfinder

# Compile both sources and link them with the static zstd library in one step.
# $^ expands to every prerequisite, so libzstd.a is on the link line too.
externalMatchfinder: matchfinder.c main.c $(LIBZSTD)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

# The library is declared phony so the sub-make always runs and libzstd.a is
# built with this Makefile's CFLAGS. As a consequence externalMatchfinder is
# relinked on every invocation.
.PHONY: $(LIBZSTD)
$(LIBZSTD):
	$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"

# Remove local objects and the binary, and clean the library directory
# (its output is discarded).
clean:
	$(RM) *.o
	$(MAKE) -C $(LIBDIR) clean > /dev/null
	$(RM) externalMatchfinder
80 changes: 80 additions & 0 deletions contrib/externalMatchfinder/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include "matchfinder.h" // simpleExternalMatchFinder

/*
 * Round-trip demo for the external matchfinder API.
 *
 * Reads the file named by argv[1] into memory, compresses it with a CCtx
 * that has simpleExternalMatchFinder registered (with matchfinder fallback
 * enabled), decompresses the result and verifies that it is byte-identical
 * to the input.
 *
 * Returns 0 when the round trip succeeds, 1 on a usage error or any failure.
 * All resources are released on every exit path.
 */
int main(int argc, char *argv[]) {
    int ret = 1;
    size_t res;
    ZSTD_CCtx *zc = NULL;
    FILE *f = NULL;
    char *src = NULL;
    char *dst = NULL;
    char *val = NULL;
    long fileSize;
    size_t srcSize;
    size_t dstSize;
    size_t cSize;
    /* Opaque state handed to the matchfinder; the sample matchfinder ignores it.
     * It lives in main's frame, so it outlives every use of zc. */
    int simpleExternalMatchState = 0xdeadbeef;

    if (argc != 2) {
        printf("Usage: externalMatchfinder <file>\n");
        return 1;
    }

    zc = ZSTD_createCCtx();
    if (zc == NULL) {
        printf("ERROR: could not create compression context\n");
        goto cleanup;
    }

    // Here is the crucial bit of code!
    ZSTD_refExternalMatchFinder(
        zc,
        &simpleExternalMatchState,
        simpleExternalMatchFinder
    );

    res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1);
    if (ZSTD_isError(res)) {
        printf("ERROR: %s\n", ZSTD_getErrorName(res));
        goto cleanup;
    }

    /* Load the whole input file; its size is found by seeking to the end. */
    f = fopen(argv[1], "rb");
    if (f == NULL) {
        printf("ERROR: could not open %s\n", argv[1]);
        goto cleanup;
    }
    if (fseek(f, 0, SEEK_END) != 0) {
        printf("ERROR: could not seek in %s\n", argv[1]);
        goto cleanup;
    }
    fileSize = ftell(f);
    if (fileSize < 0 || fseek(f, 0, SEEK_SET) != 0) {
        printf("ERROR: could not determine size of %s\n", argv[1]);
        goto cleanup;
    }
    srcSize = (size_t)fileSize;

    /* +1 keeps the request non-zero for an empty file. */
    src = malloc(srcSize + 1);
    if (src == NULL) {
        printf("ERROR: out of memory\n");
        goto cleanup;
    }
    if (srcSize > 0 && fread(src, 1, srcSize, f) != srcSize) {
        printf("ERROR: could not read %s\n", argv[1]);
        goto cleanup;
    }
    fclose(f);
    f = NULL;

    dstSize = ZSTD_compressBound(srcSize);
    dst = malloc(dstSize);
    if (dst == NULL) {
        printf("ERROR: out of memory\n");
        goto cleanup;
    }

    cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize);
    if (ZSTD_isError(cSize)) {
        printf("ERROR: %s\n", ZSTD_getErrorName(cSize));
        goto cleanup;
    }

    /* Decompress into a separate buffer to validate the round trip. */
    val = malloc(srcSize + 1);
    if (val == NULL) {
        printf("ERROR: out of memory\n");
        goto cleanup;
    }
    res = ZSTD_decompress(val, srcSize, dst, cSize);
    if (ZSTD_isError(res)) {
        printf("ERROR: %s\n", ZSTD_getErrorName(res));
        goto cleanup;
    }

    if (res == srcSize && memcmp(src, val, srcSize) == 0) {
        printf("Compression and decompression were successful!\n");
        printf("Original size: %zu\n", srcSize);
        printf("Compressed size: %zu\n", cSize);
        ret = 0;
    } else {
        /* Only the bytes that were actually regenerated are compared. */
        size_t const checked = (res < srcSize) ? res : srcSize;
        size_t i = 0;
        printf("ERROR: input and validation buffers don't match!\n");
        while (i < checked && src[i] == val[i]) {
            i++;
        }
        printf("First bad index: %zu\n", i);
    }

cleanup:
    if (f != NULL) {
        fclose(f);
    }
    free(val);
    free(dst);
    free(src);
    ZSTD_freeCCtx(zc);  /* accepts NULL */
    return ret;
}
65 changes: 65 additions & 0 deletions contrib/externalMatchfinder/matchfinder.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#include "zstd_compress_internal.h"
#include "matchfinder.h"

/* Number of hash table entries; must equal 1 << HLOG. */
#define HSIZE 1024
/* Number of hash bits requested from ZSTD_hashPtr. */
static U32 const HLOG = 10;
/* Number of input bytes hashed at each position. */
static U32 const MLS = 4;
/* Marks an empty hash table slot. The shift is done on an unsigned operand:
 * (1 << 31) would overflow a 32-bit signed int, which is undefined behavior. */
static U32 const BADIDX = (1U << 31);

/*
 * Sample external matchfinder: a greedy, single-probe hash matcher.
 *
 * Walks src from start to end. At each position it hashes MLS bytes, looks up
 * the previous position stored under that hash, records the current position
 * in its place, and emits a sequence when the match is at least
 * ZSTD_MINMATCH_MIN bytes long. The bytes after the last match are emitted as
 * one final sequence with zero offset and zero match length.
 *
 * externalMatchState, dict, dictSize and compressionLevel are ignored.
 *
 * At most outSeqsCapacity sequences are written. One slot is always kept in
 * reserve for the final sequence; once only that slot is left, matching stops
 * and the rest of the input is reported as literals, so the sequences still
 * cover all of src.
 *
 * Returns the number of sequences written to outSeqs. If outSeqsCapacity is 0
 * nothing can be written and 0 is returned.
 * NOTE(review): confirm how the library treats a 0 return for non-empty input.
 */
size_t simpleExternalMatchFinder(
  void* externalMatchState,
  ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
  const void* src, size_t srcSize,
  const void* dict, size_t dictSize,
  int compressionLevel
) {
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;   /* first byte not yet covered by a sequence */
    size_t seqCount = 0;
    U32 hashTable[HSIZE];

    (void)externalMatchState;
    (void)dict;
    (void)dictSize;
    (void)compressionLevel;

    if (outSeqsCapacity == 0) {
        return 0;
    }

    {   int i;
        for (i=0; i < HSIZE; i++) {
            hashTable[i] = BADIDX;
    }   }

    /* Compare remaining length instead of forming ip + MLS, which would point
     * past the end of the buffer for very short inputs. */
    while ((size_t)(iend - ip) > MLS) {
        size_t hash;
        U32 matchIndex;

        /* Only the slot reserved for the final sequence is left. */
        if (seqCount + 1 >= outSeqsCapacity) {
            break;
        }

        hash = ZSTD_hashPtr(ip, HLOG, MLS);
        matchIndex = hashTable[hash];
        hashTable[hash] = (U32)(ip - istart);

        if (matchIndex != BADIDX) {
            const BYTE* const match = istart + matchIndex;
            U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
            if (matchLen >= ZSTD_MINMATCH_MIN) {
                U32 const litLen = (U32)(ip - anchor);
                U32 const offset = (U32)(ip - match);
                ZSTD_Sequence const seq = {
                    offset, litLen, matchLen, 0
                };
                outSeqs[seqCount++] = seq;
                ip += matchLen;
                anchor = ip;
                continue;
            }
        }

        ip++;
    }

    /* Final sequence: trailing literals only, no match. */
    {   ZSTD_Sequence const finalSeq = {
            0, (U32)(iend - anchor), 0, 0
        };
        outSeqs[seqCount++] = finalSeq;
    }

    return seqCount;
}
15 changes: 15 additions & 0 deletions contrib/externalMatchfinder/matchfinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef MATCHFINDER_H
#define MATCHFINDER_H

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

/*
 * Sample external matchfinder, passed to ZSTD_refExternalMatchFinder() in main.c.
 *
 * Scans src (srcSize bytes) and writes the matches it finds to outSeqs,
 * followed by one last sequence with zero offset and zero match length that
 * carries the remaining literals.
 *
 * externalMatchState, dict, dictSize and compressionLevel are accepted to fit
 * the callback signature but are not used by this sample.
 * outSeqsCapacity is presumably the number of elements outSeqs can hold --
 * TODO confirm against the ZSTD external matchfinder API.
 *
 * Returns the number of sequences written to outSeqs.
 */
size_t simpleExternalMatchFinder(
void* externalMatchState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
int compressionLevel
);

#endif
2 changes: 2 additions & 0 deletions lib/common/error_private.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(corruption_detected): return "Data corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
Expand All @@ -50,6 +51,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code";
case PREFIX(maxCode):
default: return notErrorCode;
}
Expand Down
Loading