diff --git a/afl.pyx b/afl.pyx index 598e2f0..47a1245 100644 --- a/afl.pyx +++ b/afl.pyx @@ -46,6 +46,7 @@ from cpython.exc cimport PyErr_SetFromErrno from libc cimport errno from libc.signal cimport SIG_DFL from libc.stddef cimport size_t +from libc.stdint cimport uint8_t from libc.stdint cimport uint32_t from libc.stdlib cimport getenv from libc.string cimport strlen @@ -69,7 +70,8 @@ cdef extern from 'sys/shm.h': unsigned char *shmat(int shmid, void *shmaddr, int shmflg) cdef unsigned char *afl_area = NULL -cdef unsigned int prev_location = 0 +cdef uint32_t prev_location = 0 +cdef uint32_t prev_h = 0 cdef inline unsigned int lhash(const char *key, size_t offset): # 32-bit Fowler–Noll–Vo hash function @@ -86,6 +88,18 @@ cdef inline unsigned int lhash(const char *key, size_t offset): offset >>= 8 return h +cdef inline uint32_t lhash_init(): + return 0x811C9DC5 # 32-bit FNV offset basis: the initial hash state fed to lhash_raw + +cdef inline uint32_t lhash_raw(uint32_t h, const uint8_t *key, size_t len): + # 32-bit Fowler–Noll–Vo hash (FNV-1a order: XOR each byte into h, then multiply by the FNV prime); folds len bytes starting at key into the running state h and returns the new state, so calls can be chained + while len > 0: + h ^= key[0] + h *= 0x01000193 + len -= 1 + key += 1 + return h + def _hash(key, offset): # This function is not a part of public API. # It is provided only to facilitate testing. 
@@ -93,14 +107,47 @@ def _hash(key, offset): cdef object trace def trace(frame, event, arg): - global prev_location, tstl_mode - cdef unsigned int location, offset - cdef object filename = frame.f_code.co_filename + global prev_location, prev_h, tstl_mode + + frame.f_trace_lines = True + frame.f_trace_opcodes = True + code = frame.f_code + filename = code.co_filename + if tstl_mode and (filename[-7:] in ['sut.py', '/sut.py']): return None + + # If opcode, add codes to hash (prev_h == 0 means no hash is in progress, so start a fresh one first) + if prev_h == 0: + prev_h = lhash_init() + + cdef uint8_t c_opcode + if event == "opcode": + c_opcode = code.co_code[frame.f_lasti] + prev_h = lhash_raw(prev_h, &c_opcode, 1) + return trace + + # Any other event (line/return/call): consume the accumulated opcode hash and reset it for the next run of opcodes + cdef uint32_t h + h = prev_h + prev_h = 0 + + strings = [filename] + + cdef char *c_str + cdef ssize_t c_str_len + for s in strings: + c_str = s + c_str_len = strlen(c_str) # byte length of the C string; len(s) counts characters and can differ for non-ASCII names + h = lhash_raw(h, <const uint8_t *> c_str, c_str_len) # explicit cast: lhash_raw takes const uint8_t *, c_str is char * + + cdef uint32_t c_lineno + c_lineno = frame.f_lineno + h = lhash_raw(h, <const uint8_t *> &c_lineno, sizeof(c_lineno)) # hash the raw bytes of the line number; cast needed because &c_lineno is uint32_t * + + cdef uint32_t location, offset location = ( - lhash(filename, frame.f_lineno) - % MAP_SIZE + h % MAP_SIZE ) offset = location ^ prev_location prev_location = location // 2 diff --git a/tests/target.py b/tests/target.py index 80485fb..dae0426 100644 --- a/tests/target.py +++ b/tests/target.py @@ -31,7 +31,7 @@ def main(): print('Hum?') sys.exit(1) s.encode('ASCII') - if s[0] == '0': + if s[0] == '0' or s[0] == '\0' or s == 'zero' or s == "zero\n": print('Looks like a zero to me!') else: print('A non-zero value? How quaint!') diff --git a/tests/target_persistent.py b/tests/target_persistent.py index 430e7b6..332656e 100644 --- a/tests/target_persistent.py +++ b/tests/target_persistent.py @@ -32,7 +32,7 @@ def main(): print('Hum?') sys.exit(1) s.encode('ASCII') - if s[0] == '0': + if s[0] == '0' or s[0] == '\0' or s == 'zero' or s == "zero\n": print('Looks like a zero to me!') else: print('A non-zero value? How quaint!')