From f95bcf45ad0e537a124f5aa0c7b9e97a78811289 Mon Sep 17 00:00:00 2001 From: flow Date: Sat, 23 Jul 2022 23:14:49 -0300 Subject: feat(libs): add incremental version of murmurhash2 calculation This does two passes for a given file, which is kinda slow, but I don't know how else to get the size excluding the filtered ones :< Signed-off-by: flow --- libraries/murmur2/src/MurmurHash2.h | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'libraries/murmur2/src/MurmurHash2.h') diff --git a/libraries/murmur2/src/MurmurHash2.h b/libraries/murmur2/src/MurmurHash2.h index c7b83bca..acea27ea 100644 --- a/libraries/murmur2/src/MurmurHash2.h +++ b/libraries/murmur2/src/MurmurHash2.h @@ -1,30 +1,30 @@ //----------------------------------------------------------------------------- -// MurmurHash2 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. +// The original MurmurHash2 was written by Austin Appleby, and is placed in the +// public domain. The author hereby disclaims copyright to this source code. +// +// This was modified as to possibilitate it's usage incrementally. +// Those modifications are also placed in the public domain, and the author of +// such modifications hereby disclaims copyright to this source code. #pragma once -//----------------------------------------------------------------------------- -// Platform-specific functions and macros - -// Microsoft Visual Studio - -#if defined(_MSC_VER) && (_MSC_VER < 1600) - -typedef unsigned char uint8_t; -typedef unsigned int uint32_t; -typedef unsigned __int64 uint64_t; +#include +#include -// Other compilers +#include -#else // defined(_MSC_VER) - -#include +//----------------------------------------------------------------------------- -#endif // !defined(_MSC_VER) +uint32_t MurmurHash2( + std::ifstream&& file_stream, + std::size_t buffer_size = 4096, + std::function filter_out = [](char) { return true; }); -//----------------------------------------------------------------------------- +struct IncrementalHashInfo { + uint32_t h; + uint32_t len; +}; -uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed = 1 ); +void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev); //----------------------------------------------------------------------------- -- cgit