diff options
author | Sefa Eyeoglu <contact@scrumplex.net> | 2022-08-28 11:03:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-28 11:03:12 +0200 |
commit | afcd669d2f6934c2b6076939d7665f791d495994 (patch) | |
tree | b5868b4e26fc52a6b48443847fbd7ef0aafee908 /libraries | |
parent | fbf542d2051576ee25556c3b28112eea094da309 (diff) | |
parent | 7b27f200b1f131f0ea3b23433974cbe68eb979bb (diff) | |
download | PrismLauncher-afcd669d2f6934c2b6076939d7665f791d495994.tar.gz PrismLauncher-afcd669d2f6934c2b6076939d7665f791d495994.tar.bz2 PrismLauncher-afcd669d2f6934c2b6076939d7665f791d495994.zip |
Merge pull request #965 from flowln/fat_files_in_memory
Refactor a bit EnsureMetadataTask and calculate hashes in a incremental manner
Diffstat (limited to 'libraries')
-rw-r--r-- | libraries/murmur2/src/MurmurHash2.cpp | 172 | ||||
-rw-r--r-- | libraries/murmur2/src/MurmurHash2.h | 39 |
2 files changed, 119 insertions, 92 deletions
diff --git a/libraries/murmur2/src/MurmurHash2.cpp b/libraries/murmur2/src/MurmurHash2.cpp index 3e52e6d1..b625efb1 100644 --- a/libraries/murmur2/src/MurmurHash2.cpp +++ b/libraries/murmur2/src/MurmurHash2.cpp @@ -1,86 +1,110 @@ //----------------------------------------------------------------------------- // MurmurHash2 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. - -// Note - This code makes a few assumptions about how your machine behaves - - -// 1. We can read a 4-byte value from any address without crashing -// 2. sizeof(int) == 4 - -// And it has a few limitations - - -// 1. It will not work incrementally. -// 2. It will not produce the same results on little-endian and big-endian -// machines. +// +// This was modified as to possibilitate it's usage incrementally. +// Those modifications are also placed in the public domain, and the author of +// such modifications hereby disclaims copyright to this source code. #include "MurmurHash2.h" //----------------------------------------------------------------------------- -// Platform-specific functions and macros - -// Microsoft Visual Studio - -#if defined(_MSC_VER) - -#define BIG_CONSTANT(x) (x) -// Other compilers +// 'm' and 'r' are mixing constants generated offline. +// They're not really 'magic', they just happen to work well. +const uint32_t m = 0x5bd1e995; +const int r = 24; -#else // defined(_MSC_VER) - -#define BIG_CONSTANT(x) (x##LLU) - -#endif // !defined(_MSC_VER) - -//----------------------------------------------------------------------------- - -uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed ) +uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::function<bool(char)> filter_out) { - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. - - const uint32_t m = 0x5bd1e995; - const int r = 24; - - // Initialize the hash to a 'random' value - - uint32_t h = seed ^ len; - - // Mix 4 bytes at a time into the hash - const auto* data = (const unsigned char*) key; - while(len >= 4) - { - uint32_t k = *(uint32_t*)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4*sizeof(char); - len -= 4; - } - - // Handle the last few bytes of the input array - - switch(len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} + auto* buffer = new char[buffer_size]; + char data[4]; + + int read = 0; + uint32_t size = 0; + + // We need the size without the filtered out characters before actually calculating the hash, + // to setup the initial value for the hash. + do { + file_stream.read(buffer, buffer_size); + read = file_stream.gcount(); + for (int i = 0; i < read; i++) { + if (!filter_out(buffer[i])) + size += 1; + } + } while (!file_stream.eof()); + + file_stream.clear(); + file_stream.seekg(0, file_stream.beg); + + int index = 0; + + // This forces a seed of 1. + IncrementalHashInfo info{ (uint32_t)1 ^ size, (uint32_t)size }; + do { + file_stream.read(buffer, buffer_size); + read = file_stream.gcount(); + for (int i = 0; i < read; i++) { + char c = buffer[i]; + + if (filter_out(c)) + continue; + + data[index] = c; + index = (index + 1) % 4; + + // Mix 4 bytes at a time into the hash + if (index == 0) + FourBytes_MurmurHash2((unsigned char*)&data, info); + } + } while (!file_stream.eof()); + + // Do one last bit shuffle in the hash + FourBytes_MurmurHash2((unsigned char*)&data, info); + + delete[] buffer; + + file_stream.close(); + return info.h; +} + +void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev) +{ + if (prev.len >= 4) { + // Not the final mix + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + prev.h *= m; + prev.h ^= k; + + prev.len -= 4; + } else { + // The final mix + + // Handle the last few bytes of the input array + switch (prev.len) { + case 3: + prev.h ^= data[2] << 16; + case 2: + prev.h ^= data[1] << 8; + case 1: + prev.h ^= data[0]; + prev.h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + prev.h ^= prev.h >> 13; + prev.h *= m; + prev.h ^= prev.h >> 15; + + prev.len = 0; + } +} //----------------------------------------------------------------------------- diff --git a/libraries/murmur2/src/MurmurHash2.h b/libraries/murmur2/src/MurmurHash2.h index c7b83bca..dc2c9681 100644 --- a/libraries/murmur2/src/MurmurHash2.h +++ b/libraries/murmur2/src/MurmurHash2.h @@ -1,30 +1,33 @@ //----------------------------------------------------------------------------- -// MurmurHash2 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. +// The original MurmurHash2 was written by Austin Appleby, and is placed in the +// public domain. The author hereby disclaims copyright to this source code. +// +// This was modified as to possibilitate it's usage incrementally. +// Those modifications are also placed in the public domain, and the author of +// such modifications hereby disclaims copyright to this source code. #pragma once -//----------------------------------------------------------------------------- -// Platform-specific functions and macros - -// Microsoft Visual Studio - -#if defined(_MSC_VER) && (_MSC_VER < 1600) +#include <cstdint> +#include <fstream> -typedef unsigned char uint8_t; -typedef unsigned int uint32_t; -typedef unsigned __int64 uint64_t; +#include <functional> -// Other compilers - -#else // defined(_MSC_VER) +//----------------------------------------------------------------------------- -#include <stdint.h> +#define KiB 1024 +#define MiB 1024*KiB -#endif // !defined(_MSC_VER) +uint32_t MurmurHash2( + std::ifstream&& file_stream, + std::size_t buffer_size = 4*MiB, + std::function<bool(char)> filter_out = [](char) { return false; }); -//----------------------------------------------------------------------------- +struct IncrementalHashInfo { + uint32_t h; + uint32_t len; +}; -uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed = 1 ); +void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev); //----------------------------------------------------------------------------- |