aboutsummaryrefslogtreecommitdiff
path: root/libraries
diff options
context:
space:
mode:
authorflow <flowlnlnln@gmail.com>2022-07-23 23:14:49 -0300
committerflow <flowlnlnln@gmail.com>2022-07-24 17:46:53 -0300
commitf95bcf45ad0e537a124f5aa0c7b9e97a78811289 (patch)
treeef501d49352200ad76671093fe1f9ad82ea8afa2 /libraries
parent15ec1abb6a3acb77b36f14d3ddccc97a8df8c8e1 (diff)
downloadPrismLauncher-f95bcf45ad0e537a124f5aa0c7b9e97a78811289.tar.gz
PrismLauncher-f95bcf45ad0e537a124f5aa0c7b9e97a78811289.tar.bz2
PrismLauncher-f95bcf45ad0e537a124f5aa0c7b9e97a78811289.zip
feat(libs): add incremental version of murmurhash2 calculation
This does two passes for a given file, which is kinda slow, but I don't know how else to get the size excluding the filtered ones :< Signed-off-by: flow <flowlnlnln@gmail.com>
Diffstat (limited to 'libraries')
-rw-r--r--libraries/murmur2/src/MurmurHash2.cpp172
-rw-r--r--libraries/murmur2/src/MurmurHash2.h38
2 files changed, 117 insertions, 93 deletions
diff --git a/libraries/murmur2/src/MurmurHash2.cpp b/libraries/murmur2/src/MurmurHash2.cpp
index 3e52e6d1..b625efb1 100644
--- a/libraries/murmur2/src/MurmurHash2.cpp
+++ b/libraries/murmur2/src/MurmurHash2.cpp
@@ -1,86 +1,110 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
-
-// Note - This code makes a few assumptions about how your machine behaves -
-
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-
-// And it has a few limitations -
-
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-// machines.
+//
+// This was modified as to possibilitate it's usage incrementally.
+// Those modifications are also placed in the public domain, and the author of
+// such modifications hereby disclaims copyright to this source code.
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x)
-// Other compilers
+// 'm' and 'r' are mixing constants generated offline.
+// They're not really 'magic', they just happen to work well.
+const uint32_t m = 0x5bd1e995;
+const int r = 24;
-#else // defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed )
+uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::function<bool(char)> filter_out)
{
- // 'm' and 'r' are mixing constants generated offline.
- // They're not really 'magic', they just happen to work well.
-
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- // Initialize the hash to a 'random' value
-
- uint32_t h = seed ^ len;
-
- // Mix 4 bytes at a time into the hash
- const auto* data = (const unsigned char*) key;
- while(len >= 4)
- {
- uint32_t k = *(uint32_t*)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4*sizeof(char);
- len -= 4;
- }
-
- // Handle the last few bytes of the input array
-
- switch(len)
- {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
-
- // Do a few final mixes of the hash to ensure the last few
- // bytes are well-incorporated.
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
+ auto* buffer = new char[buffer_size];
+ char data[4];
+
+ int read = 0;
+ uint32_t size = 0;
+
+ // We need the size without the filtered out characters before actually calculating the hash,
+ // to setup the initial value for the hash.
+ do {
+ file_stream.read(buffer, buffer_size);
+ read = file_stream.gcount();
+ for (int i = 0; i < read; i++) {
+ if (!filter_out(buffer[i]))
+ size += 1;
+ }
+ } while (!file_stream.eof());
+
+ file_stream.clear();
+ file_stream.seekg(0, file_stream.beg);
+
+ int index = 0;
+
+ // This forces a seed of 1.
+ IncrementalHashInfo info{ (uint32_t)1 ^ size, (uint32_t)size };
+ do {
+ file_stream.read(buffer, buffer_size);
+ read = file_stream.gcount();
+ for (int i = 0; i < read; i++) {
+ char c = buffer[i];
+
+ if (filter_out(c))
+ continue;
+
+ data[index] = c;
+ index = (index + 1) % 4;
+
+ // Mix 4 bytes at a time into the hash
+ if (index == 0)
+ FourBytes_MurmurHash2((unsigned char*)&data, info);
+ }
+ } while (!file_stream.eof());
+
+ // Do one last bit shuffle in the hash
+ FourBytes_MurmurHash2((unsigned char*)&data, info);
+
+ delete[] buffer;
+
+ file_stream.close();
+ return info.h;
+}
+
+void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev)
+{
+ if (prev.len >= 4) {
+ // Not the final mix
+ uint32_t k = *(uint32_t*)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ prev.h *= m;
+ prev.h ^= k;
+
+ prev.len -= 4;
+ } else {
+ // The final mix
+
+ // Handle the last few bytes of the input array
+ switch (prev.len) {
+ case 3:
+ prev.h ^= data[2] << 16;
+ case 2:
+ prev.h ^= data[1] << 8;
+ case 1:
+ prev.h ^= data[0];
+ prev.h *= m;
+ };
+
+ // Do a few final mixes of the hash to ensure the last few
+ // bytes are well-incorporated.
+
+ prev.h ^= prev.h >> 13;
+ prev.h *= m;
+ prev.h ^= prev.h >> 15;
+
+ prev.len = 0;
+ }
+}
//-----------------------------------------------------------------------------
diff --git a/libraries/murmur2/src/MurmurHash2.h b/libraries/murmur2/src/MurmurHash2.h
index c7b83bca..acea27ea 100644
--- a/libraries/murmur2/src/MurmurHash2.h
+++ b/libraries/murmur2/src/MurmurHash2.h
@@ -1,30 +1,30 @@
//-----------------------------------------------------------------------------
-// MurmurHash2 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
+// The original MurmurHash2 was written by Austin Appleby, and is placed in the
+// public domain. The author hereby disclaims copyright to this source code.
+//
+// This was modified as to possibilitate it's usage incrementally.
+// Those modifications are also placed in the public domain, and the author of
+// such modifications hereby disclaims copyright to this source code.
#pragma once
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER) && (_MSC_VER < 1600)
-
-typedef unsigned char uint8_t;
-typedef unsigned int uint32_t;
-typedef unsigned __int64 uint64_t;
+#include <cstdint>
+#include <fstream>
-// Other compilers
+#include <functional>
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
+//-----------------------------------------------------------------------------
-#endif // !defined(_MSC_VER)
+uint32_t MurmurHash2(
+ std::ifstream&& file_stream,
+ std::size_t buffer_size = 4096,
+ std::function<bool(char)> filter_out = [](char) { return true; });
-//-----------------------------------------------------------------------------
+struct IncrementalHashInfo {
+ uint32_t h;
+ uint32_t len;
+};
-uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed = 1 );
+void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev);
//-----------------------------------------------------------------------------