From 2e273af127dca407ef041e8f4a60b1a6230cfb35 Mon Sep 17 00:00:00 2001 From: Phantom0299 Date: Sun, 12 Apr 2026 00:25:20 +0530 Subject: [PATCH 1/3] perf: optimize bloom filter using double hashing and zlib --- chuck/tasks/memory_index/task.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/chuck/tasks/memory_index/task.py b/chuck/tasks/memory_index/task.py index f08a924..abf3501 100644 --- a/chuck/tasks/memory_index/task.py +++ b/chuck/tasks/memory_index/task.py @@ -1,6 +1,6 @@ from __future__ import annotations -from hashlib import blake2b, sha256 +import zlib from random import Random from typing import Any @@ -8,12 +8,18 @@ def _bloom_hashes(value: str, bit_count: int, hash_count: int) -> list[int]: - digest_a = int.from_bytes(blake2b(value.encode(), digest_size=8).digest(), "big") - digest_b = int.from_bytes(sha256(value.encode()).digest()[:8], "big") - return [((digest_a + index * digest_b) % bit_count) for index in range(hash_count)] + """Computes probe positions for a Bloom filter using double hashing.""" + raw = value.encode() + h1 = zlib.crc32(raw) + h2 = zlib.adler32(raw) + + step = 1 + (h2 % (bit_count - 1)) if bit_count > 1 else 1 + + return [((h1 + i * step) % bit_count) for i in range(hash_count)] def generate(count: int, seed: int) -> dict[str, Any]: + """Generates synthetic data and probe items for the memory index task.""" rng = Random(seed) items = [f"item_{seed}_{index}_{rng.randrange(10_000)}" for index in range(count)] probes = items[: count // 2] @@ -22,6 +28,7 @@ def generate(count: int, seed: int) -> dict[str, Any]: def solve(payload: dict[str, Any]) -> dict[str, Any]: + """Performs Bloom filter membership testing and calculates accuracy metrics.""" items = payload["items"] probes = payload["probes"] bit_count = payload["bit_count"] @@ -29,9 +36,11 @@ def solve(payload: dict[str, Any]) -> dict[str, Any]: bits = bytearray((bit_count + 7) // 8) def set_bit(position: int) -> None: + """Sets a bit at the given position in the bitset.""" bits[position // 8] |= 1 << (position % 8) def get_bit(position: int) -> bool: + """Checks if a bit is set at the given position.""" return bool(bits[position // 8] & (1 << (position % 8))) for item in items: From bd8729001a2ae9ebefece9d4e0144cd9b94c663e Mon Sep 17 00:00:00 2001 From: Phantom0299 Date: Sun, 12 Apr 2026 02:52:31 +0530 Subject: [PATCH 2/3] style: remove docstrings --- chuck/tasks/memory_index/task.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/chuck/tasks/memory_index/task.py b/chuck/tasks/memory_index/task.py index abf3501..cbb74cd 100644 --- a/chuck/tasks/memory_index/task.py +++ b/chuck/tasks/memory_index/task.py @@ -8,7 +8,7 @@ def _bloom_hashes(value: str, bit_count: int, hash_count: int) -> list[int]: - """Computes probe positions for a Bloom filter using double hashing.""" + raw = value.encode() h1 = zlib.crc32(raw) h2 = zlib.adler32(raw) @@ -19,7 +19,7 @@ def _bloom_hashes(value: str, bit_count: int, hash_count: int) -> list[int]: def generate(count: int, seed: int) -> dict[str, Any]: - """Generates synthetic data and probe items for the memory index task.""" + rng = Random(seed) items = [f"item_{seed}_{index}_{rng.randrange(10_000)}" for index in range(count)] probes = items[: count // 2] @@ -28,7 +28,7 @@ def generate(count: int, seed: int) -> dict[str, Any]: def solve(payload: dict[str, Any]) -> dict[str, Any]: - """Performs Bloom filter membership testing and calculates accuracy metrics.""" + items = payload["items"] probes = payload["probes"] bit_count = payload["bit_count"] @@ -36,11 +36,11 @@ def solve(payload: dict[str, Any]) -> dict[str, Any]: bits = bytearray((bit_count + 7) // 8) def set_bit(position: int) -> None: - """Sets a bit at the given position in the bitset.""" + bits[position // 8] |= 1 << (position % 8) def get_bit(position: int) -> bool: - """Checks if a bit is set at the given position.""" + return bool(bits[position // 8] & (1 << (position % 8))) for item in items: From 0641e1cb1c1a16a758d9c782ad7329701985b023 Mon Sep 17 00:00:00 2001 From: Phantom0299 Date: Sun, 12 Apr 2026 02:55:05 +0530 Subject: [PATCH 3/3] style: fix trailing whitespace errors from docstring removal --- chuck/tasks/memory_index/task.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/chuck/tasks/memory_index/task.py b/chuck/tasks/memory_index/task.py index cbb74cd..31f7073 100644 --- a/chuck/tasks/memory_index/task.py +++ b/chuck/tasks/memory_index/task.py @@ -8,7 +8,6 @@ def _bloom_hashes(value: str, bit_count: int, hash_count: int) -> list[int]: - raw = value.encode() h1 = zlib.crc32(raw) h2 = zlib.adler32(raw) @@ -19,7 +18,6 @@ def _bloom_hashes(value: str, bit_count: int, hash_count: int) -> list[int]: def generate(count: int, seed: int) -> dict[str, Any]: - rng = Random(seed) items = [f"item_{seed}_{index}_{rng.randrange(10_000)}" for index in range(count)] probes = items[: count // 2] @@ -28,7 +26,6 @@ def generate(count: int, seed: int) -> dict[str, Any]: def solve(payload: dict[str, Any]) -> dict[str, Any]: - items = payload["items"] probes = payload["probes"] bit_count = payload["bit_count"] @@ -36,11 +33,9 @@ def solve(payload: dict[str, Any]) -> dict[str, Any]: bits = bytearray((bit_count + 7) // 8) def set_bit(position: int) -> None: - bits[position // 8] |= 1 << (position % 8) def get_bit(position: int) -> bool: - return bool(bits[position // 8] & (1 << (position % 8))) for item in items: