From 3059e2e4e9fbb5c8f235e28e34a57c7425183224 Mon Sep 17 00:00:00 2001 From: Aryan-Kumar117 Date: Sat, 11 Apr 2026 22:13:10 +0530 Subject: [PATCH 1/3] perf: replaced pure python with numpy in solve function --- chuck/tasks/graph_analytics/task.py | 43 +++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/chuck/tasks/graph_analytics/task.py b/chuck/tasks/graph_analytics/task.py index 79cb5d3..9506765 100644 --- a/chuck/tasks/graph_analytics/task.py +++ b/chuck/tasks/graph_analytics/task.py @@ -4,6 +4,7 @@ from typing import Any from ...common import TaskSpec, round6 +import numpy as np def generate(node_count: int, seed: int) -> dict[str, list[str]]: @@ -19,28 +20,36 @@ def generate(node_count: int, seed: int) -> dict[str, list[str]]: return graph -def solve(graph: dict[str, list[str]], iterations: int = 16, damping: float = 0.85) -> dict[str, Any]: - nodes = sorted(graph) - if not nodes: +def solve(graph: dict[str, list[str]] , iterations: int = 16, damping: float = 0.85) -> dict[str, Any]: + if not graph: return {"node_count": 0, "top_node": "", "top_score": 0.0, "checksum": 0.0} - rank = {node: 1.0 / len(nodes) for node in nodes} - outgoing = {node: graph[node] if graph[node] else nodes for node in nodes} - base = (1.0 - damping) / len(nodes) + nodes = sorted(graph) + idx_map = {node: i for i, node in enumerate(nodes)} + + rows = [idx_map[src] for src, targets in graph.items() for _ in targets] + cols = [idx_map[tgt] for _, targets in graph.items() for tgt in targets] + + N = len(nodes) + adj = np.zeros((N, N), dtype = np.float128) + adj[rows, cols] = 1 + rank = np.full((N,), 1.0/N, dtype=np.float128) + degree = np.sum(adj, axis=1) + transition_matrix = adj / degree[:, np.newaxis] + for _ in range(iterations): - new_rank = {node: base for node in nodes} - for node in nodes: - share = rank[node] / len(outgoing[node]) - for target in outgoing[node]: - new_rank[target] += damping * share + new_rank = (damping*np.matmul(transition_matrix.T, rank)) + (1.0-damping)/N rank = new_rank - top_node = max(nodes, key=lambda node: (rank[node], node)) - checksum = sum((index + 1) * rank[node] for index, node in enumerate(nodes)) + + top_node = int(np.argmax(rank)) + mult = np.arange(1, N+1) + checksum = float(np.dot(mult, rank)) + return { - "node_count": len(nodes), - "top_node": top_node, - "top_score": round6(rank[top_node]), - "checksum": round6(checksum), + "node_count": N, + "top_node": f"n{top_node:04d}", + "top_score": round6(float(rank[top_node])), + "checksum" : round6(checksum), } From cd19a00376ba4822f9200d6726cd3ac60248f708 Mon Sep 17 00:00:00 2001 From: Aryan-Kumar117 Date: Sat, 11 Apr 2026 22:55:43 +0530 Subject: [PATCH 2/3] perf: ditched dense matrix and improved on performance benchmarks --- chuck/tasks/graph_analytics/task.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/chuck/tasks/graph_analytics/task.py b/chuck/tasks/graph_analytics/task.py index 9506765..7504003 100644 --- a/chuck/tasks/graph_analytics/task.py +++ b/chuck/tasks/graph_analytics/task.py @@ -25,21 +25,21 @@ def solve(graph: dict[str, list[str]] , iterations: int = 16, damping: float = 0 return {"node_count": 0, "top_node": "", "top_score": 0.0, "checksum": 0.0} nodes = sorted(graph) + N = len(nodes) idx_map = {node: i for i, node in enumerate(nodes)} - rows = [idx_map[src] for src, targets in graph.items() for _ in targets] - cols = [idx_map[tgt] for _, targets in graph.items() for tgt in targets] + rows = np.array([idx_map[src] for src, targets in graph.items() for _ in targets], dtype=np.int64) + cols = np.array([idx_map[tgt] for _, targets in graph.items() for tgt in targets], dtype=np.int64) + out_degree = np.bincount(rows, minlength=N).astype(np.float64) - N = len(nodes) - adj = np.zeros((N, N), dtype = np.float128) - adj[rows, cols] = 1 - rank = np.full((N,), 1.0/N, dtype=np.float128) - degree = np.sum(adj, axis=1) - transition_matrix = adj / degree[:, np.newaxis] + rank = np.full((N,), 1.0/N, dtype=np.float64) + base = (1.0 - damping) / N + trans_wt = damping / out_degree for _ in range(iterations): - new_rank = (damping*np.matmul(transition_matrix.T, rank)) + (1.0-damping)/N - rank = new_rank + msgs = (rank * trans_wt)[rows] + recieved = np.bincount(cols, weights=msgs, minlength=N) + rank = recieved + base top_node = int(np.argmax(rank)) mult = np.arange(1, N+1) From 1041f781b514c82d3d79ac808c52bbf6f3122c33 Mon Sep 17 00:00:00 2001 From: Aryan-Kumar117 Date: Sat, 11 Apr 2026 23:44:41 +0530 Subject: [PATCH 3/3] build: added numpy as a dependency in pyproject.toml --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 11341c3..ebbf51b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,9 @@ version = "0.1.0" description = "Solver toolkit for 10 computational tasks with optional native C++ backends" requires-python = ">=3.10" readme = "README.md" +dependencies = [ + "numpy" +] [tool.setuptools]