def solve(graph: dict[str, list[str]], iterations: int = 16, damping: float = 0.85) -> dict[str, Any]:
    """Run a fixed number of PageRank-style rank updates over ``graph``.

    Args:
        graph: Adjacency mapping from each node name to the list of node
            names it links to. Every target must itself be a key of
            ``graph``. A node with an empty target list is treated as
            linking to every node (itself included).
        iterations: Number of rank updates to perform.
        damping: Fraction of a node's rank forwarded along its links; the
            remaining ``1 - damping`` is spread evenly over all nodes.

    Returns:
        A dict with ``node_count``, ``top_node`` (name of the highest-ranked
        node; exact ties go to the lexicographically largest name),
        ``top_score`` and ``checksum`` (sum of ``(position + 1) * rank`` over
        the sorted node names). The last two are passed through ``round6``.

    Raises:
        KeyError: If a target name is not a key of ``graph``.
    """
    if not graph:
        return {"node_count": 0, "top_node": "", "top_score": 0.0, "checksum": 0.0}

    nodes = sorted(graph)
    node_total = len(nodes)
    index_of = {node: position for position, node in enumerate(nodes)}

    # Flatten the adjacency lists into parallel source/target index arrays
    # in a single pass so both arrays are guaranteed to line up.
    edge_sources: list[int] = []
    edge_targets: list[int] = []
    for node, targets in graph.items():
        source = index_of[node]
        for target in targets:
            edge_sources.append(source)
            edge_targets.append(index_of[target])
    rows = np.array(edge_sources, dtype=np.intp)
    cols = np.array(edge_targets, dtype=np.intp)

    out_degree = np.bincount(rows, minlength=node_total).astype(np.float64)
    dangling = out_degree == 0
    # Dangling nodes never occur in ``rows``, so their per-edge weight is
    # never read; the placeholder degree of 1 only avoids a division by zero.
    edge_weight = damping / np.where(dangling, 1.0, out_degree)

    rank = np.full(node_total, 1.0 / node_total, dtype=np.float64)
    base = (1.0 - damping) / node_total

    for _ in range(iterations):
        messages = (rank * edge_weight)[rows]
        received = np.bincount(cols, weights=messages, minlength=node_total)
        # A node without outgoing links shares its damped rank equally with
        # every node instead of letting that mass vanish.
        dangling_share = damping * rank[dangling].sum() / node_total
        rank = received + base + dangling_share

    # ``nodes`` is sorted, so the last index holding the maximum is the
    # lexicographically largest name among tied nodes. ``argmax`` reports the
    # first maximum, hence the search over the reversed array.
    top_index = node_total - 1 - int(np.argmax(rank[::-1]))
    position_weights = np.arange(1, node_total + 1, dtype=np.float64)
    checksum = float(np.dot(position_weights, rank))

    return {
        "node_count": node_total,
        "top_node": nodes[top_index],
        "top_score": round6(float(rank[top_index])),
        "checksum": round6(checksum),
    }