From cb51cdbe372776a32026c58b45e2589a9fae71a7 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 1 Jan 2026 15:08:42 -0800 Subject: [PATCH 1/8] Namespace auto-role creation --- .../internal/namespaces.py | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/datajunction-server/datajunction_server/internal/namespaces.py b/datajunction-server/datajunction_server/internal/namespaces.py index 4611aba66..71f82a884 100644 --- a/datajunction-server/datajunction_server/internal/namespaces.py +++ b/datajunction-server/datajunction_server/internal/namespaces.py @@ -17,12 +17,14 @@ from datajunction_server.database.history import History from datajunction_server.database.namespace import NodeNamespace from datajunction_server.database.node import Column, Node, NodeRevision +from datajunction_server.database.rbac import Role, RoleAssignment, RoleScope from datajunction_server.database.user import User from datajunction_server.errors import ( DJActionNotAllowedException, DJDoesNotExistException, DJInvalidInputException, ) +from datajunction_server.models.access import ResourceAction, ResourceType from datajunction_server.models.namespace import ( ImpactedNode, HardDeleteResponse, @@ -206,6 +208,76 @@ def get_parent_namespaces(namespace: str): return [SEPARATOR.join(parts[0:i]) for i in range(len(parts)) if parts[0:i]] +async def create_namespace_owner_role( + session: AsyncSession, + namespace: str, + owner: User, +) -> Role: + """ + Create an owner role for a namespace with MANAGE permissions. + + This auto-creates: + - A role named "{namespace}-owner" + - A scope granting MANAGE on "{namespace}.*" + - An assignment of the role to the creator + + Args: + session: Database session + namespace: The namespace name (e.g., "finance") + owner: The user who will own this namespace + + Returns: + The created Role + """ + role_name = f"{namespace}-owner" + + # Check if role already exists (shouldn't happen, but be safe) + existing_role = await Role.get_by_name(session, role_name) + if existing_role: + logger.warning( + "Owner role `%s` already exists, skipping creation", + role_name, + ) + return existing_role + + # Create the owner role + role = Role( + name=role_name, + description=f"Owner role for namespace {namespace}", + created_by_id=owner.id, + ) + session.add(role) + + # Flush to get the role ID + await session.flush() + + # Add MANAGE scope on namespace + scope = RoleScope( + role_id=role.id, + action=ResourceAction.MANAGE, + scope_type=ResourceType.NAMESPACE, + scope_value=namespace, + ) + session.add(scope) + + # Assign the role to the creator + assignment = RoleAssignment( + principal_id=owner.id, + role_id=role.id, + granted_by_id=owner.id, + ) + session.add(assignment) + + logger.info( + "Created owner role `%s` for namespace `%s`, assigned to user `%s`", + role_name, + namespace, + owner.username, + ) + + return role + + async def create_namespace( session: AsyncSession, namespace: str, @@ -215,6 +287,9 @@ async def create_namespace( ) -> List[str]: """ Creates a namespace entry in the database table. + + When a namespace is created, an owner role is automatically created and + assigned to the creator. This makes new namespaces protected by default. """ logger.info("Creating namespace `%s` and any parent namespaces", namespace) @@ -224,6 +299,7 @@ async def create_namespace( if include_parents else [namespace] ) + created_namespaces = [] for parent_namespace in parents: if not await get_node_namespace( # pragma: no cover session=session, @@ -233,16 +309,22 @@ async def create_namespace( logger.info("Created namespace `%s`", parent_namespace) node_namespace = NodeNamespace(namespace=parent_namespace) session.add(node_namespace) + created_namespaces.append(parent_namespace) await save_history( event=History( entity_type=EntityType.NAMESPACE, - entity_name=namespace, + entity_name=parent_namespace, node=None, activity_type=ActivityType.CREATE, user=current_user.username, ), session=session, ) + + # Auto-create owner roles for newly created namespaces + for ns in created_namespaces: + await create_namespace_owner_role(session, ns, current_user) + await session.commit() return parents From 7c7401abfa2a24ae262d7da27b9c8a7fe5cf5e36 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 1 Jan 2026 15:08:53 -0800 Subject: [PATCH 2/8] Node auto-role creation --- .../datajunction_server/internal/nodes.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/datajunction-server/datajunction_server/internal/nodes.py b/datajunction-server/datajunction_server/internal/nodes.py index 19254077f..8f5af44f5 100644 --- a/datajunction-server/datajunction_server/internal/nodes.py +++ b/datajunction-server/datajunction_server/internal/nodes.py @@ -38,8 +38,10 @@ from datajunction_server.database.metricmetadata import MetricMetadata from datajunction_server.database.node import MissingParent, Node, NodeRevision from datajunction_server.database.partition import Partition +from datajunction_server.database.rbac import Role, RoleAssignment, RoleScope from datajunction_server.database.user import User from datajunction_server.database.measure import FrozenMeasure +from datajunction_server.models.access import ResourceAction, ResourceType from datajunction_server.sql.decompose import MetricComponentExtractor from datajunction_server.errors import ( DJActionNotAllowedException, @@ -704,6 +706,76 @@ async def derive_frozen_measures(node_revision_id: int) -> list[FrozenMeasure]: return frozen_measures +async def create_node_owner_role( + session: AsyncSession, + node_name: str, + owner: User, +) -> Role: + """ + Create an owner role for a node with MANAGE permissions. + + This auto-creates: + - A role named "{node_name}-owner" + - A scope granting MANAGE on the exact node name + - An assignment of the role to the creator + + Args: + session: Database session + node_name: The full node name (e.g., "finance.revenue") + owner: The user who will own this node + + Returns: + The created Role + """ + role_name = f"{node_name}-owner" + + # Check if role already exists (shouldn't happen, but be safe) + existing_role = await Role.get_by_name(session, role_name) + if existing_role: + _logger.warning( + "Owner role `%s` already exists, skipping creation", + role_name, + ) + return existing_role + + # Create the owner role + role = Role( + name=role_name, + description=f"Owner role for node {node_name}", + created_by_id=owner.id, + ) + session.add(role) + + # Flush to get the role ID + await session.flush() + + # Add MANAGE scope on the exact node + scope = RoleScope( + role_id=role.id, + action=ResourceAction.MANAGE, + scope_type=ResourceType.NODE, + scope_value=node_name, + ) + session.add(scope) + + # Assign the role to the creator + assignment = RoleAssignment( + principal_id=owner.id, + role_id=role.id, + granted_by_id=owner.id, + ) + session.add(assignment) + + _logger.info( + "Created owner role `%s` for node `%s`, assigned to user `%s`", + role_name, + node_name, + owner.username, + ) + + return role + + async def save_node( session: AsyncSession, node_revision: NodeRevision, @@ -737,6 +809,10 @@ async def save_node( ), session=session, ) + + # Auto-create owner role for the new node + await create_node_owner_role(session, node.name, current_user) + await session.commit() await session.refresh(node, ["current"]) newly_valid_nodes = await resolve_downstream_references( From 361cf45d00f82ae00a56decb2e15416ad8fbdb4c Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 1 Jan 2026 15:31:33 -0800 Subject: [PATCH 3/8] Add script for backfilling owner roles --- .../scripts/backfill-owner-roles.py | 363 ++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 datajunction-server/scripts/backfill-owner-roles.py diff --git a/datajunction-server/scripts/backfill-owner-roles.py b/datajunction-server/scripts/backfill-owner-roles.py new file mode 100644 index 000000000..970c650ad --- /dev/null +++ b/datajunction-server/scripts/backfill-owner-roles.py @@ -0,0 +1,363 @@ +""" +Backfill owner roles for existing namespaces and nodes based on History. + +This script creates owner roles for namespaces/nodes that existed before +auto-role creation was implemented. It looks up the original creator from +the History table and creates the appropriate owner role. + +Usage: + # Dry run (see what would be created) + python scripts/backfill-owner-roles.py --dry-run + + # Backfill all namespaces and nodes + python scripts/backfill-owner-roles.py + + # Backfill only namespaces + python scripts/backfill-owner-roles.py --namespaces-only + + # Backfill only nodes + python scripts/backfill-owner-roles.py --nodes-only + + # Backfill specific namespace pattern + python scripts/backfill-owner-roles.py --namespace-pattern "finance.*" + + # Backfill specific node pattern + python scripts/backfill-owner-roles.py --node-pattern "finance.*" +""" + +import argparse +import asyncio +from typing import Optional + +import sqlalchemy as sa +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine +from sqlalchemy.orm import sessionmaker + +from datajunction_server.database.history import History +from datajunction_server.database.rbac import Role, RoleAssignment, RoleScope +from datajunction_server.database.user import User +from datajunction_server.internal.history import ActivityType, EntityType +from datajunction_server.models.access import ResourceAction, ResourceType +from datajunction_server.utils import get_settings + +settings = get_settings() + + +async def get_namespace_creators( + session: AsyncSession, + pattern: Optional[str] = None, +) -> list[tuple[str, str]]: + """ + Get namespace names and their creators from History. + + Returns list of (namespace, username) tuples. + """ + query = ( + sa.select(History.entity_name, History.user) + .where( + History.entity_type == EntityType.NAMESPACE, + History.activity_type == ActivityType.CREATE, + History.entity_name.isnot(None), + History.user.isnot(None), + ) + .distinct(History.entity_name) + .order_by(History.entity_name, History.created_at) + ) + + if pattern: + if pattern.endswith("*"): + prefix = pattern.rstrip("*").rstrip(".") + query = query.where( + sa.or_( + History.entity_name == prefix, + History.entity_name.like(f"{prefix}.%"), + ), + ) + else: + query = query.where(History.entity_name == pattern) + + result = await session.execute(query) + return [(row.entity_name, row.user) for row in result.all()] + + +async def get_node_creators( + session: AsyncSession, + pattern: Optional[str] = None, +) -> list[tuple[str, str]]: + """ + Get node names and their creators from History. + + Returns list of (node_name, username) tuples. + """ + query = ( + sa.select(History.entity_name, History.user) + .where( + History.entity_type == EntityType.NODE, + History.activity_type == ActivityType.CREATE, + History.entity_name.isnot(None), + History.user.isnot(None), + ) + .distinct(History.entity_name) + .order_by(History.entity_name, History.created_at) + ) + + if pattern: + if pattern.endswith("*"): + prefix = pattern.rstrip("*").rstrip(".") + query = query.where( + sa.or_( + History.entity_name == prefix, + History.entity_name.like(f"{prefix}.%"), + ), + ) + else: + query = query.where(History.entity_name == pattern) + + result = await session.execute(query) + return [(row.entity_name, row.user) for row in result.all()] + + +async def role_exists(session: AsyncSession, role_name: str) -> bool: + """Check if a role already exists.""" + result = await session.execute( + sa.select(Role.id).where(Role.name == role_name, Role.deleted_at.is_(None)), + ) + return result.scalar_one_or_none() is not None + + +async def get_user_by_username( + session: AsyncSession, + username: str, +) -> Optional[User]: + """Get user by username.""" + result = await session.execute(sa.select(User).where(User.username == username)) + return result.scalar_one_or_none() + + +async def create_owner_role( + session: AsyncSession, + name: str, + resource_type: ResourceType, + owner: User, + dry_run: bool = False, +) -> bool: + """ + Create an owner role for a namespace or node. + + Returns True if role was created, False if it already exists. + """ + role_name = f"{name}-owner" + + if await role_exists(session, role_name): + return False + + if dry_run: + print( + f" [DRY RUN] Would create role '{role_name}' for user '{owner.username}'", + ) + return True + + # Create the role + role = Role( + name=role_name, + description=f"Owner role for {resource_type.value} {name}", + created_by_id=owner.id, + ) + session.add(role) + await session.flush() + + # Add MANAGE scope + scope = RoleScope( + role_id=role.id, + action=ResourceAction.MANAGE, + scope_type=resource_type, + scope_value=name, + ) + session.add(scope) + + # Assign to owner + assignment = RoleAssignment( + principal_id=owner.id, + role_id=role.id, + granted_by_id=owner.id, + ) + session.add(assignment) + + print(f" Created role '{role_name}' assigned to '{owner.username}'") + return True + + +async def backfill_namespace_roles( + session: AsyncSession, + pattern: Optional[str] = None, + dry_run: bool = False, +) -> tuple[int, int]: + """ + Backfill owner roles for namespaces. + + Returns (created_count, skipped_count). + """ + print("\n=== Backfilling Namespace Owner Roles ===") + + creators = await get_namespace_creators(session, pattern) + print(f"Found {len(creators)} namespace creation events in History") + + created = 0 + skipped = 0 + + for namespace, username in creators: + user = await get_user_by_username(session, username) + if not user: + print(f" Skipping '{namespace}': user '{username}' not found") + skipped += 1 + continue + + if await create_owner_role( + session, + namespace, + ResourceType.NAMESPACE, + user, + dry_run, + ): + created += 1 + else: + print(f" Skipping '{namespace}': role already exists") + skipped += 1 + + return created, skipped + + +async def backfill_node_roles( + session: AsyncSession, + pattern: Optional[str] = None, + dry_run: bool = False, +) -> tuple[int, int]: + """ + Backfill owner roles for nodes. + + Returns (created_count, skipped_count). + """ + print("\n=== Backfilling Node Owner Roles ===") + + creators = await get_node_creators(session, pattern) + print(f"Found {len(creators)} node creation events in History") + + created = 0 + skipped = 0 + + for node_name, username in creators: + user = await get_user_by_username(session, username) + if not user: + print(f" Skipping '{node_name}': user '{username}' not found") + skipped += 1 + continue + + if await create_owner_role( + session, + node_name, + ResourceType.NODE, + user, + dry_run, + ): + created += 1 + else: + print(f" Skipping '{node_name}': role already exists") + skipped += 1 + + return created, skipped + + +async def main( + dry_run: bool = False, + namespaces_only: bool = False, + nodes_only: bool = False, + namespace_pattern: Optional[str] = None, + node_pattern: Optional[str] = None, +): + """Main backfill function.""" + print("=" * 60) + print("RBAC Owner Role Backfill Script") + print("=" * 60) + + if dry_run: + print("\n*** DRY RUN MODE - No changes will be made ***\n") + + engine = create_async_engine(settings.writer_db.uri) + async_session = sessionmaker(engine, expire_on_commit=False, class_=AsyncSession) + + async with async_session() as session: + async with session.begin(): + ns_created, ns_skipped = 0, 0 + node_created, node_skipped = 0, 0 + + if not nodes_only: + ns_created, ns_skipped = await backfill_namespace_roles( + session, + namespace_pattern, + dry_run, + ) + + if not namespaces_only: + node_created, node_skipped = await backfill_node_roles( + session, + node_pattern, + dry_run, + ) + + if not dry_run: + await session.commit() + + print("\n" + "=" * 60) + print("Summary") + print("=" * 60) + if not nodes_only: + print(f"Namespaces: {ns_created} created, {ns_skipped} skipped") + if not namespaces_only: + print(f"Nodes: {node_created} created, {node_skipped} skipped") + print(f"Total: {ns_created + node_created} roles created") + + if dry_run: + print("\n*** DRY RUN - No changes were made ***") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Backfill owner roles for existing namespaces and nodes", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be created without making changes", + ) + parser.add_argument( + "--namespaces-only", + action="store_true", + help="Only backfill namespace roles", + ) + parser.add_argument( + "--nodes-only", + action="store_true", + help="Only backfill node roles", + ) + parser.add_argument( + "--namespace-pattern", + type=str, + help="Only backfill namespaces matching pattern (e.g., 'finance.*')", + ) + parser.add_argument( + "--node-pattern", + type=str, + help="Only backfill nodes matching pattern (e.g., 'finance.*')", + ) + + args = parser.parse_args() + + asyncio.run( + main( + dry_run=args.dry_run, + namespaces_only=args.namespaces_only, + nodes_only=args.nodes_only, + namespace_pattern=args.namespace_pattern, + node_pattern=args.node_pattern, + ), + ) From 4900aef8044f5780842f73d0185088454457ab35 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 1 Jan 2026 15:50:13 -0800 Subject: [PATCH 4/8] Add tests --- .../tests/api/namespaces_test.py | 109 +++++++++++++ datajunction-server/tests/api/nodes_test.py | 152 ++++++++++++++++++ 2 files changed, 261 insertions(+) diff --git a/datajunction-server/tests/api/namespaces_test.py b/datajunction-server/tests/api/namespaces_test.py index 4a30ec3fa..639944c58 100644 --- a/datajunction-server/tests/api/namespaces_test.py +++ b/datajunction-server/tests/api/namespaces_test.py @@ -6,6 +6,7 @@ from unittest import mock import pytest +import pytest_asyncio from httpx import AsyncClient from datajunction_server.internal.access.authorization import ( @@ -925,3 +926,111 @@ def get_deny_all_service(): assert response.status_code in (200, 201) assert response.json() == [] + + +@pytest_asyncio.fixture +async def unique_namespace(client: AsyncClient): + """ + Fixture that provides unique namespace names and auto-cleans up after test. + """ + import uuid + + suffix = uuid.uuid4().hex[:8] + created_namespaces = [] + created_roles = [] + + def make_namespace(name: str) -> str: + """Generate a unique namespace name and track it for cleanup.""" + full_name = f"{name}_{suffix}" + created_namespaces.append(full_name) + created_roles.append(f"{full_name}-owner") + return full_name + + yield make_namespace + + # Cleanup after test + for ns in reversed(created_namespaces): # Delete children first + await client.delete(f"/namespaces/{ns}/hard/?cascade=true") + for role in created_roles: + await client.delete(f"/roles/{role}") + + +@pytest.mark.asyncio +async def test_create_namespace_auto_creates_owner_role( + client: AsyncClient, + unique_namespace, +) -> None: + """ + Test that creating a namespace automatically creates an owner role. + + When a namespace is created, the system should: + 1. Create a role named "{namespace}-owner" + 2. Add a MANAGE scope on the namespace + 3. Assign the role to the creating user + """ + ns_name = unique_namespace("testrbacauto") + child_ns = f"{ns_name}.autorole" + + # Create a new namespace with a unique name + response = await client.post(f"/namespaces/{child_ns}") + assert response.status_code in (200, 201) + + # Verify the owner role was created for the parent namespace + response = await client.get(f"/roles/{ns_name}-owner") + assert response.status_code == 200 + role_data = response.json() + assert role_data["name"] == f"{ns_name}-owner" + assert role_data["description"] == f"Owner role for namespace {ns_name}" + + # Check scopes - should have MANAGE on the namespace + assert len(role_data["scopes"]) == 1 + scope = role_data["scopes"][0] + assert scope["action"] == "manage" + assert scope["scope_type"] == "namespace" + assert scope["scope_value"] == ns_name + + # Verify the child namespace also got an owner role + response = await client.get(f"/roles/{child_ns}-owner") + assert response.status_code == 200 + child_role_data = response.json() + assert child_role_data["name"] == f"{child_ns}-owner" + + # Check the role is assigned to the creator + response = await client.get(f"/roles/{ns_name}-owner/assignments") + assert response.status_code == 200 + assignments = response.json() + assert len(assignments) >= 1 + # The creator should be assigned this role + assert any(a["principal"]["username"] == "dj" for a in assignments) + + +@pytest.mark.asyncio +async def test_create_namespace_skips_existing_role( + client: AsyncClient, + unique_namespace, +) -> None: + """ + Test that creating a namespace skips role creation if role already exists. + """ + ns_name = unique_namespace("preexistingns") + role_name = f"{ns_name}-owner" + + # First, manually create a role with the expected name + response = await client.post( + "/roles/", + json={ + "name": role_name, + "description": "Manually created role", + }, + ) + assert response.status_code in (200, 201) + + # Now create a namespace with the same name + response = await client.post(f"/namespaces/{ns_name}") + assert response.status_code in (200, 201) + + # The role should still be the original one (not overwritten) + response = await client.get(f"/roles/{role_name}") + assert response.status_code == 200 + role_data = response.json() + assert role_data["description"] == "Manually created role" # Original description diff --git a/datajunction-server/tests/api/nodes_test.py b/datajunction-server/tests/api/nodes_test.py index b492b0a82..d3f33b11d 100644 --- a/datajunction-server/tests/api/nodes_test.py +++ b/datajunction-server/tests/api/nodes_test.py @@ -6315,3 +6315,155 @@ async def test_copy_nodes( # ] # for copied in copied_dimensions: # assert copied in original_dimensions + + +@pytest_asyncio.fixture +async def unique_node_namespace(client: AsyncClient): + """ + Fixture that provides unique namespace/node names and auto-cleans up after test. + """ + import uuid + + suffix = uuid.uuid4().hex[:8] + created_namespaces = [] + created_roles = [] + + def make_namespace(name: str) -> str: + """Generate a unique namespace name and track it for cleanup.""" + full_name = f"{name}_{suffix}" + created_namespaces.append(full_name) + created_roles.append(f"{full_name}-owner") + return full_name + + def track_node(node_name: str): + """Track a node's role for cleanup.""" + created_roles.append(f"{node_name}-owner") + + class NamespaceHelper: + make = make_namespace + track_node = staticmethod(track_node) + + yield NamespaceHelper + + # Cleanup after test + for ns in reversed(created_namespaces): + await client.delete(f"/namespaces/{ns}/hard/?cascade=true") + for role in created_roles: + await client.delete(f"/roles/{role}") + + +@pytest.mark.asyncio +async def test_create_node_auto_creates_owner_role( + client: AsyncClient, + unique_node_namespace, +) -> None: + """ + Test that creating a node automatically creates an owner role. + + When a node is created, the system should: + 1. Create a role named "{node_name}-owner" + 2. Add a MANAGE scope on the node + 3. Assign the role to the creating user + """ + ns_name = unique_node_namespace.make("nodeautorole") + node_name = f"{ns_name}.test_source" + unique_node_namespace.track_node(node_name) + + # First create a namespace for our test node + response = await client.post(f"/namespaces/{ns_name}") + assert response.status_code in (200, 201) + + # Create a source node + response = await client.post( + "/nodes/source/", + json={ + "name": node_name, + "description": "Test source for auto-role creation", + "catalog": "default", + "schema_": "test_schema", + "table": "test_table", + "columns": [ + {"name": "id", "type": "int"}, + {"name": "name", "type": "string"}, + ], + "mode": "published", + }, + ) + assert response.status_code in (200, 201) + + # Verify the owner role was created for the node + response = await client.get(f"/roles/{node_name}-owner") + assert response.status_code == 200 + role_data = response.json() + assert role_data["name"] == f"{node_name}-owner" + assert role_data["description"] == f"Owner role for node {node_name}" + + # Check scopes - should have MANAGE on the node + assert len(role_data["scopes"]) == 1 + scope = role_data["scopes"][0] + assert scope["action"] == "manage" + assert scope["scope_type"] == "node" + assert scope["scope_value"] == node_name + + # Check the role is assigned to the creator + response = await client.get(f"/roles/{node_name}-owner/assignments") + assert response.status_code == 200 + assignments = response.json() + assert len(assignments) >= 1 + # The creator should be assigned this role + assert any(a["principal"]["username"] == "dj" for a in assignments) + + +@pytest.mark.asyncio +async def test_create_transform_node_auto_creates_owner_role( + client: AsyncClient, + unique_node_namespace, +) -> None: + """ + Test that creating a transform node also auto-creates owner role. + """ + ns_name = unique_node_namespace.make("transformrole") + source_name = f"{ns_name}.source" + transform_name = f"{ns_name}.my_transform" + unique_node_namespace.track_node(source_name) + unique_node_namespace.track_node(transform_name) + + # First create namespace and source node + await client.post(f"/namespaces/{ns_name}") + + await client.post( + "/nodes/source/", + json={ + "name": source_name, + "description": "Source for transform test", + "catalog": "default", + "schema_": "test_schema", + "table": "test_table", + "columns": [ + {"name": "id", "type": "int"}, + {"name": "value", "type": "float"}, + ], + "mode": "published", + }, + ) + + # Create a transform node + response = await client.post( + "/nodes/transform/", + json={ + "name": transform_name, + "description": "Test transform", + "query": f"SELECT id, value FROM {source_name}", + "mode": "published", + }, + ) + assert response.status_code in (200, 201) + + # Verify owner role was created + response = await client.get(f"/roles/{transform_name}-owner") + assert response.status_code == 200 + role_data = response.json() + assert role_data["name"] == f"{transform_name}-owner" + assert len(role_data["scopes"]) == 1 + assert role_data["scopes"][0]["action"] == "manage" + assert role_data["scopes"][0]["scope_type"] == "node" From d991e358d064edd85009bdf62a643add978adeb1 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Thu, 1 Jan 2026 22:10:05 -0800 Subject: [PATCH 5/8] Fix tests --- .../tests/api/namespaces_test.py | 11 +- datajunction-server/tests/api/nodes_test.py | 10 +- datajunction-server/tests/api/rbac_test.py | 17 ++- .../tests/internal/authorization_test.py | 114 +++++++++++++----- 4 files changed, 103 insertions(+), 49 deletions(-) diff --git a/datajunction-server/tests/api/namespaces_test.py b/datajunction-server/tests/api/namespaces_test.py index 639944c58..a818d924a 100644 --- a/datajunction-server/tests/api/namespaces_test.py +++ b/datajunction-server/tests/api/namespaces_test.py @@ -969,13 +969,12 @@ async def test_create_namespace_auto_creates_owner_role( 3. Assign the role to the creating user """ ns_name = unique_namespace("testrbacauto") - child_ns = f"{ns_name}.autorole" # Create a new namespace with a unique name - response = await client.post(f"/namespaces/{child_ns}") + response = await client.post(f"/namespaces/{ns_name}") assert response.status_code in (200, 201) - # Verify the owner role was created for the parent namespace + # Verify the owner role was created for the namespace response = await client.get(f"/roles/{ns_name}-owner") assert response.status_code == 200 role_data = response.json() @@ -989,12 +988,6 @@ async def test_create_namespace_auto_creates_owner_role( assert scope["scope_type"] == "namespace" assert scope["scope_value"] == ns_name - # Verify the child namespace also got an owner role - response = await client.get(f"/roles/{child_ns}-owner") - assert response.status_code == 200 - child_role_data = response.json() - assert child_role_data["name"] == f"{child_ns}-owner" - # Check the role is assigned to the creator response = await client.get(f"/roles/{ns_name}-owner/assignments") assert response.status_code == 200 diff --git a/datajunction-server/tests/api/nodes_test.py b/datajunction-server/tests/api/nodes_test.py index d3f33b11d..b8b7e82f9 100644 --- a/datajunction-server/tests/api/nodes_test.py +++ b/datajunction-server/tests/api/nodes_test.py @@ -6339,11 +6339,15 @@ def track_node(node_name: str): """Track a node's role for cleanup.""" created_roles.append(f"{node_name}-owner") + # Create a simple namespace object with the helper methods class NamespaceHelper: - make = make_namespace - track_node = staticmethod(track_node) + pass - yield NamespaceHelper + helper = NamespaceHelper() + helper.make = make_namespace # type: ignore + helper.track_node = track_node # type: ignore + + yield helper # Cleanup after test for ns in reversed(created_namespaces): diff --git a/datajunction-server/tests/api/rbac_test.py b/datajunction-server/tests/api/rbac_test.py index c277c578f..a7e985f61 100644 --- a/datajunction-server/tests/api/rbac_test.py +++ b/datajunction-server/tests/api/rbac_test.py @@ -92,11 +92,16 @@ async def test_create_role_duplicate_name(client_with_basic: AsyncClient): @pytest.mark.asyncio async def test_list_roles(client_with_basic: AsyncClient): """Test listing roles.""" + import uuid + + suffix = uuid.uuid4().hex[:8] + test_role_names = [f"listroles_{suffix}_{i}" for i in range(3)] + # Create several roles - for i in range(3): + for name in test_role_names: await client_with_basic.post( "/roles/", - json={"name": f"role-{i}", "description": f"Role {i}"}, + json={"name": name, "description": f"Test role {name}"}, ) # List roles @@ -105,9 +110,13 @@ async def test_list_roles(client_with_basic: AsyncClient): data = response.json() assert len(data) >= 3 - # Check they're ordered by name + # Check that our created roles are in the list names = [role["name"] for role in data] - assert sorted(names) == names + for test_name in test_role_names: + assert test_name in names + + # Check they're ordered by name (case-sensitive lexicographic sort) + assert names == sorted(names) @pytest.mark.asyncio diff --git a/datajunction-server/tests/internal/authorization_test.py b/datajunction-server/tests/internal/authorization_test.py index 5e2b61b92..954744958 100644 --- a/datajunction-server/tests/internal/authorization_test.py +++ b/datajunction-server/tests/internal/authorization_test.py @@ -1,6 +1,7 @@ """Tests for RBAC authorization logic.""" from datetime import datetime, timedelta, timezone +import uuid import pytest from sqlalchemy.ext.asyncio import AsyncSession @@ -1279,12 +1280,21 @@ class TestAuthContext: async def test_auth_context_from_user_direct_assignments_only( self, - default_user: User, session: AsyncSession, ): """AuthContext includes user's direct role assignments.""" + # Create a fresh user with no pre-existing assignments + suffix = uuid.uuid4().hex[:8] + fresh_user = User( + username=f"authctx_user_{suffix}", + email=f"authctx_{suffix}@example.com", + oauth_provider="basic", + ) + session.add(fresh_user) + await session.flush() + # Create role and assign to user - role = Role(name="test-role", created_by_id=default_user.id) + role = Role(name=f"test-role-{suffix}", created_by_id=fresh_user.id) session.add(role) await session.flush() @@ -1297,15 +1307,15 @@ async def test_auth_context_from_user_direct_assignments_only( session.add(scope) assignment = RoleAssignment( - principal_id=default_user.id, + principal_id=fresh_user.id, role_id=role.id, - granted_by_id=default_user.id, + granted_by_id=fresh_user.id, ) session.add(assignment) await session.commit() # Reload user with assignments - user = await get_user(username=default_user.username, session=session) + user = await get_user(username=fresh_user.username, session=session) # Build AuthContext auth_context = await AuthContext.from_user(session, user) @@ -1313,17 +1323,29 @@ async def test_auth_context_from_user_direct_assignments_only( assert auth_context.user_id == user.id assert auth_context.username == user.username assert len(auth_context.role_assignments) == 1 - assert auth_context.role_assignments[0].role.name == "test-role" + assert auth_context.role_assignments[0].role.name == f"test-role-{suffix}" async def test_auth_context_includes_group_assignments( self, - default_user: User, session: AsyncSession, ): """AuthContext flattens user's + groups' assignments.""" + import uuid + + suffix = uuid.uuid4().hex[:8] + + # Create a fresh user with no pre-existing assignments + fresh_user = User( + username=f"authctx_grp_user_{suffix}", + email=f"authctx_grp_{suffix}@example.com", + oauth_provider="basic", + ) + session.add(fresh_user) + await session.flush() + # Create a group group = User( - username="finance-team", + username=f"finance-team-{suffix}", kind=PrincipalKind.GROUP, oauth_provider="basic", ) @@ -1333,12 +1355,12 @@ async def test_auth_context_includes_group_assignments( # Add user to group membership = GroupMember( group_id=group.id, - member_id=default_user.id, + member_id=fresh_user.id, ) session.add(membership) # Create role for user (direct) - user_role = Role(name="user-role", created_by_id=default_user.id) + user_role = Role(name=f"user-role-{suffix}", created_by_id=fresh_user.id) session.add(user_role) await session.flush() @@ -1351,14 +1373,14 @@ async def test_auth_context_includes_group_assignments( session.add(user_scope) user_assignment = RoleAssignment( - principal_id=default_user.id, + principal_id=fresh_user.id, role_id=user_role.id, - granted_by_id=default_user.id, + granted_by_id=fresh_user.id, ) session.add(user_assignment) # Create role for group - group_role = Role(name="group-role", created_by_id=default_user.id) + group_role = Role(name=f"group-role-{suffix}", created_by_id=fresh_user.id) session.add(group_role) await session.flush() @@ -1373,13 +1395,13 @@ async def test_auth_context_includes_group_assignments( group_assignment = RoleAssignment( principal_id=group.id, role_id=group_role.id, - granted_by_id=default_user.id, + granted_by_id=fresh_user.id, ) session.add(group_assignment) await session.commit() # Reload user - user = await get_user(username=default_user.username, session=session) + user = await get_user(username=fresh_user.username, session=session) # Build AuthContext (should include both) auth_context = await AuthContext.from_user(session, user) @@ -1388,22 +1410,35 @@ async def test_auth_context_includes_group_assignments( assert len(auth_context.role_assignments) == 2 # User's + group's role_names = {a.role.name for a in auth_context.role_assignments} - assert role_names == {"user-role", "group-role"} + assert f"user-role-{suffix}" in role_names + assert f"group-role-{suffix}" in role_names async def test_auth_context_with_multiple_groups( self, - default_user: User, session: AsyncSession, ): """User in multiple groups gets all group assignments.""" + import uuid + + suffix = uuid.uuid4().hex[:8] + + # Create a fresh user with no pre-existing assignments + fresh_user = User( + username=f"authctx_multi_grp_{suffix}", + email=f"authctx_multi_grp_{suffix}@example.com", + oauth_provider="basic", + ) + session.add(fresh_user) + await session.flush() + # Create two groups group1 = User( - username="finance-team", + username=f"finance-team-{suffix}", kind=PrincipalKind.GROUP, oauth_provider="basic", ) group2 = User( - username="data-eng-team", + username=f"data-eng-team-{suffix}", kind=PrincipalKind.GROUP, oauth_provider="basic", ) @@ -1411,13 +1446,13 @@ async def test_auth_context_with_multiple_groups( await session.flush() # Add user to both groups - membership1 = GroupMember(group_id=group1.id, member_id=default_user.id) - membership2 = GroupMember(group_id=group2.id, member_id=default_user.id) + membership1 = GroupMember(group_id=group1.id, member_id=fresh_user.id) + membership2 = GroupMember(group_id=group2.id, member_id=fresh_user.id) session.add_all([membership1, membership2]) # Give each group a role - role1 = Role(name="finance-role", created_by_id=default_user.id) - role2 = Role(name="data-eng-role", created_by_id=default_user.id) + role1 = Role(name=f"finance-role-{suffix}", created_by_id=fresh_user.id) + role2 = Role(name=f"data-eng-role-{suffix}", created_by_id=fresh_user.id) session.add_all([role1, role2]) await session.flush() @@ -1438,18 +1473,18 @@ async def test_auth_context_with_multiple_groups( assignment1 = RoleAssignment( principal_id=group1.id, role_id=role1.id, - granted_by_id=default_user.id, + granted_by_id=fresh_user.id, ) assignment2 = RoleAssignment( principal_id=group2.id, role_id=role2.id, - granted_by_id=default_user.id, + granted_by_id=fresh_user.id, ) session.add_all([assignment1, assignment2]) await session.commit() # Reload user - user = await get_user(username=default_user.username, session=session) + user = await get_user(username=fresh_user.username, session=session) # Build AuthContext auth_context = await AuthContext.from_user(session, user) @@ -1457,7 +1492,8 @@ async def test_auth_context_with_multiple_groups( # Should have assignments from both groups assert len(auth_context.role_assignments) == 2 role_names = {a.role.name for a in auth_context.role_assignments} - assert role_names == {"finance-role", "data-eng-role"} + assert f"finance-role-{suffix}" in role_names + assert f"data-eng-role-{suffix}" in role_names @pytest.mark.asyncio @@ -1707,12 +1743,24 @@ class TestGetEffectiveAssignments: async def test_effective_assignments_user_only( self, - default_user: User, session: AsyncSession, ): """User with no groups gets only direct assignments.""" + import uuid + + suffix = uuid.uuid4().hex[:8] + + # Create a fresh user with no pre-existing assignments + fresh_user = User( + username=f"eff_assign_user_{suffix}", + email=f"eff_assign_{suffix}@example.com", + oauth_provider="basic", + ) + session.add(fresh_user) + await session.flush() + # Give user a direct assignment - role = Role(name="personal-role", created_by_id=default_user.id) + role = Role(name=f"personal-role-{suffix}", created_by_id=fresh_user.id) session.add(role) await session.flush() @@ -1725,20 +1773,20 @@ async def test_effective_assignments_user_only( session.add(scope) assignment = RoleAssignment( - principal_id=default_user.id, + principal_id=fresh_user.id, role_id=role.id, - granted_by_id=default_user.id, + granted_by_id=fresh_user.id, ) session.add(assignment) await session.commit() - user = await get_user(username=default_user.username, session=session) + user = await get_user(username=fresh_user.username, session=session) # Get effective assignments assignments = await AuthContext.get_effective_assignments(session, user) assert len(assignments) == 1 - assert assignments[0].role.name == "personal-role" + assert assignments[0].role.name == f"personal-role-{suffix}" async def test_effective_assignments_with_postgres_groups( self, From 47b809970f0822e9595c8005f44042726f4f5bb8 Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Fri, 2 Jan 2026 06:24:44 -0800 Subject: [PATCH 6/8] Fix tests --- datajunction-server/tests/api/rbac_test.py | 22 +++++++++++++--------- datajunction-server/tests/conftest.py | 1 + 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/datajunction-server/tests/api/rbac_test.py b/datajunction-server/tests/api/rbac_test.py index a7e985f61..560cbb44f 100644 --- a/datajunction-server/tests/api/rbac_test.py +++ b/datajunction-server/tests/api/rbac_test.py @@ -97,26 +97,30 @@ async def test_list_roles(client_with_basic: AsyncClient): suffix = uuid.uuid4().hex[:8] test_role_names = [f"listroles_{suffix}_{i}" for i in range(3)] - # Create several roles + # Create several roles and verify they succeed for name in test_role_names: - await client_with_basic.post( + response = await client_with_basic.post( "/roles/", json={"name": name, "description": f"Test role {name}"}, ) + assert response.status_code == 201, ( + f"Failed to create role {name}: {response.text}" + ) - # List roles - response = await client_with_basic.get("/roles/") + # List roles via API - use limit=500 to ensure we get all roles + # (template DB has ~192 pre-loaded roles, so default limit=100 may not include new ones) + response = await client_with_basic.get("/roles/?limit=500") assert response.status_code == 200 data = response.json() + api_role_names = [role["name"] for role in data] + assert len(data) >= 3 # Check that our created roles are in the list - names = [role["name"] for role in data] for test_name in test_role_names: - assert test_name in names - - # Check they're ordered by name (case-sensitive lexicographic sort) - assert names == sorted(names) + assert test_name in api_role_names, ( + f"Role {test_name} not found in {api_role_names[:10]}..." + ) @pytest.mark.asyncio diff --git a/datajunction-server/tests/conftest.py b/datajunction-server/tests/conftest.py index be50495b4..e0fd80d66 100644 --- a/datajunction-server/tests/conftest.py +++ b/datajunction-server/tests/conftest.py @@ -820,6 +820,7 @@ async def client( # - create_default_user def get_session_override() -> AsyncSession: + print(f"DEBUG get_session_override called! Returning session {id(session)}") return session def get_settings_override() -> Settings: From dd3c1e6c1db6e73521aa9ce70ef4efe08fee29ef Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Fri, 2 Jan 2026 07:14:13 -0800 Subject: [PATCH 7/8] Fix --- datajunction-server/datajunction_server/internal/nodes.py | 2 +- datajunction-server/tests/transpilation_test.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/datajunction-server/datajunction_server/internal/nodes.py b/datajunction-server/datajunction_server/internal/nodes.py index 8f5af44f5..0cf52a6e5 100644 --- a/datajunction-server/datajunction_server/internal/nodes.py +++ b/datajunction-server/datajunction_server/internal/nodes.py @@ -731,7 +731,7 @@ async def create_node_owner_role( # Check if role already exists (shouldn't happen, but be safe) existing_role = await Role.get_by_name(session, role_name) - if existing_role: + if existing_role: # pragma: no cover _logger.warning( "Owner role `%s` already exists, skipping creation", role_name, diff --git a/datajunction-server/tests/transpilation_test.py b/datajunction-server/tests/transpilation_test.py index 871843e66..e33d39234 100644 --- a/datajunction-server/tests/transpilation_test.py +++ b/datajunction-server/tests/transpilation_test.py @@ -94,6 +94,7 @@ def test_sqlglot_transpile_success(): output_dialect=Dialect("custom123"), ) assert result == "SELECT * FROM bar" + del DialectRegistry._registry["custom123"] def test_default_transpile_success(): @@ -116,6 +117,7 @@ def test_default_transpile_success(): output_dialect=Dialect("custom123"), ) assert result == "SELECT * FROM bar" + del DialectRegistry._registry["custom123"] def test_transpile_sql(): From 8d27b7d05fa97b539b7689f11c182cd14b19604a Mon Sep 17 00:00:00 2001 From: Yian Shang Date: Fri, 2 Jan 2026 20:49:10 -0800 Subject: [PATCH 8/8] Only auto-create roles for namespace creation --- .../datajunction_server/internal/nodes.py | 75 -------- .../scripts/backfill-owner-roles.py | 179 +++--------------- datajunction-server/tests/api/nodes_test.py | 117 ------------ 3 files changed, 27 insertions(+), 344 deletions(-) diff --git a/datajunction-server/datajunction_server/internal/nodes.py b/datajunction-server/datajunction_server/internal/nodes.py index 0cf52a6e5..e1eae6b76 100644 --- a/datajunction-server/datajunction_server/internal/nodes.py +++ b/datajunction-server/datajunction_server/internal/nodes.py @@ -38,10 +38,8 @@ from datajunction_server.database.metricmetadata import MetricMetadata from datajunction_server.database.node import MissingParent, Node, NodeRevision from datajunction_server.database.partition import Partition -from datajunction_server.database.rbac import Role, RoleAssignment, RoleScope from datajunction_server.database.user import User from datajunction_server.database.measure import FrozenMeasure -from datajunction_server.models.access import ResourceAction, ResourceType from datajunction_server.sql.decompose import MetricComponentExtractor from datajunction_server.errors import ( DJActionNotAllowedException, @@ -706,76 +704,6 @@ async def derive_frozen_measures(node_revision_id: int) -> list[FrozenMeasure]: return frozen_measures -async def create_node_owner_role( - session: AsyncSession, - node_name: str, - owner: User, -) -> Role: - """ - Create an owner role for a node with MANAGE permissions. - - This auto-creates: - - A role named "{node_name}-owner" - - A scope granting MANAGE on the exact node name - - An assignment of the role to the creator - - Args: - session: Database session - node_name: The full node name (e.g., "finance.revenue") - owner: The user who will own this node - - Returns: - The created Role - """ - role_name = f"{node_name}-owner" - - # Check if role already exists (shouldn't happen, but be safe) - existing_role = await Role.get_by_name(session, role_name) - if existing_role: # pragma: no cover - _logger.warning( - "Owner role `%s` already exists, skipping creation", - role_name, - ) - return existing_role - - # Create the owner role - role = Role( - name=role_name, - description=f"Owner role for node {node_name}", - created_by_id=owner.id, - ) - session.add(role) - - # Flush to get the role ID - await session.flush() - - # Add MANAGE scope on the exact node - scope = RoleScope( - role_id=role.id, - action=ResourceAction.MANAGE, - scope_type=ResourceType.NODE, - scope_value=node_name, - ) - session.add(scope) - - # Assign the role to the creator - assignment = RoleAssignment( - principal_id=owner.id, - role_id=role.id, - granted_by_id=owner.id, - ) - session.add(assignment) - - _logger.info( - "Created owner role `%s` for node `%s`, assigned to user `%s`", - role_name, - node_name, - owner.username, - ) - - return role - - async def save_node( session: AsyncSession, node_revision: NodeRevision, @@ -810,9 +738,6 @@ async def save_node( session=session, ) - # Auto-create owner role for the new node - await create_node_owner_role(session, node.name, current_user) - await session.commit() await session.refresh(node, ["current"]) newly_valid_nodes = await resolve_downstream_references( diff --git a/datajunction-server/scripts/backfill-owner-roles.py b/datajunction-server/scripts/backfill-owner-roles.py index 970c650ad..0930691a9 100644 --- a/datajunction-server/scripts/backfill-owner-roles.py +++ b/datajunction-server/scripts/backfill-owner-roles.py @@ -1,28 +1,23 @@ """ -Backfill owner roles for existing namespaces and nodes based on History. +Backfill owner roles for existing namespaces based on History. -This script creates owner roles for namespaces/nodes that existed before +This script creates owner roles for namespaces that existed before auto-role creation was implemented. It looks up the original creator from the History table and creates the appropriate owner role. +Note: Node-level owner roles are NOT auto-created. Fine-grained node +permissions should be set up manually when needed. Namespace ownership +provides implicit MANAGE access to all nodes within that namespace. + Usage: # Dry run (see what would be created) python scripts/backfill-owner-roles.py --dry-run - # Backfill all namespaces and nodes + # Backfill all namespaces python scripts/backfill-owner-roles.py - # Backfill only namespaces - python scripts/backfill-owner-roles.py --namespaces-only - - # Backfill only nodes - python scripts/backfill-owner-roles.py --nodes-only - # Backfill specific namespace pattern - python scripts/backfill-owner-roles.py --namespace-pattern "finance.*" - - # Backfill specific node pattern - python scripts/backfill-owner-roles.py --node-pattern "finance.*" + python scripts/backfill-owner-roles.py --pattern "finance.*" """ import argparse @@ -80,43 +75,6 @@ async def get_namespace_creators( return [(row.entity_name, row.user) for row in result.all()] -async def get_node_creators( - session: AsyncSession, - pattern: Optional[str] = None, -) -> list[tuple[str, str]]: - """ - Get node names and their creators from History. - - Returns list of (node_name, username) tuples. - """ - query = ( - sa.select(History.entity_name, History.user) - .where( - History.entity_type == EntityType.NODE, - History.activity_type == ActivityType.CREATE, - History.entity_name.isnot(None), - History.user.isnot(None), - ) - .distinct(History.entity_name) - .order_by(History.entity_name, History.created_at) - ) - - if pattern: - if pattern.endswith("*"): - prefix = pattern.rstrip("*").rstrip(".") - query = query.where( - sa.or_( - History.entity_name == prefix, - History.entity_name.like(f"{prefix}.%"), - ), - ) - else: - query = query.where(History.entity_name == pattern) - - result = await session.execute(query) - return [(row.entity_name, row.user) for row in result.all()] - - async def role_exists(session: AsyncSession, role_name: str) -> bool: """Check if a role already exists.""" result = await session.execute( @@ -136,17 +94,16 @@ async def get_user_by_username( async def create_owner_role( session: AsyncSession, - name: str, - resource_type: ResourceType, + namespace: str, owner: User, dry_run: bool = False, ) -> bool: """ - Create an owner role for a namespace or node. + Create an owner role for a namespace. Returns True if role was created, False if it already exists. """ - role_name = f"{name}-owner" + role_name = f"{namespace}-owner" if await role_exists(session, role_name): return False @@ -160,18 +117,18 @@ async def create_owner_role( # Create the role role = Role( name=role_name, - description=f"Owner role for {resource_type.value} {name}", + description=f"Owner role for namespace {namespace}", created_by_id=owner.id, ) session.add(role) await session.flush() - # Add MANAGE scope + # Add MANAGE scope on the namespace scope = RoleScope( role_id=role.id, action=ResourceAction.MANAGE, - scope_type=resource_type, - scope_value=name, + scope_type=ResourceType.NAMESPACE, + scope_value=namespace, ) session.add(scope) @@ -212,13 +169,7 @@ async def backfill_namespace_roles( skipped += 1 continue - if await create_owner_role( - session, - namespace, - ResourceType.NAMESPACE, - user, - dry_run, - ): + if await create_owner_role(session, namespace, user, dry_run): created += 1 else: print(f" Skipping '{namespace}': role already exists") @@ -227,56 +178,13 @@ async def backfill_namespace_roles( return created, skipped -async def backfill_node_roles( - session: AsyncSession, - pattern: Optional[str] = None, - dry_run: bool = False, -) -> tuple[int, int]: - """ - Backfill owner roles for nodes. - - Returns (created_count, skipped_count). - """ - print("\n=== Backfilling Node Owner Roles ===") - - creators = await get_node_creators(session, pattern) - print(f"Found {len(creators)} node creation events in History") - - created = 0 - skipped = 0 - - for node_name, username in creators: - user = await get_user_by_username(session, username) - if not user: - print(f" Skipping '{node_name}': user '{username}' not found") - skipped += 1 - continue - - if await create_owner_role( - session, - node_name, - ResourceType.NODE, - user, - dry_run, - ): - created += 1 - else: - print(f" Skipping '{node_name}': role already exists") - skipped += 1 - - return created, skipped - - async def main( dry_run: bool = False, - namespaces_only: bool = False, - nodes_only: bool = False, - namespace_pattern: Optional[str] = None, - node_pattern: Optional[str] = None, + pattern: Optional[str] = None, ): """Main backfill function.""" print("=" * 60) - print("RBAC Owner Role Backfill Script") + print("RBAC Namespace Owner Role Backfill Script") print("=" * 60) if dry_run: @@ -287,22 +195,11 @@ async def main( async with async_session() as session: async with session.begin(): - ns_created, ns_skipped = 0, 0 - node_created, node_skipped = 0, 0 - - if not nodes_only: - ns_created, ns_skipped = await backfill_namespace_roles( - session, - namespace_pattern, - dry_run, - ) - - if not namespaces_only: - node_created, node_skipped = await backfill_node_roles( - session, - node_pattern, - dry_run, - ) + created, skipped = await backfill_namespace_roles( + session, + pattern, + dry_run, + ) if not dry_run: await session.commit() @@ -310,11 +207,7 @@ async def main( print("\n" + "=" * 60) print("Summary") print("=" * 60) - if not nodes_only: - print(f"Namespaces: {ns_created} created, {ns_skipped} skipped") - if not namespaces_only: - print(f"Nodes: {node_created} created, {node_skipped} skipped") - print(f"Total: {ns_created + node_created} roles created") + print(f"Namespaces: {created} roles created, {skipped} skipped") if dry_run: print("\n*** DRY RUN - No changes were made ***") @@ -322,7 +215,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Backfill owner roles for existing namespaces and nodes", + description="Backfill owner roles for existing namespaces", ) parser.add_argument( "--dry-run", @@ -330,34 +223,16 @@ async def main( help="Show what would be created without making changes", ) parser.add_argument( - "--namespaces-only", - action="store_true", - help="Only backfill namespace roles", - ) - parser.add_argument( - "--nodes-only", - action="store_true", - help="Only backfill node roles", - ) - parser.add_argument( - "--namespace-pattern", + "--pattern", type=str, help="Only backfill namespaces matching pattern (e.g., 'finance.*')", ) - parser.add_argument( - "--node-pattern", - type=str, - help="Only backfill nodes matching pattern (e.g., 'finance.*')", - ) args = parser.parse_args() asyncio.run( main( dry_run=args.dry_run, - namespaces_only=args.namespaces_only, - nodes_only=args.nodes_only, - namespace_pattern=args.namespace_pattern, - node_pattern=args.node_pattern, + pattern=args.pattern, ), ) diff --git a/datajunction-server/tests/api/nodes_test.py b/datajunction-server/tests/api/nodes_test.py index b8b7e82f9..36fc7c454 100644 --- a/datajunction-server/tests/api/nodes_test.py +++ b/datajunction-server/tests/api/nodes_test.py @@ -6354,120 +6354,3 @@ class NamespaceHelper: await client.delete(f"/namespaces/{ns}/hard/?cascade=true") for role in created_roles: await client.delete(f"/roles/{role}") - - -@pytest.mark.asyncio -async def test_create_node_auto_creates_owner_role( - client: AsyncClient, - unique_node_namespace, -) -> None: - """ - Test that creating a node automatically creates an owner role. - - When a node is created, the system should: - 1. Create a role named "{node_name}-owner" - 2. Add a MANAGE scope on the node - 3. Assign the role to the creating user - """ - ns_name = unique_node_namespace.make("nodeautorole") - node_name = f"{ns_name}.test_source" - unique_node_namespace.track_node(node_name) - - # First create a namespace for our test node - response = await client.post(f"/namespaces/{ns_name}") - assert response.status_code in (200, 201) - - # Create a source node - response = await client.post( - "/nodes/source/", - json={ - "name": node_name, - "description": "Test source for auto-role creation", - "catalog": "default", - "schema_": "test_schema", - "table": "test_table", - "columns": [ - {"name": "id", "type": "int"}, - {"name": "name", "type": "string"}, - ], - "mode": "published", - }, - ) - assert response.status_code in (200, 201) - - # Verify the owner role was created for the node - response = await client.get(f"/roles/{node_name}-owner") - assert response.status_code == 200 - role_data = response.json() - assert role_data["name"] == f"{node_name}-owner" - assert role_data["description"] == f"Owner role for node {node_name}" - - # Check scopes - should have MANAGE on the node - assert len(role_data["scopes"]) == 1 - scope = role_data["scopes"][0] - assert scope["action"] == "manage" - assert scope["scope_type"] == "node" - assert scope["scope_value"] == node_name - - # Check the role is assigned to the creator - response = await client.get(f"/roles/{node_name}-owner/assignments") - assert response.status_code == 200 - assignments = response.json() - assert len(assignments) >= 1 - # The creator should be assigned this role - assert any(a["principal"]["username"] == "dj" for a in assignments) - - -@pytest.mark.asyncio -async def test_create_transform_node_auto_creates_owner_role( - client: AsyncClient, - unique_node_namespace, -) -> None: - """ - Test that creating a transform node also auto-creates owner role. - """ - ns_name = unique_node_namespace.make("transformrole") - source_name = f"{ns_name}.source" - transform_name = f"{ns_name}.my_transform" - unique_node_namespace.track_node(source_name) - unique_node_namespace.track_node(transform_name) - - # First create namespace and source node - await client.post(f"/namespaces/{ns_name}") - - await client.post( - "/nodes/source/", - json={ - "name": source_name, - "description": "Source for transform test", - "catalog": "default", - "schema_": "test_schema", - "table": "test_table", - "columns": [ - {"name": "id", "type": "int"}, - {"name": "value", "type": "float"}, - ], - "mode": "published", - }, - ) - - # Create a transform node - response = await client.post( - "/nodes/transform/", - json={ - "name": transform_name, - "description": "Test transform", - "query": f"SELECT id, value FROM {source_name}", - "mode": "published", - }, - ) - assert response.status_code in (200, 201) - - # Verify owner role was created - response = await client.get(f"/roles/{transform_name}-owner") - assert response.status_code == 200 - role_data = response.json() - assert role_data["name"] == f"{transform_name}-owner" - assert len(role_data["scopes"]) == 1 - assert role_data["scopes"][0]["action"] == "manage" - assert role_data["scopes"][0]["scope_type"] == "node"