From 4449bc2165a2e7146e6105c2695d651eaa402092 Mon Sep 17 00:00:00 2001 From: "Joseph T. French" Date: Wed, 8 Apr 2026 12:40:04 -0500 Subject: [PATCH] Refactor Docker Compose configuration and enhance taxonomy seeding logic - Updated memory and CPU resource limits in `compose.yaml` for improved performance. - Added structure-root elements in `seed.py` to facilitate associations in financial statements. - Adjusted comments for clarity and updated the numbering of taxonomy association steps for consistency. --- compose.yaml | 10 +++--- robosystems/config/taxonomy/seed.py | 54 ++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/compose.yaml b/compose.yaml index 065fbe6f..26478079 100644 --- a/compose.yaml +++ b/compose.yaml @@ -100,16 +100,16 @@ services: depends_on: pg: condition: service_healthy - # Runs sec_process job locally via `dagster job execute` - # Needs headroom for fastembed model + embedding computation (matches prod 16GB) + # Runs dagster job execute locally (SEC processing, materialization, etc.) + # Needs headroom for fastembed model + DuckDB + XBRL processing restart: always deploy: resources: limits: - memory: 16g - cpus: "8" + memory: 8g + cpus: "4" reservations: - memory: 4g + memory: 1g cpus: "2" profiles: ["dagster", "robosystems", "all"] diff --git a/robosystems/config/taxonomy/seed.py b/robosystems/config/taxonomy/seed.py index 6691de24..c0c93863 100644 --- a/robosystems/config/taxonomy/seed.py +++ b/robosystems/config/taxonomy/seed.py @@ -847,9 +847,55 @@ def seed_reporting_taxonomy(connection) -> None: for e in ALL_GAAP_ELEMENTS: _insert_element(connection, e) - # 5. Create root-ordering associations (structure → SFAC6 root) + # 5. Create structure-root elements so they can be referenced in associations. + # The associations FK requires both from/to to be valid element IDs. + # These abstract elements represent the root of each financial statement. + STRUCTURE_ROOT_ELEMENTS = [ + { + "id": STRUCT_BS_ID, + "qname": "report:BalanceSheet", + "namespace": "report", + "name": "Balance Sheet", + "classification": "asset", + "balance_type": "debit", + "period_type": "instant", + "is_abstract": True, + "element_type": "abstract", + "source": "native", + "depth": 0, + }, + { + "id": STRUCT_IS_ID, + "qname": "report:IncomeStatement", + "namespace": "report", + "name": "Income Statement", + "classification": "revenue", + "balance_type": "credit", + "period_type": "duration", + "is_abstract": True, + "element_type": "abstract", + "source": "native", + "depth": 0, + }, + { + "id": STRUCT_CF_ID, + "qname": "report:CashFlowStatement", + "namespace": "report", + "name": "Cash Flow Statement", + "classification": "asset", + "balance_type": "debit", + "period_type": "duration", + "is_abstract": True, + "element_type": "abstract", + "source": "native", + "depth": 0, + }, + ] + for e in STRUCTURE_ROOT_ELEMENTS: + _insert_element(connection, e) + + # 6. Create root-ordering associations (structure root → SFAC6 root) # These control the order of top-level sections in each statement. - # Convention: from_element_id = structure_id, to_element_id = SFAC6 root. ROOT_ORDER: list[tuple[str, str, int]] = [ # Balance Sheet: Assets → Liabilities → Equity (STRUCT_BS_ID, "elem_sfac6_assets", 1), @@ -885,7 +931,7 @@ def seed_reporting_taxonomy(connection) -> None: }, ) - # 6. Create hierarchy associations (parent → child via structures) + # 7. Create hierarchy associations (parent → child via structures) # Order per-parent so siblings sort correctly within each section order_by_parent: dict[str, int] = {} for e in ALL_GAAP_ELEMENTS: @@ -911,7 +957,7 @@ def seed_reporting_taxonomy(connection) -> None: }, ) - # 7. Create calculation associations for computed elements. + # 8. Create calculation associations for computed elements. # from_element = the computed total, to_element = the source, weight = multiplier. # The renderer sums (source_value * weight) for each source. CALCULATION_ASSOCIATIONS = [