Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `ci` command for CI/CD-optimized test runs: multi-file support, GitHub Actions annotations and step summary, Azure DevOps annotations, `--fail-on` flag, `--json` output
- Added `changelog` command and API endpoint (#1118)
- Added opt-in `--all-errors` mode for `datacontract lint` to report all JSON Schema validation errors, with matching `all_errors` support in the Python library and API
- Added `--schema-name` option to custom model export (#978)

### Fixed
- Avro importer now raises an error for union fields with multiple non-null types, which are not supported by ODCS
Expand Down
82 changes: 37 additions & 45 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1557,67 +1557,59 @@ The export function converts the data contract specification into the custom for
datacontract export --format custom --template template.txt datacontract.yaml
```

##### Jinja variables
##### Jinja templates & variables

You can directly use the Data Contract Specification as template variables.

```shell
$ cat template.txt
title: {{ data_contract.info.title }}
models:
{%- for model_name, model in data_contract.models.items() %}
- name: {{ model.name }}
{%- endfor %}

$ datacontract export --format custom --template template.txt datacontract.yaml
title: Orders Latest
```

##### Example Jinja Templates
##### Example Jinja Templates for a customized dbt model

###### Customized dbt model
You can export a single dbt model containing any logic by passing the `--schema-name` option (in ODCS, "schemas" are the equivalent of "models" in dbt).

You can export the dbt models containing any logic.
This option adds the following Jinja variables, which are passed to your template file:
- `schema_name`: str
- `schema`: SchemaObject from ODCS

Below is an example of a dbt staging layer that converts a field of `type: timestamp` to a `DATETIME` type with time zone conversion.

template.sql

{% raw %}
```sql
{%- for model_name, model in data_contract.models.items() %}
{#- Export only the first model #}
{%- if loop.first -%}
SELECT
{%- for field_name, field in model.fields.items() %}
{%- if field.type == "timestamp" %}
DATETIME({{ field_name }}, "Asia/Tokyo") AS {{ field_name }},
{%- else %}
{{ field_name }} AS {{ field_name }},
{%- endif %}
{%- endfor %}
FROM
{{ "{{" }} ref('{{ model_name }}') {{ "}}" }}
{%- endif %}
{%- endfor %}
```
{% endraw %}

command

```shell
datacontract export --format custom --template template.sql --output output.sql datacontract.yaml
```

output.sql

```sql
SELECT
order_id AS order_id,
DATETIME(order_timestamp, "Asia/Tokyo") AS order_timestamp,
order_total AS order_total,
customer_id AS customer_id,
customer_email_address AS customer_email_address,
DATETIME(processed_timestamp, "Asia/Tokyo") AS processed_timestamp,
FROM
{{ ref('orders') }}
```
- `template.sql`
```sql
SELECT
{%- for field in schema.properties %}
{%- if field.physicalType == "timestamp" %}
DATETIME({{ field.name }}, "Asia/Tokyo") AS {{ field.name }},
{%- else %}
{{ field.name }} AS {{ field.name }},
{%- endif %}
{%- endfor %}
FROM {{ "{{" }} ref('{{ schema_name }}') {{ "}}" }}
```
- export command
```shell
datacontract export datacontract.odcs.yaml --format custom --template template.sql --schema-name orders
```
- `output.sql`
```sql
SELECT
order_id AS order_id,
DATETIME(order_timestamp, "Asia/Tokyo") AS order_timestamp,
order_total AS order_total,
customer_id AS customer_id,
customer_email_address AS customer_email_address,
DATETIME(processed_timestamp, "Asia/Tokyo") AS processed_timestamp,
FROM {{ ref('orders') }}
```

#### ODCS Excel Template

Expand Down
28 changes: 20 additions & 8 deletions datacontract/export/custom_exporter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path

from jinja2 import Environment, FileSystemLoader
from open_data_contract_standard.model import OpenDataContractStandard
from open_data_contract_standard.model import OpenDataContractStandard, SchemaObject

from datacontract.export.exporter import Exporter
from datacontract.export.exporter import Exporter, _check_schema_name_for_export


class CustomExporter(Exporter):
Expand All @@ -22,16 +22,28 @@ def export(
if template is None:
raise RuntimeError("Export to custom requires template argument.")

return to_custom(data_contract, template)
if schema_name and schema_name != "all":
schema_name, model_obj = _check_schema_name_for_export(data_contract, schema_name, self.export_format)
return to_custom(data_contract, template, schema_name=schema_name, schema=model_obj)
else:
return to_custom(data_contract, template)


def to_custom(
    data_contract: OpenDataContractStandard,
    template_path: Path,
    schema_name: str | None = None,
    schema: SchemaObject | None = None,
) -> str:
    """Render a data contract through a user-supplied Jinja template.

    The template always receives the full contract as ``data_contract``.
    When a single schema has been selected, it additionally receives
    ``schema`` (the schema object) and ``schema_name``.

    Args:
        data_contract: The contract exposed to the template.
        template_path: Location of the Jinja template file.
        schema_name: Name of the selected schema; only passed to the
            template together with ``schema``.
        schema: The selected schema object, or ``None`` to render against
            the whole contract only.

    Returns:
        The rendered template text.
    """
    template = get_template(template_path)
    context = {"data_contract": data_contract}
    # Both schema variables are added together, and only when a schema object
    # was supplied; otherwise the template sees just ``data_contract``, as it
    # did before schema selection existed.
    if schema is not None:
        context["schema"] = schema
        context["schema_name"] = schema_name
    return template.render(**context)


def get_template(path: Path):
    """Load the Jinja template stored at *path*.

    A Jinja environment is created whose file-system loader is rooted at the
    directory containing the resolved template file, and the template is then
    looked up in that environment by file name.

    Args:
        path: Location of the template file. Anything ``Path()`` accepts
            works, because the value is coerced before use.

    Returns:
        The template object returned by ``Environment.get_template``.
    """
    absolute_path = Path(path).resolve()
    env = Environment(loader=FileSystemLoader(str(absolute_path.parent)))
    # Take the file name from the resolved path, not from the raw argument:
    # this works when a plain string is passed and keeps the name consistent
    # with the directory the loader searches.
    return env.get_template(absolute_path.name)
86 changes: 86 additions & 0 deletions tests/fixtures/custom/export_model/datacontract.odcs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
kind: DataContract
apiVersion: v3.1.0
id: orders-unit-test
name: Orders Unit Test
version: 1.0.0
status: active
description:
purpose: The orders data contract
team:
name: checkout
description: Checkout team
servers:
- server: production
type: bigquery
environment: production
account: my-account
project: my-database
dataset: my-schema
schema:
- name: orders
businessName: orders
physicalType: table
description: The orders model
properties:
- name: order_id
businessName: Order ID
logicalType: string
physicalType: varchar
unique: true
required: true
classification: sensitive
tags:
- order_id
logicalTypeOptions:
minLength: 8
maxLength: 10
pattern: ^B[0-9]+$
customProperties:
- property: pii
value: "true"
examples:
- B12345678
- B12345679
- name: order_total
logicalType: integer
physicalType: bigint
required: true
description: The order_total field
logicalTypeOptions:
minimum: 0
maximum: 1000000
quality:
- type: sql
description: 95% of all order total values are expected to be between 10 and 499 EUR.
query: |
SELECT quantile_cont(order_total, 0.95) AS percentile_95
FROM orders
mustBeBetween:
- 1000
- 49900
- name: order_status
logicalType: string
physicalType: text
required: true
customProperties:
- property: enum
value: "[\"pending\", \"shipped\", \"delivered\"]"
- name: user_id
businessName: User ID
logicalType: string
physicalType: varchar
required: true
relationships:
- type: foreignKey
to: users.user_id
- name: users
businessName: users
physicalType: table
description: The users model
properties:
- name: user_id
businessName: User ID
logicalType: string
physicalType: varchar
unique: true
required: true
4 changes: 4 additions & 0 deletions tests/fixtures/custom/export_model/expected.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

SELECT
user_id AS user_id,
FROM {{ ref('users') }}
10 changes: 10 additions & 0 deletions tests/fixtures/custom/export_model/template.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

SELECT
{%- for field in schema.properties %}
{%- if field.physicalType == "timestamp" %}
DATETIME({{ field.name }}, "Asia/Tokyo") AS {{ field.name }},
{%- else %}
{{ field.name }} AS {{ field.name }},
{%- endif %}
{%- endfor %}
FROM {{ "{{" }} ref('{{ schema_name }}') {{ "}}" }}
36 changes: 36 additions & 0 deletions tests/test_export_custom_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from pathlib import Path

from typer.testing import CliRunner

from datacontract.cli import app
from datacontract.data_contract import DataContract


def test_cli():
    """Smoke test: a custom export limited to one schema exits with code 0."""
    cli_args = [
        "export",
        "./fixtures/custom/export_model/datacontract.odcs.yaml",
        "--format",
        "custom",
        "--template",
        "./fixtures/custom/export_model/template.sql",
        "--schema-name",
        "users",
    ]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0


def test_export_custom_schema_name():
    """Export the ``users`` schema via the Python API and compare with the stored SQL."""
    fixture_dir = Path("fixtures/custom/export_model")
    contract = DataContract(data_contract_file=str(fixture_dir / "datacontract.odcs.yaml"))

    rendered = contract.export(
        export_format="custom",
        schema_name="users",
        template=fixture_dir / "template.sql",
    )

    # The expected output lives next to the template in the fixture directory.
    expected = (fixture_dir / "expected.sql").read_text()
    assert rendered == expected
Loading