Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions example_notebook/TTD Connector Data SDK Example Notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,12 @@
"# Example: create a small sample DataFrame\n",
"# In practice, read from a Delta table or other data source\n",
"sample_data = [\n",
" {\"id_type\": \"tdid\", \"id_value\": \"a3f1c2d4-8e7b-4f6a-9c0d-1b2e3f4a5b6c\", \"segment_name\": \"segment_1\"},\n",
" {\"id_type\": \"daid\", \"id_value\": \"7d9e0f1a-2b3c-4d5e-6f7a-8b9c0d1e2f3a\", \"segment_name\": \"segment_2\"},\n",
" {\"id_type\": \"TDID\", \"id_value\": \"a3f1c2d4-8e7b-4f6a-9c0d-1b2e3f4a5b6c\", \"segment_name\": \"segment_1\"},\n",
" {\"id_type\": \"DAID\", \"id_value\": \"7d9e0f1a-2b3c-4d5e-6f7a-8b9c0d1e2f3a\", \"segment_name\": \"segment_2\"},\n",
" # intentionally incorrect format for ramp_id to showcase error entries in output\n",
" {\"id_type\": \"ramp_id\", \"id_value\": \"c4d5e6f7-a8b9-4c0d-1e2f-3a4b5c6d7e8f\", \"segment_name\": \"segment_3\"},\n",
" {\"id_type\": \"tdid\", \"id_value\": \"1f2a3b4c-5d6e-4f7a-8b9c-0d1e2f3a4b5c\", \"segment_name\": \"segment_4\"},\n",
" {\"id_type\": \"daid\", \"id_value\": \"9b0c1d2e-3f4a-4b5c-6d7e-8f9a0b1c2d3e\", \"segment_name\": \"segment_5\"},\n",
" {\"id_type\": \"RampID\", \"id_value\": \"c4d5e6f7-a8b9-4c0d-1e2f-3a4b5c6d7e8f\", \"segment_name\": \"segment_3\"},\n",
" {\"id_type\": \"TDID\", \"id_value\": \"1f2a3b4c-5d6e-4f7a-8b9c-0d1e2f3a4b5c\", \"segment_name\": \"segment_4\"},\n",
" {\"id_type\": \"DAID\", \"id_value\": \"9b0c1d2e-3f4a-4b5c-6d7e-8f9a0b1c2d3e\", \"segment_name\": \"segment_5\"},\n",
"]\n",
"\n",
"input_df = spark.createDataFrame(sample_data)\n",
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_call_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _make_failed_line(item_number: int, error_code: str = "INVALID", message: st


_CONTEXT = AdvertiserContext(advertiser_id="adv123")
_ROW = {"id_type": "tdid", "id_value": "abc", "segment_name": "seg"}
_ROW = {"id_type": "TDID", "id_value": "abc", "segment_name": "seg"}


# --------------------------------------------------------------------------- #
Expand Down
21 changes: 11 additions & 10 deletions tests/unit/test_handlers_build_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from datetime import datetime, timezone

import ttd_databricks_python.ttd_databricks.handlers.advertiser as adv_handler
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type
import ttd_databricks_python.ttd_databricks.handlers.deletion_optout_advertiser as del_adv_handler
import ttd_databricks_python.ttd_databricks.handlers.deletion_optout_merchant as del_merch_handler
import ttd_databricks_python.ttd_databricks.handlers.deletion_optout_thirdparty as del_tp_handler
Expand All @@ -25,7 +26,7 @@


class TestAdvertiserBuildItems:
_MINIMAL = {"id_type": "tdid", "id_value": "test-tdid-value", "segment_name": "test-segment-name"}
_MINIMAL = {"id_type": "TDID", "id_value": "test-tdid-value", "segment_name": "test-segment-name"}

def test_builds_advertiser_data_item_with_correct_fields(self):
# Handler maps id_type → AdvertiserDataItem field via normalize_id_type: {normalize_id_type(d["id_type"]): d["id_value"]}
Expand All @@ -48,9 +49,9 @@ def test_optional_fields_are_passed_through_when_provided(self):
assert item.cookie_mapping_partner_id == "test-partner-id"

def test_non_tdid_id_types_map_correctly(self):
for id_type in ["daid", "uid2", "ramp_id"]:
for id_type in ["DAID", "UID2", "RampID"]:
row = {**self._MINIMAL, "id_type": id_type, "id_value": f"test-{id_type}-value"}
assert getattr(adv_handler.build_items([row])[0], id_type) == f"test-{id_type}-value"
assert getattr(adv_handler.build_items([row])[0], normalize_id_type(id_type)) == f"test-{id_type}-value"


# --------------------------------------------------------------------------- #
Expand All @@ -59,7 +60,7 @@ def test_non_tdid_id_types_map_correctly(self):


class TestThirdPartyBuildItems:
_MINIMAL = {"id_type": "tdid", "id_value": "test-tdid-value", "segment_name": "test-segment-name"}
_MINIMAL = {"id_type": "TDID", "id_value": "test-tdid-value", "segment_name": "test-segment-name"}

def test_builds_third_party_data_item_with_correct_fields(self):
item = tp_handler.build_items([self._MINIMAL])[0]
Expand All @@ -85,19 +86,19 @@ def test_optional_fields_are_passed_through_when_provided(self):


def test_deletion_optout_advertiser_returns_partner_dsr_item_with_correct_id():
item = del_adv_handler.build_items([{"id_type": "tdid", "id_value": "test-advertiser-tdid"}])[0]
item = del_adv_handler.build_items([{"id_type": "TDID", "id_value": "test-advertiser-tdid"}])[0]
assert isinstance(item, PartnerDsrDataItem)
assert getattr(item, "tdid") == "test-advertiser-tdid"


def test_deletion_optout_thirdparty_returns_partner_dsr_item_with_correct_id():
item = del_tp_handler.build_items([{"id_type": "uid2", "id_value": "test-thirdparty-uid2"}])[0]
item = del_tp_handler.build_items([{"id_type": "UID2", "id_value": "test-thirdparty-uid2"}])[0]
assert isinstance(item, PartnerDsrDataItem)
assert getattr(item, "uid2") == "test-thirdparty-uid2"


def test_deletion_optout_merchant_returns_partner_dsr_item_with_correct_id():
item = del_merch_handler.build_items([{"id_type": "tdid", "id_value": "test-merchant-tdid"}])[0]
item = del_merch_handler.build_items([{"id_type": "TDID", "id_value": "test-merchant-tdid"}])[0]
assert isinstance(item, PartnerDsrDataItem)
assert getattr(item, "tdid") == "test-merchant-tdid"

Expand All @@ -121,15 +122,15 @@ def test_builds_offline_conversion_data_item_with_correct_fields(self):
def test_user_ids_converted_to_user_id_array_with_type_codes(self):
row = {
**self._MINIMAL,
"user_ids": [{"type": "tdid", "id": "test-tdid-value"}, {"type": "daid", "id": "test-daid-value"}],
"user_ids": [{"type": "TDID", "id": "test-tdid-value"}, {"type": "DAID", "id": "test-daid-value"}],
}
item = oc_handler.build_items([row])[0]
assert item.user_id_array == [["0", "test-tdid-value"], ["1", "test-daid-value"]]

def test_all_user_id_types_map_to_correct_codes(self):
type_map = {
"tdid": "0", "daid": "1", "uid2": "2", "uid2token": "3",
"euid": "4", "euidtoken": "5", "rampid": "6",
"TDID": "0", "DAID": "1", "UID2": "2", "UID2Token": "3",
"EUID": "4", "EUIDToken": "5", "RampID": "6",
}
for id_type, expected_code in type_map.items():
row = {**self._MINIMAL, "user_ids": [{"type": id_type, "id": f"test-{id_type}-value"}]}
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/test_push_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _make_handler(failed_lines: list | None = None) -> MagicMock:


def test_all_rows_succeed_output_has_success_true(spark: SparkSession) -> None:
data = [("tdid", "abc123", "seg1"), ("uid2", "def456", "seg2")]
data = [("TDID", "abc123", "seg1"), ("UID2", "def456", "seg2")]
df = spark.createDataFrame(data, _REQUIRED_SCHEMA)

with patch("importlib.import_module", return_value=_make_handler()):
Expand All @@ -69,7 +69,7 @@ def test_all_rows_succeed_output_has_success_true(spark: SparkSession) -> None:


def test_output_dataframe_has_status_columns(spark: SparkSession) -> None:
df = spark.createDataFrame([("tdid", "abc123", "seg1")], _REQUIRED_SCHEMA)
df = spark.createDataFrame([("TDID", "abc123", "seg1")], _REQUIRED_SCHEMA)

with patch("importlib.import_module", return_value=_make_handler()):
result = _make_client(spark).push_data(df, _CONTEXT)
Expand Down Expand Up @@ -97,7 +97,7 @@ def test_extra_columns_preserved_in_output(spark: SparkSession) -> None:
StructField("custom_col", StringType(), True),
]
)
df = spark.createDataFrame([("tdid", "abc123", "seg1", "my_value")], schema)
df = spark.createDataFrame([("TDID", "abc123", "seg1", "my_value")], schema)

with patch("importlib.import_module", return_value=_make_handler()):
result = _make_client(spark).push_data(df, _CONTEXT)
Expand All @@ -113,7 +113,7 @@ def test_extra_columns_preserved_in_output(spark: SparkSession) -> None:

def test_partial_failure_maps_error_to_correct_row(spark: SparkSession) -> None:
# Two rows in one batch; item #1 fails, item #2 succeeds
data = [("tdid", "abc123", "seg1"), ("tdid", "def456", "seg2")]
data = [("TDID", "abc123", "seg1"), ("TDID", "def456", "seg2")]
df = spark.createDataFrame(data, _REQUIRED_SCHEMA)

failed_line = MagicMock()
Expand All @@ -137,7 +137,7 @@ def test_missing_required_column_raises_schema_validation_error(spark: SparkSess
# missing: segment_name
]
)
df = spark.createDataFrame([("tdid", "abc123")], schema)
df = spark.createDataFrame([("TDID", "abc123")], schema)

with pytest.raises(TTDSchemaValidationError) as exc_info:
_make_client(spark).push_data(df, _CONTEXT)
Expand Down
3 changes: 2 additions & 1 deletion ttd_databricks_python/ttd_databricks/handlers/advertiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ttd_databricks_python.ttd_databricks.constants import TTD_DATABRICKS_SDK_ORIGIN_ID
from ttd_databricks_python.ttd_databricks.contexts import AdvertiserContext
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type

if TYPE_CHECKING:
from ttd_data import DataClient
Expand All @@ -26,7 +27,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[AdvertiserDataItem]:
adv_data_kwargs[field] = d[field]

adv_item_kwargs = {
d["id_type"]: d["id_value"],
normalize_id_type(d["id_type"]): d["id_value"],
"data": [AdvertiserData(**adv_data_kwargs)],
}
for field in ITEM_OPTIONAL_FIELDS:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING, Any, Optional, cast

from ttd_databricks_python.ttd_databricks.contexts import DeletionOptOutAdvertiserContext
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type

if TYPE_CHECKING:
from ttd_data import DataClient
Expand All @@ -17,7 +18,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[PartnerDsrDataItem]:

items = []
for d in items_data:
items.append(PartnerDsrDataItem(**{d["id_type"]: d["id_value"]}))
items.append(PartnerDsrDataItem(**{normalize_id_type(d["id_type"]): d["id_value"]}))
return items


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING, Any, Optional, cast

from ttd_databricks_python.ttd_databricks.contexts import DeletionOptOutMerchantContext
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type

if TYPE_CHECKING:
from ttd_data import DataClient
Expand All @@ -17,7 +18,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[PartnerDsrDataItem]:

items = []
for d in items_data:
items.append(PartnerDsrDataItem(**{d["id_type"]: d["id_value"]}))
items.append(PartnerDsrDataItem(**{normalize_id_type(d["id_type"]): d["id_value"]}))
return items


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING, Any, Optional, cast

from ttd_databricks_python.ttd_databricks.contexts import DeletionOptOutThirdPartyContext
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type

if TYPE_CHECKING:
from ttd_data import DataClient
Expand All @@ -17,7 +18,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[PartnerDsrDataItem]:

items = []
for d in items_data:
items.append(PartnerDsrDataItem(**{d["id_type"]: d["id_value"]}))
items.append(PartnerDsrDataItem(**{normalize_id_type(d["id_type"]): d["id_value"]}))
return items


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@
from ttd_data import DataClient
from ttd_data.models import OfflineConversionDataItem

# Maps user_ids[].type (lowercase) → string type code used in UserIdArray
# Maps user_ids[].type → string type code used in UserIdArray.
# Keys are uppercased so that lookup is case-insensitive.
_USER_ID_TYPE_CODE: dict[str, str] = {
"tdid": "0",
"daid": "1",
"uid2": "2",
"uid2token": "3",
"euid": "4",
"euidtoken": "5",
"rampid": "6",
"TDID": "0",
"DAID": "1",
"UID2": "2",
"UID2TOKEN": "3",
"EUID": "4",
"EUIDTOKEN": "5",
"RAMPID": "6",
}


Expand All @@ -43,7 +44,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[OfflineConversionDataI
raw_user_ids = row.get("user_ids")
if raw_user_ids:
kwargs["user_id_array"] = [
[_USER_ID_TYPE_CODE[user_id["type"].lower()], user_id["id"]] for user_id in raw_user_ids
[_USER_ID_TYPE_CODE[user_id["type"].upper()], user_id["id"]] for user_id in raw_user_ids
]

for field in ITEM_OPTIONAL_FIELDS:
Expand Down
3 changes: 2 additions & 1 deletion ttd_databricks_python/ttd_databricks/handlers/third_party.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ttd_databricks_python.ttd_databricks.constants import TTD_DATABRICKS_SDK_ORIGIN_ID
from ttd_databricks_python.ttd_databricks.contexts import ThirdPartyContext
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type

if TYPE_CHECKING:
from ttd_data import DataClient
Expand All @@ -26,7 +27,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[ThirdPartyDataItem]:
tp_data_kwargs[field] = d[field]

tp_item_kwargs = {
d["id_type"]: d["id_value"],
normalize_id_type(d["id_type"]): d["id_value"],
"data": [ThirdPartyData(**tp_data_kwargs)],
}
for field in ITEM_OPTIONAL_FIELDS:
Expand Down
35 changes: 35 additions & 0 deletions ttd_databricks_python/ttd_databricks/id_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""User-facing id_type names and normalization to ttd-data Pydantic field names."""

from __future__ import annotations

# TTD public documentation specifies id_type values in PascalCase/ALLCAPS.
# Keys are uppercased so that normalization is case-insensitive — both
# "RampID" and "rampid" resolve to the same Python field name.
# Values are the snake_case keyword names that the handlers pass to the
# ttd-data item constructors.
_NORMALIZATION: dict[str, str] = {
    "TDID": "tdid",
    "DAID": "daid",
    "UID2": "uid2",
    "UID2TOKEN": "uid2_token",
    "EUID": "euid",
    "EUIDTOKEN": "euid_token",
    "RAMPID": "ramp_id",
    # CoreID is listed as an accepted id_type in the input_schema() docstrings
    # (previously documented as core_id), so it must be resolvable here too.
    "COREID": "core_id",
    "ID5": "id5",
    "NETID": "net_id",
    "FIRSTID": "first_id",
    "MERKURYID": "merkury_id",
    "IQVIAPPID": "iqvia_ppid",
}

# Accepted id_type names in their canonical (uppercased) lookup form.
VALID_ID_TYPES: frozenset[str] = frozenset(_NORMALIZATION)


def normalize_id_type(id_type: str) -> str:
    """Map a user-facing id_type to the Python field name expected by ttd-data models.

    Matching is case-insensitive: ``"RampID"``, ``"rampid"`` and ``"RAMPID"``
    all resolve to ``"ramp_id"``.

    Args:
        id_type: User-supplied identity type name, e.g. ``"TDID"`` or ``"UID2Token"``.

    Returns:
        The snake_case field name registered for ``id_type``.

    Raises:
        ValueError: If ``id_type`` is not a string or is not a recognized
            identity type. The message lists the accepted (uppercased) names.
    """
    # A non-string (e.g. a null column value) has no .upper(); report it as an
    # unrecognized id_type instead of leaking an AttributeError to the caller.
    field_name = _NORMALIZATION.get(id_type.upper()) if isinstance(id_type, str) else None
    if field_name is None:
        valid = ", ".join(sorted(_NORMALIZATION))
        raise ValueError(f"Unknown id_type {id_type!r}. Must be one of: {valid}.")
    return field_name
4 changes: 2 additions & 2 deletions ttd_databricks_python/ttd_databricks/schemas/advertiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def input_schema() -> StructType:
Mandatory columns (not nullable):
id_type → which AdvertiserDataItem identity field this row uses.
Must be one of: tdid, daid, uid2, uid2_token, ramp_id, core_id,
euid, euid_token, id5, net_id, first_id, merkury_id, iqvia_ppid.
Must be one of: TDID, DAID, UID2, UID2Token, EUID, EUIDToken,
RampID, ID5, netID, FirstId, CoreID, MerkuryID, IqviaPPID.
id_value → the identifier value for the given id_type.
segment_name → AdvertiserData.name (audience segment / data element name).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def input_schema() -> StructType:
Mandatory columns (not nullable):
id_type → which PartnerDsrDataItem identity field this row uses.
Must be one of: tdid, daid, uid2, uid2_token, ramp_id, core_id,
euid, euid_token, id5, net_id, first_id, merkury_id, iqvia_ppid.
Must be one of: TDID, DAID, UID2, UID2Token, EUID, EUIDToken,
RampID, ID5, netID, FirstId, CoreID, MerkuryID, IqviaPPID.
id_value → the identifier value for the given id_type.
"""
from pyspark.sql.types import StringType, StructField, StructType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def input_schema() -> StructType:
Mandatory columns (not nullable):
id_type → which PartnerDsrDataItem identity field this row uses.
Must be one of: tdid, daid, uid2, uid2_token, ramp_id, core_id,
euid, euid_token, id5, net_id, first_id, merkury_id, iqvia_ppid.
Must be one of: TDID, DAID, UID2, UID2Token, EUID, EUIDToken,
RampID, ID5, netID, FirstId, CoreID, MerkuryID, IqviaPPID.
id_value → the identifier value for the given id_type.
"""
from pyspark.sql.types import StringType, StructField, StructType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def input_schema() -> StructType:

Mandatory columns (not nullable):
id_type → which PartnerDsrDataItem identity field this row uses.
Must be one of: tdid, daid, uid2, uid2_token, ramp_id, core_id,
euid, euid_token, id5, net_id, first_id, merkury_id, iqvia_ppid.
Must be one of: TDID, DAID, UID2, UID2Token, EUID, EUIDToken,
RampID, ID5, netID, FirstId, CoreID, MerkuryID, IqviaPPID.
id_value → the identifier value for the given id_type.
"""
from pyspark.sql.types import StringType, StructField, StructType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def input_schema() -> StructType:
user_ids → OfflineConversionDataItem.UserIdArray in the API request.
Required only if impression_id is not provided.
Array of structs with fields:
type — identity type name (case-insensitive).
Must be one of: tdid, daid, uid2, uid2token,
euid, euidtoken, rampid.
type — identity type name (case-insensitive).
Must be one of: TDID, DAID, UID2, UID2Token,
EUID, EUIDToken, RampID.
Converted to integer code (0–6) in the request.
id — identity value string.
UserIdArrayMetadataFormat is hardcoded to ["type", "id"].
Expand Down
4 changes: 2 additions & 2 deletions ttd_databricks_python/ttd_databricks/schemas/third_party.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def input_schema() -> StructType:
Mandatory columns (not nullable):
id_type → which ThirdPartyDataItem identity field this row uses.
Must be one of: tdid, daid, uid2, uid2_token, ramp_id, core_id,
euid, euid_token, id5, net_id, first_id, merkury_id, iqvia_ppid.
Must be one of: TDID, DAID, UID2, UID2Token, EUID, EUIDToken,
RampID, ID5, netID, FirstId, CoreID, MerkuryID, IqviaPPID.
id_value → the identifier value for the given id_type.
segment_name → ThirdPartyData.name (audience segment / data element name).
Expand Down
Loading