A collection of utility functions for data processing, encoding, decoding, and sanitization in Python applications. Designed to work seamlessly with both synchronous and asynchronous code.
- JSON Field Processing - Encode and decode JSON fields in data structures
- Data Sanitization - Clean and normalize data with customizable rules
- Async Support - All functions support async/await patterns
- Type Safety - Full type hints and modern Python support
- Flexible Configuration - Customizable field processors and mapping rules
- Pydantic Integration - Works seamlessly with Pydantic models
- Error Handling - Robust error handling with detailed logging
# Using Poetry (recommended)
poetry add oguild
# Using pip
pip install oguildfrom oguild.utils import encode_json_fields, decode_json_fields
# Encode dictionary/list fields to JSON strings
data = [
{"id": 1, "metadata": {"key": "value"}, "tags": ["tag1", "tag2"]},
{"id": 2, "metadata": {"key2": "value2"}, "tags": ["tag3"]}
]
# Encode specific fields to JSON strings
encoded_data = await encode_json_fields(data, ["metadata", "tags"])
# Result: [{"id": 1, "metadata": '{"key": "value"}', "tags": '["tag1", "tag2"]'}, ...]
# Decode JSON strings back to objects
decoded_data = await decode_json_fields(encoded_data, ["metadata", "tags"])
# Result: [{"id": 1, "metadata": {"key": "value"}, "tags": ["tag1", "tag2"]}, ...]from oguild.utils import sanitize_fields
from pydantic import BaseModel
# Basic sanitization
data = {
"id": 1,
"name": "John Doe",
"email": "",
"age": None,
"metadata": {"key": "value", "empty": ""}
}
sanitized = await sanitize_fields(data)
# Result: {"id": 1, "name": "John Doe", "metadata": {"key": "value"}}
# Empty values (None, "") are removed by default
# Custom sanitization with field mapping
custom_sanitized = await sanitize_fields(
data,
empty_values={None, "", "N/A"},
key_mapping={"_id": "id", "user_name": "name"}
)Serializes specified dictionary fields into JSON strings.
async def encode_json_fields(
rows: Union[list, dict],
json_keys: list[str]
) -> Union[list, dict]Parameters:
rows- List of dictionaries or single dictionary to processjson_keys- List of field names to encode as JSON strings
Returns:
- Processed data with specified fields encoded as JSON strings
Example:
data = {"id": 1, "config": {"setting": "value"}}
result = await encode_json_fields(data, ["config"])
# Result: {"id": 1, "config": '{"setting": "value"}'}Parses specified JSON fields in each row of the result set.
async def decode_json_fields(
rows: list,
json_keys: list[str]
) -> listParameters:
rows- List of dictionaries to processjson_keys- List of field names to decode from JSON strings
Returns:
- List of dictionaries with specified fields decoded from JSON
Example:
data = [{"id": 1, "config": '{"setting": "value"}'}]
result = await decode_json_fields(data, ["config"])
# Result: [{"id": 1, "config": {"setting": "value"}}]Cleans and normalizes data structures with customizable rules.
async def sanitize_fields(
data: Any,
empty_values: set = DEFAULT_EMPTY_VALUES,
key_mapping: dict = DEFAULT_KEY_MAPPING,
field_processors: dict = DEFAULT_FIELD_PROCESSORS
) -> AnyParameters:
data- Data to sanitize (dict, list, Pydantic model, or primitive)empty_values- Set of values to consider as empty and removekey_mapping- Dictionary mapping old keys to new keysfield_processors- Dictionary of field-specific processing functions
Returns:
- Sanitized data with the same structure
Default Values:
DEFAULT_EMPTY_VALUES = {None, ""}DEFAULT_KEY_MAPPING = {"_id": "id"}DEFAULT_FIELD_PROCESSORS = {}
from oguild.utils import sanitize_fields
from datetime import datetime
from decimal import Decimal
from uuid import UUID
# Complex data structure
data = {
"_id": "123e4567-e89b-12d3-a456-426614174000",
"user_name": "John Doe",
"email": "",
"age": None,
"balance": Decimal("123.45"),
"created_at": datetime.now(),
"metadata": {
"last_login": None,
"preferences": {"theme": "dark", "notifications": ""}
},
"tags": ["", "important", None, "urgent"]
}
# Custom field processors
def process_balance(value):
return float(value) if value else 0.0
def process_tags(value):
return [tag for tag in value if tag and tag.strip()]
# Sanitize with custom rules
sanitized = await sanitize_fields(
data,
empty_values={None, "", "N/A", "null"},
key_mapping={
"_id": "id",
"user_name": "name"
},
field_processors={
"balance": process_balance,
"tags": process_tags
}
)
# Result:
# {
# "id": "123e4567-e89b-12d3-a456-426614174000",
# "name": "John Doe",
# "balance": 123.45,
# "created_at": "2024-01-15T10:30:00",
# "metadata": {
# "preferences": {"theme": "dark"}
# },
# "tags": ["important", "urgent"]
# }from pydantic import BaseModel
from oguild.utils import sanitize_fields
class User(BaseModel):
id: int
name: str
email: str = ""
metadata: dict = {}
# Pydantic model sanitization
user = User(id=1, name="John", email="", metadata={"key": "value", "empty": ""})
sanitized_user = await sanitize_fields(user)
# Result: {"id": 1, "name": "John", "metadata": {"key": "value"}}from oguild.utils import encode_json_fields, decode_json_fields
# Simulate database results with JSON fields
db_results = [
{
"id": 1,
"user_id": 123,
"settings": {"theme": "dark", "notifications": True},
"tags": ["admin", "premium"]
},
{
"id": 2,
"user_id": 456,
"settings": {"theme": "light", "notifications": False},
"tags": ["user"]
}
]
# Encode for database storage
encoded_results = await encode_json_fields(db_results, ["settings", "tags"])
# Store in database...
# Retrieve from database...
# Decode for application use
decoded_results = await decode_json_fields(encoded_results, ["settings", "tags"])from oguild.utils import encode_json_fields
import json
# Handle encoding errors gracefully
data = {"id": 1, "invalid_json": {"circular": None}}
data["invalid_json"]["circular"] = data["invalid_json"] # Create circular reference
try:
result = await encode_json_fields(data, ["invalid_json"])
# Error is logged but processing continues
except Exception as e:
print(f"Encoding failed: {e}")from oguild.utils import sanitize_fields
def process_email(value):
"""Normalize email addresses."""
if not value:
return None
return value.lower().strip()
def process_phone(value):
"""Format phone numbers."""
if not value:
return None
# Remove all non-digit characters
digits = ''.join(filter(str.isdigit, value))
if len(digits) == 10:
return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
return value
# Use custom processors
sanitized = await sanitize_fields(
data,
field_processors={
"email": process_email,
"phone": process_phone
}
)from oguild.utils import sanitize_fields
# Define custom empty values
custom_empty_values = {None, "", "N/A", "null", "undefined", 0, False}
sanitized = await sanitize_fields(
data,
empty_values=custom_empty_values
)import pytest
from oguild.utils import encode_json_fields, decode_json_fields, sanitize_fields
@pytest.mark.asyncio
async def test_encode_json_fields():
data = {"id": 1, "config": {"key": "value"}}
result = await encode_json_fields(data, ["config"])
assert result["config"] == '{"key": "value"}'
@pytest.mark.asyncio
async def test_decode_json_fields():
data = [{"id": 1, "config": '{"key": "value"}'}]
result = await decode_json_fields(data, ["config"])
assert result[0]["config"] == {"key": "value"}
@pytest.mark.asyncio
async def test_sanitize_fields():
data = {"id": 1, "name": "John", "email": ""}
result = await sanitize_fields(data)
assert "email" not in result
assert result["name"] == "John"The utils module integrates with the OpsGuild logging system:
# Errors during JSON encoding/decoding are automatically logged
# Sanitization errors are logged with context information
# All operations include debug-level logging for troubleshootingWe welcome contributions! Please see our Contributing Guide for details.
This project is licensed under the GNU General Public License v3.0 - see the LICENSE file for details.
Made with ❤️ by the OpsGuild team