diff --git a/docs/source/redact/api.rst b/docs/source/redact/api.rst index 9f4d73f..60078cc 100644 --- a/docs/source/redact/api.rst +++ b/docs/source/redact/api.rst @@ -36,4 +36,50 @@ Helper classes :members: .. autoclass:: tonic_textual.helpers.json_conversation_helper.JsonConversationHelper - :members: + :members: + +Generator metadata +------------------------------------------------ +.. autoclass:: tonic_textual.classes.generator_metadata.base_metadata.BaseMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.base_date_time_generator_metadata.BaseDateTimeGeneratorMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.name_generator_metadata.NameGeneratorMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.email_generator_metadata.EmailGeneratorMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.phone_number_generator_metadata.PhoneNumberGeneratorMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.date_time_generator_metadata.DateTimeGeneratorMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.timestamp_shift_metadata.TimestampShiftMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.person_age_generator_metadata.PersonAgeGeneratorMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.age_shift_metadata.AgeShiftMetadata + :members: + :no-undoc-members: + +.. autoclass:: tonic_textual.classes.generator_metadata.hipaa_address_generator_metadata.HipaaAddressGeneratorMetadata + :members: + :no-undoc-members: + +.. 
autoclass:: tonic_textual.classes.generator_metadata.numeric_value_generator_metadata.NumericValueGeneratorMetadata + :members: + :no-undoc-members: diff --git a/docs/source/redact/generator_metadata.rst b/docs/source/redact/generator_metadata.rst new file mode 100644 index 0000000..3dfc3bf --- /dev/null +++ b/docs/source/redact/generator_metadata.rst @@ -0,0 +1,273 @@ +.. _generator-metadata: + +Customizing synthesis with generator metadata +============================================== + +When you use ``generator_config`` to set an entity type to ``Synthesis``, Textual uses default synthesis settings. The ``generator_metadata`` parameter allows you to fine-tune how each entity type's synthesizer behaves. + +``generator_metadata`` is a dictionary that maps entity type names (such as ``"NAME_GIVEN"`` or ``"EMAIL_ADDRESS"``) to metadata instances that control synthesis behavior for that type. + +.. code-block:: python + + from tonic_textual.redact_api import TextualNer + from tonic_textual.classes.generator_metadata.name_generator_metadata import NameGeneratorMetadata + from tonic_textual.classes.generator_metadata.email_generator_metadata import EmailGeneratorMetadata + + textual = TextualNer() + + generator_metadata = { + "NAME_GIVEN": NameGeneratorMetadata(preserve_gender=True), + "NAME_FAMILY": NameGeneratorMetadata(is_consistency_case_sensitive=True), + "EMAIL_ADDRESS": EmailGeneratorMetadata(preserve_domain=True), + } + + result = textual.redact( + "Contact John Smith at john.smith@example.com", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + +.. note:: + + The ``redact_structured`` method takes a single ``Optional[BaseMetadata]`` instead of a dictionary, because it operates on a single entity type at a time. 
+ +Common parameters +----------------- + +Except for the nested ``AgeShiftMetadata`` helper, which subclasses ``dict`` directly, all metadata classes inherit from ``BaseMetadata``, which defines the following shared parameter: + +* ``swaps`` (dict of str to str, default ``{}``) -- A dictionary of explicit replacement mappings. When a detected value matches a key, the corresponding value is used as the synthesized replacement instead of a generated one. + +.. code-block:: python + + from tonic_textual.classes.generator_metadata.name_generator_metadata import NameGeneratorMetadata + + # Always replace "Acme" with "Globex" instead of generating a random name + metadata = NameGeneratorMetadata(swaps={"Acme": "Globex"}) + + +Name synthesis +-------------- + +:class:`~tonic_textual.classes.generator_metadata.name_generator_metadata.NameGeneratorMetadata` controls how synthesized names are generated. Use it with the ``NAME_GIVEN`` and ``NAME_FAMILY`` entity types. + +* ``is_consistency_case_sensitive`` (bool, default ``False``) -- When ``True``, name consistency is case-sensitive. ``"john"`` and ``"John"`` are treated as different names and might receive different replacements. +* ``preserve_gender`` (bool, default ``False``) -- When ``True``, the synthesized name preserves the gender of the original. Male names are replaced with male names, and female names with female names. + +.. code-block:: python + + from tonic_textual.classes.generator_metadata.name_generator_metadata import NameGeneratorMetadata + + generator_metadata = { + "NAME_GIVEN": NameGeneratorMetadata(preserve_gender=True), + } + + result = textual.redact( + "John told Mary about the project.", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + + +Email synthesis +--------------- + +:class:`~tonic_textual.classes.generator_metadata.email_generator_metadata.EmailGeneratorMetadata` controls how synthesized email addresses are generated. Use it with the ``EMAIL_ADDRESS`` entity type. 
+ +* ``preserve_domain`` (bool, default ``False``) -- When ``True``, the domain portion of the email address is preserved. For example, ``"john@example.com"`` might become ``"alan@example.com"``. + +.. code-block:: python + + from tonic_textual.classes.generator_metadata.email_generator_metadata import EmailGeneratorMetadata + + generator_metadata = { + "EMAIL_ADDRESS": EmailGeneratorMetadata(preserve_domain=True), + } + + result = textual.redact( + "Reach me at john@example.com", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + + +Phone number synthesis +---------------------- + +:class:`~tonic_textual.classes.generator_metadata.phone_number_generator_metadata.PhoneNumberGeneratorMetadata` controls how synthesized telephone numbers are generated. Use it with the ``PHONE_NUMBER`` entity type. + +* ``use_us_phone_number_generator`` (bool, default ``False``) -- When ``True``, generated telephone numbers use a US phone number format. +* ``replace_invalid_numbers`` (bool, default ``True``) -- When ``True``, detected telephone numbers that are not valid are still replaced with synthesized values. + +.. code-block:: python + + from tonic_textual.classes.generator_metadata.phone_number_generator_metadata import PhoneNumberGeneratorMetadata + + generator_metadata = { + "PHONE_NUMBER": PhoneNumberGeneratorMetadata( + use_us_phone_number_generator=True, + replace_invalid_numbers=True, + ), + } + + result = textual.redact( + "Call me at 555-0123.", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + + +Date and time synthesis +----------------------- + +:class:`~tonic_textual.classes.generator_metadata.date_time_generator_metadata.DateTimeGeneratorMetadata` controls how synthesized dates and times are generated. Use it with the ``DATE_TIME`` entity type. Dates are shifted by a random number of days within a configurable range. 
+ +* ``scramble_unrecognized_dates`` (bool, default ``True``) -- When ``True``, dates that Textual cannot parse into a standard format are scrambled. +* ``additional_date_formats`` (list of str, default ``[]``) -- Additional date format patterns that Textual should recognize. Uses Python ``strftime``/``strptime`` format codes. +* ``apply_constant_shift_to_document`` (bool, default ``False``) -- When ``True``, all dates within the same document are shifted by the same random offset. This preserves the relative time differences between dates. +* ``metadata`` (:class:`~tonic_textual.classes.generator_metadata.timestamp_shift_metadata.TimestampShiftMetadata`) -- Controls the date shift range. By default, dates shift by -7 to +7 days. + +TimestampShiftMetadata +^^^^^^^^^^^^^^^^^^^^^^ + +:class:`~tonic_textual.classes.generator_metadata.timestamp_shift_metadata.TimestampShiftMetadata` configures the range of days by which dates can be shifted. + +* ``left_shift_in_days`` (int, default ``-7``) -- The minimum shift in days. Use a negative value to shift dates into the past. +* ``right_shift_in_days`` (int, default ``7``) -- The maximum shift in days. Use a positive value to shift dates into the future. + +.. 
code-block:: python + + from tonic_textual.classes.generator_metadata.date_time_generator_metadata import DateTimeGeneratorMetadata + from tonic_textual.classes.generator_metadata.timestamp_shift_metadata import TimestampShiftMetadata + + generator_metadata = { + "DATE_TIME": DateTimeGeneratorMetadata( + apply_constant_shift_to_document=True, + metadata=TimestampShiftMetadata( + left_shift_in_days=-30, + right_shift_in_days=30, + ), + ), + } + + result = textual.redact( + "The meeting is on 2024-01-15 and the deadline is 2024-02-01.", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + + +Person age synthesis +-------------------- + +:class:`~tonic_textual.classes.generator_metadata.person_age_generator_metadata.PersonAgeGeneratorMetadata` controls how synthesized ages are generated. Use it with the ``PERSON_AGE`` entity type. + +* ``scramble_unrecognized_dates`` (bool, default ``True``) -- When ``True``, dates that Textual cannot parse are scrambled. +* ``metadata`` (:class:`~tonic_textual.classes.generator_metadata.age_shift_metadata.AgeShiftMetadata`) -- Controls the age shift amount. By default, ages shift by 7 years. + +AgeShiftMetadata +^^^^^^^^^^^^^^^^ + +:class:`~tonic_textual.classes.generator_metadata.age_shift_metadata.AgeShiftMetadata` configures the number of years to shift detected ages. + +* ``age_shift_in_years`` (int, default ``7``) -- The number of years to shift the age. + +.. 
code-block:: python + + from tonic_textual.classes.generator_metadata.person_age_generator_metadata import PersonAgeGeneratorMetadata + from tonic_textual.classes.generator_metadata.age_shift_metadata import AgeShiftMetadata + + generator_metadata = { + "PERSON_AGE": PersonAgeGeneratorMetadata( + metadata=AgeShiftMetadata(age_shift_in_years=3), + ), + } + + result = textual.redact( + "The patient is 45 years old.", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + + +Address synthesis (HIPAA) +------------------------- + +:class:`~tonic_textual.classes.generator_metadata.hipaa_address_generator_metadata.HipaaAddressGeneratorMetadata` controls how synthesized addresses are generated for location entity types such as ``LOCATION_ADDRESS`` and ``LOCATION_ZIP``. By default, address synthesis follows HIPAA Safe Harbor de-identification rules. + +* ``use_non_hipaa_address_generator`` (bool, default ``False``) -- When ``True``, uses a non-HIPAA-compliant address generator that might produce more realistic addresses, but does not guarantee HIPAA Safe Harbor compliance. +* ``replace_truncated_zeros_in_zip_code`` (bool, default ``True``) -- When ``True``, for ZIP codes that are truncated to three digits (per HIPAA Safe Harbor), the removed digits are replaced with zeros. +* ``realistic_synthetic_values`` (bool, default ``True``) -- When ``True``, generates realistic-looking synthetic address values. + +.. 
code-block:: python + + from tonic_textual.classes.generator_metadata.hipaa_address_generator_metadata import HipaaAddressGeneratorMetadata + + generator_metadata = { + "LOCATION_ADDRESS": HipaaAddressGeneratorMetadata( + realistic_synthetic_values=True, + replace_truncated_zeros_in_zip_code=True, + ), + } + + result = textual.redact( + "She lives at 123 Main St, Springfield, IL 62704.", + generator_default="Synthesis", + generator_metadata=generator_metadata, + ) + + +Numeric value synthesis +----------------------- + +:class:`~tonic_textual.classes.generator_metadata.numeric_value_generator_metadata.NumericValueGeneratorMetadata` controls how synthesized numeric values are generated. Use it with the ``NUMERIC_VALUE`` entity type. + +* ``use_oracle_integer_pk_generator`` (bool, default ``False``) -- When ``True``, uses a generator designed for Oracle integer primary keys. + +.. code-block:: python + + from tonic_textual.classes.generator_metadata.numeric_value_generator_metadata import NumericValueGeneratorMetadata + + generator_metadata = { + "NUMERIC_VALUE": NumericValueGeneratorMetadata( + use_oracle_integer_pk_generator=True, + ), + } + + +Combining multiple metadata configurations +------------------------------------------- + +You can combine multiple metadata configurations in a single call. This example configures synthesis for names, emails, and dates: + +.. 
code-block:: python + + from tonic_textual.redact_api import TextualNer + from tonic_textual.classes.generator_metadata.name_generator_metadata import NameGeneratorMetadata + from tonic_textual.classes.generator_metadata.email_generator_metadata import EmailGeneratorMetadata + from tonic_textual.classes.generator_metadata.date_time_generator_metadata import DateTimeGeneratorMetadata + from tonic_textual.classes.generator_metadata.timestamp_shift_metadata import TimestampShiftMetadata + + textual = TextualNer() + + result = textual.redact( + "John Smith (john@acme.com) joined on 2024-01-15.", + generator_default="Off", + generator_config={ + "NAME_GIVEN": "Synthesis", + "NAME_FAMILY": "Synthesis", + "EMAIL_ADDRESS": "Synthesis", + "DATE_TIME": "Synthesis", + }, + generator_metadata={ + "NAME_GIVEN": NameGeneratorMetadata(preserve_gender=True), + "EMAIL_ADDRESS": EmailGeneratorMetadata(preserve_domain=True), + "DATE_TIME": DateTimeGeneratorMetadata( + apply_constant_shift_to_document=True, + metadata=TimestampShiftMetadata( + left_shift_in_days=-14, + right_shift_in_days=14, + ), + ), + }, + ) diff --git a/docs/source/redact/index.rst b/docs/source/redact/index.rst index 27770ab..2f8b983 100644 --- a/docs/source/redact/index.rst +++ b/docs/source/redact/index.rst @@ -13,13 +13,14 @@ When Textual operates on your data: 2. Second, it uses information about where entities are located to tokenize or synthesize the data. -In :doc:`Choosing tokenization or synthesis <./redact_config>` you can learn different ways to configure your output. +In :doc:`Choosing tokenization or synthesis <./redact_config>` you can learn different ways to configure your output. To fine-tune how synthesized values are generated for specific entity types, see :doc:`Customizing synthesis with generator metadata <./generator_metadata>`. .. 
toctree:: :caption: In this section: - + redact_config + generator_metadata redacting_text redacting_json redacting_html diff --git a/docs/source/redact/redact_config.rst b/docs/source/redact/redact_config.rst index 658b601..4dc7892 100644 --- a/docs/source/redact/redact_config.rst +++ b/docs/source/redact/redact_config.rst @@ -33,7 +33,9 @@ Synthesized entities are replaced with realistic fake values, For example:: My name is John Smith. -> My name is Alan Johnson -These fake values are consistent. So in the above example, John goes to Alan and will do so in all cases within the document and optionally across documents as well. +These fake values are consistent. So in the above example, John changes to Alan and does so in all cases within the document and optionally across documents as well. + +To further customize how synthesized values are generated for specific entity types, see :ref:`generator-metadata`. Group synthesis ^^^^^^^^^^^^^^^^^^ diff --git a/tonic_textual/classes/generator_metadata/age_shift_metadata.py b/tonic_textual/classes/generator_metadata/age_shift_metadata.py index 9898bb1..3bdcb1d 100644 --- a/tonic_textual/classes/generator_metadata/age_shift_metadata.py +++ b/tonic_textual/classes/generator_metadata/age_shift_metadata.py @@ -2,6 +2,17 @@ class AgeShiftMetadata(dict): + """Configuration for the age shift amount used by + :class:`PersonAgeGeneratorMetadata`. + + Defines how many years to shift detected ages by. + + Parameters + ---------- + age_shift_in_years : int + The number of years to shift the age. Default is ``7``. 
+ """ + def __init__( self, age_shift_in_years: int = 7 diff --git a/tonic_textual/classes/generator_metadata/base_date_time_generator_metadata.py b/tonic_textual/classes/generator_metadata/base_date_time_generator_metadata.py index 3b3e5a1..bd65fdf 100644 --- a/tonic_textual/classes/generator_metadata/base_date_time_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/base_date_time_generator_metadata.py @@ -6,6 +6,20 @@ class BaseDateTimeGeneratorMetadata(BaseMetadata): + """Base class for date and time related generator metadata. + + Extends :class:`BaseMetadata` with a common date/time parameter. You + typically do not instantiate this class directly. Instead, use + :class:`DateTimeGeneratorMetadata` or :class:`PersonAgeGeneratorMetadata`. + + Parameters + ---------- + scramble_unrecognized_dates : bool + When ``True``, dates that Textual cannot parse into a standard format + are scrambled. When ``False``, unrecognized dates are left + unchanged. Default is ``True``. + """ + def __init__( self, custom_generator: Optional[GeneratorType] = None, diff --git a/tonic_textual/classes/generator_metadata/base_metadata.py b/tonic_textual/classes/generator_metadata/base_metadata.py index f133201..c33d73b 100644 --- a/tonic_textual/classes/generator_metadata/base_metadata.py +++ b/tonic_textual/classes/generator_metadata/base_metadata.py @@ -5,6 +5,25 @@ class BaseMetadata(dict): + """Base class for all generator metadata configurations. + + Provides common parameters shared by all metadata types. You typically + do not instantiate this class directly. Instead, use a specific metadata + subclass such as :class:`NameGeneratorMetadata` or + :class:`EmailGeneratorMetadata`. + + Parameters + ---------- + custom_generator : GeneratorType, optional + The generator type. Set automatically by subclasses. + generator_version : GeneratorVersion + The generator version to use. Default is ``V1``. 
+ swaps : dict of str to str, optional + A dictionary of explicit replacement mappings. When a detected value + matches a key in the dictionary, the corresponding value is used as + the synthesized replacement instead of a generated one. + """ + def __init__( self, custom_generator: Optional[GeneratorType] = None, diff --git a/tonic_textual/classes/generator_metadata/date_time_generator_metadata.py b/tonic_textual/classes/generator_metadata/date_time_generator_metadata.py index 59f169a..f05b8cb 100644 --- a/tonic_textual/classes/generator_metadata/date_time_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/date_time_generator_metadata.py @@ -7,6 +7,30 @@ class DateTimeGeneratorMetadata(BaseDateTimeGeneratorMetadata): + """Metadata configuration for date and time synthesis. + + Controls how synthesized date and time values are generated for the + ``DATE_TIME`` entity type. Dates are shifted by a random number of days + within a configurable range. + + Parameters + ---------- + scramble_unrecognized_dates : bool + When ``True``, dates that Textual cannot parse into a standard + format are scrambled. Default is ``True``. + additional_date_formats : list of str + A list of additional date format patterns that Textual should + recognize. Use Python ``strftime``/``strptime`` format codes. + Default is an empty list. + apply_constant_shift_to_document : bool + When ``True``, all dates within the same document are shifted by + the same random offset. This preserves relative time differences + between dates. Default is ``False``. + metadata : TimestampShiftMetadata + Configuration for the date shift range. By default, dates shift by + -7 to +7 days. 
+ """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/email_generator_metadata.py b/tonic_textual/classes/generator_metadata/email_generator_metadata.py index 78ce063..53b917e 100644 --- a/tonic_textual/classes/generator_metadata/email_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/email_generator_metadata.py @@ -6,6 +6,19 @@ class EmailGeneratorMetadata(BaseMetadata): + """Metadata configuration for email address synthesis. + + Controls how synthesized email addresses are generated for the + ``EMAIL_ADDRESS`` entity type. + + Parameters + ---------- + preserve_domain : bool + When ``True``, the domain portion of the email address is kept + intact. For example, ``"john@example.com"`` might become + ``"alan@example.com"``. Default is ``False``. + """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/hipaa_address_generator_metadata.py b/tonic_textual/classes/generator_metadata/hipaa_address_generator_metadata.py index 77b5ec5..b5aa762 100644 --- a/tonic_textual/classes/generator_metadata/hipaa_address_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/hipaa_address_generator_metadata.py @@ -6,6 +6,27 @@ class HipaaAddressGeneratorMetadata(BaseMetadata): + """Metadata configuration for HIPAA-compliant address synthesis. + + Controls how synthesized addresses are generated for location entity + types such as ``LOCATION_ADDRESS`` and ``LOCATION_ZIP``. By default, + address synthesis follows HIPAA Safe Harbor de-identification rules. + + Parameters + ---------- + use_non_hipaa_address_generator : bool + When ``True``, uses a non-HIPAA-compliant address generator that + may produce more realistic addresses, but does not guarantee HIPAA + Safe Harbor compliance. Default is ``False``. 
+ replace_truncated_zeros_in_zip_code : bool + When ``True``, for ZIP codes that are truncated to three digits + (per HIPAA Safe Harbor), the removed digits are replaced with + zeros. Default is ``True``. + realistic_synthetic_values : bool + When ``True``, generates realistic-looking synthetic address values. + Default is ``True``. + """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/name_generator_metadata.py b/tonic_textual/classes/generator_metadata/name_generator_metadata.py index a230f99..5a04422 100644 --- a/tonic_textual/classes/generator_metadata/name_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/name_generator_metadata.py @@ -6,6 +6,23 @@ class NameGeneratorMetadata(BaseMetadata): + """Metadata configuration for name synthesis. + + Controls how synthesized names are generated for entity types such as + ``NAME_GIVEN`` and ``NAME_FAMILY``. + + Parameters + ---------- + is_consistency_case_sensitive : bool + When ``True``, name consistency is case-sensitive. For example, + ``"john"`` and ``"John"`` are treated as different names and might + receive different replacements. Default is ``False``. + preserve_gender : bool + When ``True``, the synthesized name preserves the gender of the + original name. Male names are replaced with male names, and female + names are replaced with female names. Default is ``False``. 
+ """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/numeric_value_generator_metadata.py b/tonic_textual/classes/generator_metadata/numeric_value_generator_metadata.py index a7fe189..b3221ad 100644 --- a/tonic_textual/classes/generator_metadata/numeric_value_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/numeric_value_generator_metadata.py @@ -6,6 +6,18 @@ class NumericValueGeneratorMetadata(BaseMetadata): + """Metadata configuration for numeric value synthesis. + + Controls how synthesized numeric values are generated for the + ``NUMERIC_VALUE`` entity type. + + Parameters + ---------- + use_oracle_integer_pk_generator : bool + When ``True``, uses a generator designed for Oracle integer primary + keys. Default is ``False``. + """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/person_age_generator_metadata.py b/tonic_textual/classes/generator_metadata/person_age_generator_metadata.py index 37cd026..0bcc1f7 100644 --- a/tonic_textual/classes/generator_metadata/person_age_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/person_age_generator_metadata.py @@ -7,6 +7,21 @@ class PersonAgeGeneratorMetadata(BaseDateTimeGeneratorMetadata): + """Metadata configuration for person age synthesis. + + Controls how synthesized ages are generated for the ``PERSON_AGE`` + entity type. Ages are shifted by a configurable number of years. + + Parameters + ---------- + scramble_unrecognized_dates : bool + When ``True``, dates that Textual cannot parse into a standard + format are scrambled. Default is ``True``. + metadata : AgeShiftMetadata + Configuration for the age shift amount. By default, ages shift by + 7 years. 
+ """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/phone_number_generator_metadata.py b/tonic_textual/classes/generator_metadata/phone_number_generator_metadata.py index c25fe49..51ab56a 100644 --- a/tonic_textual/classes/generator_metadata/phone_number_generator_metadata.py +++ b/tonic_textual/classes/generator_metadata/phone_number_generator_metadata.py @@ -6,6 +6,22 @@ class PhoneNumberGeneratorMetadata(BaseMetadata): + """Metadata configuration for phone number synthesis. + + Controls how synthesized telephone numbers are generated for the + ``PHONE_NUMBER`` entity type. + + Parameters + ---------- + use_us_phone_number_generator : bool + When ``True``, generated telephone numbers use a US phone number format. + Default is ``False``. + replace_invalid_numbers : bool + When ``True``, phone numbers that are detected but are not valid + phone numbers are replaced with synthesized values. Default + is ``True``. + """ + def __init__( self, generator_version: GeneratorVersion = GeneratorVersion.V1, diff --git a/tonic_textual/classes/generator_metadata/timestamp_shift_metadata.py b/tonic_textual/classes/generator_metadata/timestamp_shift_metadata.py index e34461a..c241a9f 100644 --- a/tonic_textual/classes/generator_metadata/timestamp_shift_metadata.py +++ b/tonic_textual/classes/generator_metadata/timestamp_shift_metadata.py @@ -4,6 +4,24 @@ class TimestampShiftMetadata(BaseMetadata): + """Configuration for the date shift range used by + :class:`DateTimeGeneratorMetadata`. + + Defines the range of days by which dates can be shifted. The actual + shift for each date is randomly chosen within the specified range. + + Parameters + ---------- + left_shift_in_days : int, optional + The minimum (leftmost) shift in days. Use a negative value to shift + dates into the past. Default is ``-7``. + right_shift_in_days : int, optional + The maximum (rightmost) shift in days. 
Use a positive value to shift + dates into the future. Default is ``7``. + time_stamp_shift_in_days : int, optional + Deprecated. Use ``left_shift_in_days`` and ``right_shift_in_days`` + instead. + """ def __init__( self,