From 72ef9fbe7006f707998fac59e4452fb3c29a1a57 Mon Sep 17 00:00:00 2001 From: Shreyas Pawar Date: Mon, 18 May 2026 09:47:20 +0000 Subject: [PATCH 1/2] Hi TN money class bug fix Signed-off-by: Shreyas Pawar --- Jenkinsfile | 2 +- .../hi/data/money/currency_singular.tsv | 9 + .../hi/data/money/major_minor_currencies.tsv | 3 +- .../text_normalization/hi/taggers/money.py | 157 +++++++++++++++--- .../hi/verbalizers/money.py | 131 +++++++++------ .../test_cases_money.txt | 8 + 6 files changed, 236 insertions(+), 74 deletions(-) create mode 100644 nemo_text_processing/text_normalization/hi/data/money/currency_singular.tsv diff --git a/Jenkinsfile b/Jenkinsfile index 24ac047eb..ed82ba514 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,7 +28,7 @@ pipeline { MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1' JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1' KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0' - HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0' + HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/05-18-26-0' DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0' } stages { diff --git a/nemo_text_processing/text_normalization/hi/data/money/currency_singular.tsv b/nemo_text_processing/text_normalization/hi/data/money/currency_singular.tsv new file mode 100644 index 000000000..af8d793f2 --- /dev/null +++ b/nemo_text_processing/text_normalization/hi/data/money/currency_singular.tsv @@ -0,0 +1,9 @@ +₹ रुपया +£ पाउंड +₩ वॉन +$ डॉलर +₺ लीरा +৳ टका +¥ येन +₦ नाइरा +€ यूरो \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv b/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv index cf62891d1..a9186acc3 100644 --- a/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv +++ 
b/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv @@ -1,4 +1,5 @@ रुपए पैसे +रुपया पैसे पाउंड पेंस वॉन जिओन डॉलर सेंट @@ -6,4 +7,4 @@ टका पैसे येन सेन नाइरा कोबो -यूरो सेंट +यूरो सेंट \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/hi/taggers/money.py b/nemo_text_processing/text_normalization/hi/taggers/money.py index 01e46352f..d587a9b61 100644 --- a/nemo_text_processing/text_normalization/hi/taggers/money.py +++ b/nemo_text_processing/text_normalization/hi/taggers/money.py @@ -16,18 +16,18 @@ from pynini.lib import pynutil from nemo_text_processing.text_normalization.hi.graph_utils import GraphFst, insert_space -from nemo_text_processing.text_normalization.hi.utils import get_abs_path +from nemo_text_processing.text_normalization.hi.utils import get_abs_path, load_labels -currency_graph = pynini.string_file(get_abs_path("data/money/currency.tsv")) +currency_graph = pynini.string_file(get_abs_path("data/money/currency.tsv")) +currency_singular_graph = pynini.string_file(get_abs_path("data/money/currency_singular.tsv")) class MoneyFst(GraphFst): """ Finite state transducer for classifying money, suppletive aware, e.g. 
- ₹५० -> money { money { currency_maj: "रुपए" integer_part: "पचास" } - ₹५०.५० -> money { currency_maj: "रुपए" integer_part: "पचास" fractional_part: "पचास" currency_min: "centiles" } - ₹०.५० -> money { currency_maj: "रुपए" integer_part: "शून्य" fractional_part: "पचास" currency_min: "centiles" } - Note that the 'centiles' string is a placeholder to handle by the verbalizer by applying the corresponding minor currency denomination + ₹५० -> money { currency_maj: "रुपए" integer_part: "पचास" } + ₹५०.५० -> money { currency_maj: "रुपए" integer_part: "पचास" fractional_part: "पचास" currency_min: "पैसे" } + ₹०.५० -> money { currency_maj: "रुपए" integer_part: "शून्य" fractional_part: "पचास" currency_min: "पैसे" } Args: cardinal: CardinalFst @@ -41,30 +41,147 @@ def __init__(self, cardinal: GraphFst): cardinal_graph = cardinal.final_graph + _en_to_hi_digit = pynini.string_file(get_abs_path("data/ordinal/en_to_hi_digit.tsv")) + _deva_to_ascii = pynini.invert(_en_to_hi_digit) + deva_to_ascii = pynini.closure(_deva_to_ascii | pynini.union(*"0123456789"), 1) + + _ascii_digit = pynini.union(*"0123456789") + _ascii_nonzero = pynini.union(*"123456789") + _deva_nonzero = pynini.union(*"१२३४५६७८९") + _any_digit = _ascii_digit | pynini.union(*"०१२३४५६७८९") + _any_nonzero = _ascii_nonzero | _deva_nonzero + optional_graph_negative = pynini.closure( - pynutil.insert("negative: ") + pynini.cross("-", "\"true\"") + insert_space, + pynutil.insert("negative: ") + pynini.cross("-", '"true"') + insert_space, 0, 1, ) - currency_major = pynutil.insert('currency_maj: "') + currency_graph + pynutil.insert('"') - integer = pynutil.insert('integer_part: "') + cardinal_graph + pynutil.insert('"') - fraction = pynutil.insert('fractional_part: "') + cardinal_graph + pynutil.insert('"') - currency_minor = pynutil.insert('currency_min: "') + pynutil.insert("centiles") + pynutil.insert('"') - graph_major_only = optional_graph_negative + currency_major + insert_space + integer - graph_major_and_minor = ( + 
currency_major = ( + pynutil.insert('currency_maj: "') + currency_graph + pynutil.insert('"') + ) + currency_major_singular = ( + pynutil.insert('currency_maj: "') + currency_singular_graph + pynutil.insert('"') + ) + + one = pynini.union("1", "१") + integer_one = ( + pynutil.insert('integer_part: "') + (one @ cardinal_graph) + pynutil.insert('"') + ) + integer = ( + pynutil.insert('integer_part: "') + cardinal_graph + pynutil.insert('"') + ) + + strip_trailing_zeros = ( + pynini.closure(_ascii_digit) + _ascii_nonzero + pynini.closure(pynutil.delete("0")) + ) + canonicalise = ( + (pynutil.delete("0") + _ascii_nonzero) + | (_ascii_nonzero + pynutil.insert("0")) + | (_ascii_nonzero + _ascii_digit) + ) + two_digits_fractional_part = deva_to_ascii @ strip_trailing_zeros @ canonicalise + + fraction = ( + pynutil.insert('fractional_part: "') + + (two_digits_fractional_part @ cardinal_graph) + + pynutil.insert('"') + ) + + optional_delete_fractional_zeros = pynini.closure( + pynutil.delete(".") + + pynini.closure(pynutil.delete("0") | pynutil.delete("०"), 1), + 0, + 1, + ) + + has_3plus_sig_digits = ( + _any_digit + _any_digit + _any_nonzero + pynini.closure(_any_digit) + ) + single_digit = _any_digit @ cardinal.single_digits_graph + decimal_digits = ( + pynutil.insert('fractional_part: "') + + single_digit + + pynini.closure(insert_space + single_digit) + + pynutil.insert('"') + ) + guarded_decimal_digits = has_3plus_sig_digits @ decimal_digits + + graph_decimal_path = ( optional_graph_negative + currency_major + insert_space - + integer + + pynutil.insert('integer_part: "') + + cardinal_graph + + pynutil.insert('"') + pynini.cross(".", " ") - + fraction + + guarded_decimal_digits + ).optimize() + + graph_major_only_singular = ( + optional_graph_negative + + currency_major_singular + insert_space - + currency_minor - ) + + integer_one + + optional_delete_fractional_zeros + ).optimize() + + graph_major_only = ( + optional_graph_negative + + currency_major + + insert_space 
+ + integer + + optional_delete_fractional_zeros + ).optimize() + + maj_labels = load_labels(get_abs_path("data/money/currency.tsv")) + maj_singular_labels = load_labels(get_abs_path("data/money/currency_singular.tsv")) + maj_to_min = dict(load_labels(get_abs_path("data/money/major_minor_currencies.tsv"))) + + def _build_major_and_minor(sym_maj_labels, int_graph): + result = None + for sym, maj in sym_maj_labels: + min_name = maj_to_min.get(maj) + if not min_name: + continue + + curr_maj = ( + pynutil.insert('currency_maj: "') + + pynini.cross(sym, maj) + + pynutil.insert('"') + ) + curr_min = ( + pynutil.insert('currency_min: "') + + pynutil.insert(min_name) + + pynutil.insert('"') + ) + + g = ( + optional_graph_negative + + curr_maj + + insert_space + + int_graph + + pynini.cross(".", " ") + + fraction + + insert_space + + curr_min + ).optimize() - graph_currencies = graph_major_only | graph_major_and_minor + result = g if result is None else pynini.union(result, g).optimize() + + return result + + graph_major_and_minor = _build_major_and_minor(maj_labels, integer) + graph_major_and_minor_singular = _build_major_and_minor(maj_singular_labels, integer_one) + + graph_currencies = ( + pynutil.add_weight( + graph_major_only_singular | graph_major_and_minor_singular, -0.001 + ) + | pynutil.add_weight(graph_decimal_path, -0.0005) + | graph_major_only + | graph_major_and_minor + ) graph = graph_currencies.optimize() - final_graph = self.add_tokens(graph) - self.fst = final_graph + self.fst = self.add_tokens(graph) \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/money.py b/nemo_text_processing/text_normalization/hi/verbalizers/money.py index 048140295..d8a839ed1 100644 --- a/nemo_text_processing/text_normalization/hi/verbalizers/money.py +++ b/nemo_text_processing/text_normalization/hi/verbalizers/money.py @@ -15,26 +15,16 @@ import pynini from pynini.lib import pynutil -major_minor_currencies = { - "रुपए": "पैसे", - 
"पाउंड": "पेंस", - "वॉन": "जिओन", - "डॉलर": "सेंट", - "लीरा": "कुरस", - "टका": "पैसे", - "येन": "सेन", - "नाइरा": "कोबो", - "यूरो": "सेंट", -} from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, NEMO_SPACE, GraphFst +from nemo_text_processing.text_normalization.hi.utils import get_abs_path, load_labels class MoneyFst(GraphFst): """ Finite state transducer for verbalizing money, e.g. - money { integer_part: "बारह" currency_maj: "रुपए" } -> बारह रुपए - money { integer_part: "बारह" currency_maj: "रुपए" fractional_part: "पचास" currency_min: "centiles" } -> बारह रुपए पचास पैसे - money { currency_maj: "रुपए" integer_part: "शून्य" fractional_part: "पचास" currency_min: "centiles" } -> पचास पैसे + money { currency_maj: "रुपए" integer_part: "बारह" } -> बारह रुपए + money { currency_maj: "रुपए" integer_part: "बारह" fractional_part: "पचास" currency_min: "पैसे" } -> बारह रुपए पचास पैसे + money { currency_maj: "रुपए" integer_part: "शून्य" fractional_part: "पचास" currency_min: "पैसे" } -> पचास पैसे Args: cardinal: CardinalFst @@ -46,55 +36,92 @@ class MoneyFst(GraphFst): def __init__(self): super().__init__(name="money", kind="verbalize") - currency_major = pynutil.delete('currency_maj: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') - - integer_part = pynutil.delete('integer_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + sp = pynini.accep(NEMO_SPACE) + currency_major = ( + pynutil.delete('currency_maj: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + ) + integer_part = ( + pynutil.delete('integer_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + ) fractional_part = ( - pynutil.delete('fractional_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + pynutil.delete('fractional_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + ) + + currency_minor = ( + pynutil.delete('currency_min: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + 
pynutil.delete('"') ) - # Handles major denominations only - graph_major_only = integer_part + pynini.accep(NEMO_SPACE) + currency_major + graph_major_only = integer_part + sp + currency_major - # Handles both major and minor denominations - major_minor_graphs = [] + all_major_names = [ + maj for maj, _ in load_labels(get_abs_path("data/money/major_minor_currencies.tsv")) + ] - # Handles minor denominations only - minor_graphs = [] - - # Logic for handling minor denominations - for major, minor in major_minor_currencies.items(): - graph_major = pynutil.delete('currency_maj: "') + pynini.accep(major) + pynutil.delete('"') - graph_minor = pynutil.delete('currency_min: "') + pynini.cross("centiles", minor) + pynutil.delete('"') - graph_major_minor_partial = ( - integer_part - + pynini.accep(NEMO_SPACE) - + graph_major - + pynini.accep(NEMO_SPACE) - + fractional_part - + pynini.accep(NEMO_SPACE) - + graph_minor - ) - major_minor_graphs.append(graph_major_minor_partial) + major_minor_graphs = [] + minor_only_graphs = [] - graph_minor_partial = ( - pynutil.delete('integer_part: "शून्य"') - + pynutil.delete(NEMO_SPACE) - + pynutil.delete('currency_maj: "') + for major in all_major_names: + graph_major_slot = ( + pynutil.delete('currency_maj: "') + pynutil.delete(major) + pynutil.delete('"') - + pynutil.delete(NEMO_SPACE) - + fractional_part - + pynini.accep(NEMO_SPACE) - + graph_minor ) - minor_graphs.append(graph_minor_partial) + + major_minor_graphs.append( + graph_major_slot + + sp + + integer_part + + pynutil.insert(NEMO_SPACE) + + pynutil.insert(major) + + sp + + fractional_part + + sp + + currency_minor + ) + + minor_only_graphs.append( + graph_major_slot + + sp + + pynutil.delete('integer_part: "शून्य"') + + sp + + fractional_part + + sp + + currency_minor + ) graph_major_minor = pynini.union(*major_minor_graphs) - graph_minor_only = pynini.union(*minor_graphs) + graph_minor_only = pynini.union(*minor_only_graphs) - graph = graph_major_only | graph_major_minor | 
pynutil.add_weight(graph_minor_only, -0.1) + decimal_graphs = [] + for major in all_major_names: + decimal_graphs.append( + pynutil.delete('currency_maj: "') + + pynutil.delete(major) + + pynutil.delete('"') + + sp + + integer_part + + sp + + pynutil.insert(" दशमलव ") + + fractional_part + + pynutil.insert(NEMO_SPACE) + + pynutil.insert(major) + ) + graph_decimal_money = pynini.union(*decimal_graphs) + + graph = ( + graph_major_only + | graph_major_minor + | pynutil.add_weight(graph_minor_only, -0.1) + | graph_decimal_money + ) - delete_tokens = self.delete_tokens(graph) - self.fst = delete_tokens.optimize() + self.fst = self.delete_tokens(graph).optimize() \ No newline at end of file diff --git a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt index 0b199ff37..43985374a 100644 --- a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt +++ b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt @@ -116,3 +116,11 @@ $९.९९~नौ डॉलर निन्यानबे सेंट ₦१०.२७~दस नाइरा सत्ताईस कोबो €200.90~दो सौ यूरो नब्बे सेंट €१२३४.७५~एक हज़ार दो सौ चौंतीस यूरो पचहत्तर सेंट +$1.12~एक डॉलर बारह सेंट +$1.123~एक दशमलव एक दो तीन डॉलर +$1.1234~एक दशमलव एक दो तीन चार डॉलर +₹2.2000~दो रुपए बीस पैसे +$1.2000~एक डॉलर बीस सेंट +₹1.500~एक रुपया पचास पैसे +₹5.00~पाँच रुपए +₹१~एक रुपया \ No newline at end of file From efc28002d1c5a2b6ae2041814cd6747cd98a8e13 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 10:14:13 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../text_normalization/hi/taggers/money.py | 77 ++++++------------- .../hi/verbalizers/money.py | 69 ++++++----------- 2 files changed, 46 insertions(+), 100 deletions(-) diff --git 
a/nemo_text_processing/text_normalization/hi/taggers/money.py b/nemo_text_processing/text_normalization/hi/taggers/money.py index d587a9b61..16f389ae7 100644 --- a/nemo_text_processing/text_normalization/hi/taggers/money.py +++ b/nemo_text_processing/text_normalization/hi/taggers/money.py @@ -18,7 +18,7 @@ from nemo_text_processing.text_normalization.hi.graph_utils import GraphFst, insert_space from nemo_text_processing.text_normalization.hi.utils import get_abs_path, load_labels -currency_graph = pynini.string_file(get_abs_path("data/money/currency.tsv")) +currency_graph = pynini.string_file(get_abs_path("data/money/currency.tsv")) currency_singular_graph = pynini.string_file(get_abs_path("data/money/currency_singular.tsv")) @@ -42,14 +42,14 @@ def __init__(self, cardinal: GraphFst): cardinal_graph = cardinal.final_graph _en_to_hi_digit = pynini.string_file(get_abs_path("data/ordinal/en_to_hi_digit.tsv")) - _deva_to_ascii = pynini.invert(_en_to_hi_digit) - deva_to_ascii = pynini.closure(_deva_to_ascii | pynini.union(*"0123456789"), 1) + _deva_to_ascii = pynini.invert(_en_to_hi_digit) + deva_to_ascii = pynini.closure(_deva_to_ascii | pynini.union(*"0123456789"), 1) - _ascii_digit = pynini.union(*"0123456789") + _ascii_digit = pynini.union(*"0123456789") _ascii_nonzero = pynini.union(*"123456789") - _deva_nonzero = pynini.union(*"१२३४५६७८९") - _any_digit = _ascii_digit | pynini.union(*"०१२३४५६७८९") - _any_nonzero = _ascii_nonzero | _deva_nonzero + _deva_nonzero = pynini.union(*"१२३४५६७८९") + _any_digit = _ascii_digit | pynini.union(*"०१२३४५६७८९") + _any_nonzero = _ascii_nonzero | _deva_nonzero optional_graph_negative = pynini.closure( pynutil.insert("negative: ") + pynini.cross("-", '"true"') + insert_space, @@ -57,24 +57,14 @@ def __init__(self, cardinal: GraphFst): 1, ) - currency_major = ( - pynutil.insert('currency_maj: "') + currency_graph + pynutil.insert('"') - ) - currency_major_singular = ( - pynutil.insert('currency_maj: "') + currency_singular_graph + 
pynutil.insert('"') - ) + currency_major = pynutil.insert('currency_maj: "') + currency_graph + pynutil.insert('"') + currency_major_singular = pynutil.insert('currency_maj: "') + currency_singular_graph + pynutil.insert('"') one = pynini.union("1", "१") - integer_one = ( - pynutil.insert('integer_part: "') + (one @ cardinal_graph) + pynutil.insert('"') - ) - integer = ( - pynutil.insert('integer_part: "') + cardinal_graph + pynutil.insert('"') - ) + integer_one = pynutil.insert('integer_part: "') + (one @ cardinal_graph) + pynutil.insert('"') + integer = pynutil.insert('integer_part: "') + cardinal_graph + pynutil.insert('"') - strip_trailing_zeros = ( - pynini.closure(_ascii_digit) + _ascii_nonzero + pynini.closure(pynutil.delete("0")) - ) + strip_trailing_zeros = pynini.closure(_ascii_digit) + _ascii_nonzero + pynini.closure(pynutil.delete("0")) canonicalise = ( (pynutil.delete("0") + _ascii_nonzero) | (_ascii_nonzero + pynutil.insert("0")) @@ -83,22 +73,17 @@ def __init__(self, cardinal: GraphFst): two_digits_fractional_part = deva_to_ascii @ strip_trailing_zeros @ canonicalise fraction = ( - pynutil.insert('fractional_part: "') - + (two_digits_fractional_part @ cardinal_graph) - + pynutil.insert('"') + pynutil.insert('fractional_part: "') + (two_digits_fractional_part @ cardinal_graph) + pynutil.insert('"') ) optional_delete_fractional_zeros = pynini.closure( - pynutil.delete(".") - + pynini.closure(pynutil.delete("0") | pynutil.delete("०"), 1), + pynutil.delete(".") + pynini.closure(pynutil.delete("0") | pynutil.delete("०"), 1), 0, 1, ) - has_3plus_sig_digits = ( - _any_digit + _any_digit + _any_nonzero + pynini.closure(_any_digit) - ) - single_digit = _any_digit @ cardinal.single_digits_graph + has_3plus_sig_digits = _any_digit + _any_digit + _any_nonzero + pynini.closure(_any_digit) + single_digit = _any_digit @ cardinal.single_digits_graph decimal_digits = ( pynutil.insert('fractional_part: "') + single_digit @@ -127,16 +112,12 @@ def __init__(self, 
cardinal: GraphFst): ).optimize() graph_major_only = ( - optional_graph_negative - + currency_major - + insert_space - + integer - + optional_delete_fractional_zeros + optional_graph_negative + currency_major + insert_space + integer + optional_delete_fractional_zeros ).optimize() - maj_labels = load_labels(get_abs_path("data/money/currency.tsv")) + maj_labels = load_labels(get_abs_path("data/money/currency.tsv")) maj_singular_labels = load_labels(get_abs_path("data/money/currency_singular.tsv")) - maj_to_min = dict(load_labels(get_abs_path("data/money/major_minor_currencies.tsv"))) + maj_to_min = dict(load_labels(get_abs_path("data/money/major_minor_currencies.tsv"))) def _build_major_and_minor(sym_maj_labels, int_graph): result = None @@ -145,16 +126,8 @@ def _build_major_and_minor(sym_maj_labels, int_graph): if not min_name: continue - curr_maj = ( - pynutil.insert('currency_maj: "') - + pynini.cross(sym, maj) - + pynutil.insert('"') - ) - curr_min = ( - pynutil.insert('currency_min: "') - + pynutil.insert(min_name) - + pynutil.insert('"') - ) + curr_maj = pynutil.insert('currency_maj: "') + pynini.cross(sym, maj) + pynutil.insert('"') + curr_min = pynutil.insert('currency_min: "') + pynutil.insert(min_name) + pynutil.insert('"') g = ( optional_graph_negative @@ -171,17 +144,15 @@ def _build_major_and_minor(sym_maj_labels, int_graph): return result - graph_major_and_minor = _build_major_and_minor(maj_labels, integer) + graph_major_and_minor = _build_major_and_minor(maj_labels, integer) graph_major_and_minor_singular = _build_major_and_minor(maj_singular_labels, integer_one) graph_currencies = ( - pynutil.add_weight( - graph_major_only_singular | graph_major_and_minor_singular, -0.001 - ) + pynutil.add_weight(graph_major_only_singular | graph_major_and_minor_singular, -0.001) | pynutil.add_weight(graph_decimal_path, -0.0005) | graph_major_only | graph_major_and_minor ) graph = graph_currencies.optimize() - self.fst = self.add_tokens(graph) \ No newline at end of 
file + self.fst = self.add_tokens(graph) diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/money.py b/nemo_text_processing/text_normalization/hi/verbalizers/money.py index d8a839ed1..1e5da99e4 100644 --- a/nemo_text_processing/text_normalization/hi/verbalizers/money.py +++ b/nemo_text_processing/text_normalization/hi/verbalizers/money.py @@ -38,54 +38,34 @@ def __init__(self): sp = pynini.accep(NEMO_SPACE) - currency_major = ( - pynutil.delete('currency_maj: "') - + pynini.closure(NEMO_NOT_QUOTE, 1) - + pynutil.delete('"') - ) - integer_part = ( - pynutil.delete('integer_part: "') - + pynini.closure(NEMO_NOT_QUOTE, 1) - + pynutil.delete('"') - ) + currency_major = pynutil.delete('currency_maj: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + integer_part = pynutil.delete('integer_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') fractional_part = ( - pynutil.delete('fractional_part: "') - + pynini.closure(NEMO_NOT_QUOTE, 1) - + pynutil.delete('"') - ) - - currency_minor = ( - pynutil.delete('currency_min: "') - + pynini.closure(NEMO_NOT_QUOTE, 1) - + pynutil.delete('"') + pynutil.delete('fractional_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') ) + currency_minor = pynutil.delete('currency_min: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"') + graph_major_only = integer_part + sp + currency_major - all_major_names = [ - maj for maj, _ in load_labels(get_abs_path("data/money/major_minor_currencies.tsv")) - ] + all_major_names = [maj for maj, _ in load_labels(get_abs_path("data/money/major_minor_currencies.tsv"))] major_minor_graphs = [] - minor_only_graphs = [] + minor_only_graphs = [] for major in all_major_names: - graph_major_slot = ( - pynutil.delete('currency_maj: "') - + pynutil.delete(major) - + pynutil.delete('"') - ) + graph_major_slot = pynutil.delete('currency_maj: "') + pynutil.delete(major) + pynutil.delete('"') major_minor_graphs.append( graph_major_slot + sp - + 
integer_part + + integer_part + pynutil.insert(NEMO_SPACE) - + pynutil.insert(major) + + pynutil.insert(major) + sp - + fractional_part + + fractional_part + sp - + currency_minor + + currency_minor ) minor_only_graphs.append( @@ -93,35 +73,30 @@ def __init__(self): + sp + pynutil.delete('integer_part: "शून्य"') + sp - + fractional_part + + fractional_part + sp - + currency_minor + + currency_minor ) graph_major_minor = pynini.union(*major_minor_graphs) - graph_minor_only = pynini.union(*minor_only_graphs) + graph_minor_only = pynini.union(*minor_only_graphs) decimal_graphs = [] for major in all_major_names: decimal_graphs.append( pynutil.delete('currency_maj: "') - + pynutil.delete(major) + + pynutil.delete(major) + pynutil.delete('"') + sp - + integer_part - + sp + + integer_part + + sp + pynutil.insert(" दशमलव ") - + fractional_part + + fractional_part + pynutil.insert(NEMO_SPACE) - + pynutil.insert(major) + + pynutil.insert(major) ) graph_decimal_money = pynini.union(*decimal_graphs) - graph = ( - graph_major_only - | graph_major_minor - | pynutil.add_weight(graph_minor_only, -0.1) - | graph_decimal_money - ) + graph = graph_major_only | graph_major_minor | pynutil.add_weight(graph_minor_only, -0.1) | graph_decimal_money - self.fst = self.delete_tokens(graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(graph).optimize()