From 819be52383b334d99263d92c68c34599fd24450f Mon Sep 17 00:00:00 2001
From: vaggelisd
Date: Tue, 24 Feb 2026 14:40:58 +0200
Subject: [PATCH] Add str.isdigit primitive

---
Reviewer notes (text between the "---" line and the first "diff --git" is
not part of the commit message):

* mypyc/lib-rt/str_ops.c adds CPyStr_IsDigit(). It returns false for an
  empty string; for an ASCII string it tests every character with
  Py_ISDIGIT; otherwise it switches on PyUnicode_KIND() and tests every
  code point with Py_UNICODE_ISDIGIT. The CHECK_ISDIGIT loop macro is
  defined inside the function and #undef'ed at its end.
* mypyc/primitives/str_ops.py registers the "isdigit" method on
  str_rprimitive, returning bool_rprimitive through CPyStr_IsDigit with
  error_kind=ERR_NEVER.
* mypyc/lib-rt/CPy.h declares the helper, str_operations.rst lists
  ``s.isdigit()``, and the ir.py fixture gains an isdigit(self) -> bool
  stub next to isspace.
* irbuild-str.test expects x.isdigit() on a str to lower to a single
  CPyStr_IsDigit(x) call. run-strings.test compares the str-typed call
  with the Any-typed call for every chr(i) with i below 0x110000 and
  asserts a set of ASCII, mixed and non-ASCII string cases.
* NOTE(review): the From: header carries no e-mail address in this copy;
  confirm against the original patch before applying it with git am.

 mypyc/doc/str_operations.rst     |  1 +
 mypyc/lib-rt/CPy.h               |  1 +
 mypyc/lib-rt/str_ops.c           | 38 ++++++++++++++++++++++++++++++++
 mypyc/primitives/str_ops.py      |  8 +++++++
 mypyc/test-data/fixtures/ir.py   |  1 +
 mypyc/test-data/irbuild-str.test | 11 +++++++++
 mypyc/test-data/run-strings.test | 34 ++++++++++++++++++++++++++++
 7 files changed, 94 insertions(+)

diff --git a/mypyc/doc/str_operations.rst b/mypyc/doc/str_operations.rst
index 2eebd2f6ab573..de298d91eddaf 100644
--- a/mypyc/doc/str_operations.rst
+++ b/mypyc/doc/str_operations.rst
@@ -38,6 +38,7 @@ Methods
 * ``s1.find(s2: str)``
 * ``s1.find(s2: str, start: int)``
 * ``s1.find(s2: str, start: int, end: int)``
+* ``s.isdigit()``
 * ``s.join(x: Iterable)``
 * ``s.lstrip()``
 * ``s.lstrip(chars: str)``
diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h
index 10f1448a2dde9..8fbd70f2c738b 100644
--- a/mypyc/lib-rt/CPy.h
+++ b/mypyc/lib-rt/CPy.h
@@ -781,6 +781,7 @@ Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged st
 CPyTagged CPyStr_Ord(PyObject *obj);
 PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count);
 bool CPyStr_IsSpace(PyObject *str);
+bool CPyStr_IsDigit(PyObject *str);
 
 // Bytes operations
 
diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c
index a5a7ba4b980d7..98af901a826ab 100644
--- a/mypyc/lib-rt/str_ops.c
+++ b/mypyc/lib-rt/str_ops.c
@@ -654,3 +654,41 @@ bool CPyStr_IsSpace(PyObject *str) {
     }
     return true;
 }
+
+
+bool CPyStr_IsDigit(PyObject *str) {
+    Py_ssize_t len = PyUnicode_GET_LENGTH(str);
+    if (len == 0) return false;
+
+#define CHECK_ISDIGIT(TYPE, DATA, CHECK) \
+    { \
+        const TYPE *data = (const TYPE *)(DATA); \
+        for (Py_ssize_t i = 0; i < len; i++) { \
+            if (!CHECK(data[i])) \
+                return false; \
+        } \
+    }
+
+    // ASCII fast path
+    if (PyUnicode_IS_ASCII(str)) {
+        CHECK_ISDIGIT(Py_UCS1, PyUnicode_1BYTE_DATA(str), Py_ISDIGIT);
+        return true;
+    }
+
+    switch (PyUnicode_KIND(str)) {
+    case PyUnicode_1BYTE_KIND:
+        CHECK_ISDIGIT(Py_UCS1, PyUnicode_1BYTE_DATA(str), Py_UNICODE_ISDIGIT);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        CHECK_ISDIGIT(Py_UCS2, PyUnicode_2BYTE_DATA(str), Py_UNICODE_ISDIGIT);
+        break;
+    case PyUnicode_4BYTE_KIND:
+        CHECK_ISDIGIT(Py_UCS4, PyUnicode_4BYTE_DATA(str), Py_UNICODE_ISDIGIT);
+        break;
+    default:
+        Py_UNREACHABLE();
+    }
+    return true;
+
+#undef CHECK_ISDIGIT
+}
diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py
index ac32225d7ac9b..6492b007439d0 100644
--- a/mypyc/primitives/str_ops.py
+++ b/mypyc/primitives/str_ops.py
@@ -405,6 +405,14 @@
     error_kind=ERR_NEVER,
 )
 
+method_op(
+    name="isdigit",
+    arg_types=[str_rprimitive],
+    return_type=bool_rprimitive,
+    c_function_name="CPyStr_IsDigit",
+    error_kind=ERR_NEVER,
+)
+
 # obj.decode()
 method_op(
     name="decode",
diff --git a/mypyc/test-data/fixtures/ir.py b/mypyc/test-data/fixtures/ir.py
index ee68f7b5a6110..118cfadf5e540 100644
--- a/mypyc/test-data/fixtures/ir.py
+++ b/mypyc/test-data/fixtures/ir.py
@@ -132,6 +132,7 @@ def removesuffix(self, suffix: str, /) -> str: ...
     def islower(self) -> bool: ...
     def count(self, substr: str, start: Optional[int] = None, end: Optional[int] = None) -> int: pass
     def isspace(self) -> bool: ...
+    def isdigit(self) -> bool: ...
 
 class float:
     def __init__(self, x: object) -> None: pass
diff --git a/mypyc/test-data/irbuild-str.test b/mypyc/test-data/irbuild-str.test
index bb43aa7d51c45..7f6f2831e36ad 100644
--- a/mypyc/test-data/irbuild-str.test
+++ b/mypyc/test-data/irbuild-str.test
@@ -983,3 +983,14 @@ def is_space(x):
 L0:
     r0 = CPyStr_IsSpace(x)
     return r0
+
+[case testStrIsDigit]
+def is_digit(x: str) -> bool:
+    return x.isdigit()
+[out]
+def is_digit(x):
+    x :: str
+    r0 :: bool
+L0:
+    r0 = CPyStr_IsDigit(x)
+    return r0
diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test
index ca309ea3f136c..12ade9f25265d 100644
--- a/mypyc/test-data/run-strings.test
+++ b/mypyc/test-data/run-strings.test
@@ -1269,3 +1269,37 @@ def test_isspace() -> None:
         c = chr(i)
         a: Any = c
         assert c.isspace() == a.isspace()
+
+[case testIsDigit]
+from typing import Any
+
+def test_isdigit() -> None:
+    for i in range(0x110000):
+        c = chr(i)
+        a: Any = c
+        assert c.isdigit() == a.isdigit()
+
+def test_isdigit_strings() -> None:
+    # ASCII digits
+    assert "0123456789".isdigit()
+    assert not "".isdigit()
+    assert not " ".isdigit()
+    assert not "a".isdigit()
+    assert not "abc".isdigit()
+    assert not "!@#".isdigit()
+
+    # Mixed ASCII
+    assert not "123abc".isdigit()
+    assert not "abc123".isdigit()
+    assert not "12 34".isdigit()
+    assert not "123!".isdigit()
+
+    # Unicode digits
+    assert "\u0660\u0661\u0662".isdigit()
+    assert "\u00b2\u00b3".isdigit()
+    assert "123\U0001d7ce\U0001d7cf\U0001d7d0".isdigit()
+
+    # Mixed digits and Unicode non-digits
+    assert not "\u00e9\u00e8".isdigit()
+    assert not "123\u00e9".isdigit()
+    assert not "\U0001d7ce!".isdigit()