Skip to content

Commit 739a652

Browse files
authored
[mypyc] Add librt.strings.isalnum codepoint primitive (#21509)
Third PR for #21418, mirroring `librt.strings.isdigit`. Measured on a microbenchmark, this is roughly 30-40% faster for a char…
1 parent 93eff83 commit 739a652

6 files changed

Lines changed: 57 additions & 1 deletion

File tree

mypy/typeshed/stubs/librt/librt/strings.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ def read_f64_be(b: bytes, index: i64, /) -> float: ...
4545
# obtained via ord(s[i])). Negative inputs return False.
4646
def isspace(c: i32, /) -> bool: ...
4747
def isdigit(c: i32, /) -> bool: ...
48+
def isalnum(c: i32, /) -> bool: ...

mypyc/lib-rt/codepoint_extra_ops.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,8 @@ static inline bool LibRTStrings_IsDigit(int32_t c) {
1717
return c >= 0 && Py_UNICODE_ISDIGIT((Py_UCS4)c);
1818
}
1919

20+
static inline bool LibRTStrings_IsAlnum(int32_t c) {
21+
return c >= 0 && Py_UNICODE_ISALNUM((Py_UCS4)c);
22+
}
23+
2024
#endif // MYPYC_CODEPOINT_EXTRA_OPS_H

mypyc/lib-rt/strings/librt_strings.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,7 @@ cp_parse_i32(PyObject *arg, int32_t *out) {
11921192

11931193
DEFINE_CP_BOOL_WRAPPER(isspace, LibRTStrings_IsSpace)
11941194
DEFINE_CP_BOOL_WRAPPER(isdigit, LibRTStrings_IsDigit)
1195+
DEFINE_CP_BOOL_WRAPPER(isalnum, LibRTStrings_IsAlnum)
11951196

11961197
static PyMethodDef librt_strings_module_methods[] = {
11971198
{"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL,
@@ -1260,6 +1261,9 @@ static PyMethodDef librt_strings_module_methods[] = {
12601261
{"isdigit", cp_isdigit, METH_O,
12611262
PyDoc_STR("Test whether a codepoint (i32) is a Unicode digit.")
12621263
},
1264+
{"isalnum", cp_isalnum, METH_O,
1265+
PyDoc_STR("Test whether a codepoint (i32) is alphanumeric.")
1266+
},
12631267
{NULL, NULL, 0, NULL}
12641268
};
12651269

mypyc/primitives/librt_strings_ops.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,3 +413,12 @@
413413
error_kind=ERR_NEVER,
414414
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
415415
)
416+
417+
function_op(
418+
name="librt.strings.isalnum",
419+
arg_types=[int32_rprimitive],
420+
return_type=bool_rprimitive,
421+
c_function_name="LibRTStrings_IsAlnum",
422+
error_kind=ERR_NEVER,
423+
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
424+
)

mypyc/test-data/irbuild-librt-strings.test

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,17 @@ def is_d(c):
359359
L0:
360360
r0 = LibRTStrings_IsDigit(c)
361361
return r0
362+
363+
[case testLibrtStringsIsAlnumIR]
364+
from librt.strings import isalnum
365+
from mypy_extensions import i32
366+
367+
def is_an(c: i32) -> bool:
368+
return isalnum(c)
369+
[out]
370+
def is_an(c):
371+
c :: i32
372+
r0 :: bool
373+
L0:
374+
r0 = LibRTStrings_IsAlnum(c)
375+
return r0

mypyc/test-data/run-librt-strings.test

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1443,14 +1443,17 @@ def test_new_without_init_is_usable() -> None:
14431443
[case testLibrtStringsCodepointClassifiers_librt]
14441444
from typing import Any
14451445
from mypy_extensions import i32
1446-
from librt.strings import isspace, isdigit
1446+
from librt.strings import isspace, isdigit, isalnum
1447+
1448+
from testutil import assertRaises
14471449

14481450

14491451
def test_codepoint_classifiers() -> None:
14501452
# Negative values are not codepoints.
14511453
for bad in (i32(-1), i32(-113)):
14521454
assert not isspace(bad)
14531455
assert not isdigit(bad)
1456+
assert not isalnum(bad)
14541457
# Verify each codepoint primitive agrees with the matching str method
14551458
# across all Unicode codepoints, including the ord(chr(i)) round-trip.
14561459
# Any forces generic dispatch on the str side.
@@ -1460,3 +1463,24 @@ def test_codepoint_classifiers() -> None:
14601463
o = ord(c)
14611464
assert isspace(o) == isspace(i) == a.isspace()
14621465
assert isdigit(o) == isdigit(i) == a.isdigit()
1466+
assert isalnum(o) == isalnum(i) == a.isalnum()
1467+
1468+
1469+
def test_codepoint_classifiers_via_any() -> None:
1470+
# Iterate so the callee is opaque to mypyc and dispatch falls back to
1471+
# the PyMethodDef wrapper, exercising the i32 range check.
1472+
for fn, true_input, false_input in (
1473+
(isspace, " ", "a"),
1474+
(isdigit, "5", "a"),
1475+
(isalnum, "A", " "),
1476+
):
1477+
f: Any = fn
1478+
assert f(ord(true_input)) is True
1479+
assert f(ord(false_input)) is False
1480+
# Negative values are valid i32, just not codepoints.
1481+
assert f(-1) is False
1482+
# Inputs outside i32 range raise OverflowError through the wrapper.
1483+
with assertRaises(OverflowError, "codepoint out of i32 range"):
1484+
f(1 << 40)
1485+
with assertRaises(OverflowError, "codepoint out of i32 range"):
1486+
f(-(1 << 40))

0 commit comments

Comments
 (0)