From 753646696ee0c5a13517166ad396c51fa57e361f Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 1 Mar 2026 15:15:55 +0000 Subject: [PATCH 1/4] Address some review comments --- mypy/nativeparse.py | 41 ++++++++++++----------------------------- mypy/nodes.py | 3 ++- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index c976ba9284401..6ff43e6075f59 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -22,14 +22,15 @@ import os from typing import Any, Final, cast -import ast_serialize # type: ignore[import-untyped, import-not-found, unused-ignore] +import ast_serialize # type: ignore[import-not-found, unused-ignore] from librt.internal import ( read_float as read_float_bare, read_int as read_int_bare, read_str as read_str_bare, ) -from mypy import message_registry, nodes, types +from mypy import errorcodes as codes, message_registry, nodes, types +from mypy.errorcodes import ErrorCode from mypy.cache import ( DICT_STR_GEN, END_TAG, @@ -178,17 +179,9 @@ def add_error( column: int, *, blocker: bool = False, - code: str | None = None, + code: ErrorCode = codes.MISC, ) -> None: - """Report an error at a specific location. - - Args: - message: Error message to display - line: Line number where error occurred - column: Column number where error occurred - blocker: If True, this error blocks further analysis - code: Error code for categorization - """ + """Report an error at a specific location.""" self.errors.append( {"line": line, "column": column, "message": message, "blocker": blocker, "code": code} ) @@ -372,21 +365,17 @@ def read_statement(state: State, data: ReadBuffer) -> Statement: # Build from the bottom up, starting with the final else body current_else = else_body - # Process elif clauses in reverse order - for i in range(len(elif_exprs) - 1, -1, -1): - elif_stmt = IfStmt([elif_exprs[i]], [elif_bodies[i]], current_else) - # Set location from the elif expression - elif_stmt.line = elif_exprs[i].line - elif_stmt.column = elif_exprs[i].column - # Set end location based on what follows + for elif_expr, elif_body in reversed(list(zip(elif_exprs, elif_bodies))): + elif_stmt = IfStmt([elif_expr], [elif_body], current_else) + elif_stmt.line = elif_expr.line + elif_stmt.column = elif_expr.column if current_else is not None: elif_stmt.end_line = current_else.end_line elif_stmt.end_column = current_else.end_column else: - elif_stmt.end_line = elif_bodies[i].end_line - elif_stmt.end_column = elif_bodies[i].end_column + elif_stmt.end_line = elif_body.end_line + elif_stmt.end_column = elif_body.end_column - # Wrap in a Block to become the else clause for the outer if current_else = Block([elif_stmt]) set_line_column_range(current_else, elif_stmt) @@ -1064,7 +1053,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, - code="misc", ) return invalid @@ -1090,7 +1078,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, - code="misc", ) # Process keyword arguments @@ -1103,7 +1090,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, - code="misc", ) name = extract_arg_name(kw_value) elif kw_name == "type": @@ -1114,7 +1100,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, - code="misc", ) typ = kw_value else: @@ -1124,7 +1109,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, - code="misc", ) # Create CallableArgument @@ -1587,7 +1571,7 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: read_loc(data, expr) expect_end_tag(data) return expr - elif tag == nodes.NAMED_EXPR: + elif tag == nodes.ASSIGNMENT_EXPR: target = read_expression(state, data) value = read_expression(state, data) # AssignmentExpr expects target to be a NameExpr @@ -1773,7 +1757,6 @@ def fail_merge_overload(state: State, node: IfStmt) -> None: node.line, node.column, blocker=False, - code="misc", ) diff --git a/mypy/nodes.py b/mypy/nodes.py index 46d1c870d0556..2eb366e6784e1 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -5314,6 +5314,7 @@ def set_info(node: SymbolNode, info: TypeInfo) -> None: CLASS_DEF: Final[Tag] = 60 SYMBOL_TABLE_NODE: Final[Tag] = 61 +# Tags 160+ are shared with the ast_serialize Rust extension and must be kept in sync. EXPR_STMT: Final[Tag] = 160 CALL_EXPR: Final[Tag] = 161 NAME_EXPR: Final[Tag] = 162 @@ -5362,7 +5363,7 @@ def set_info(node: SymbolNode, info: TypeInfo) -> None: FSTRING_EXPR: Final[Tag] = 205 FSTRING_INTERPOLATION: Final[Tag] = 206 LAMBDA_EXPR: Final[Tag] = 207 -NAMED_EXPR: Final[Tag] = 208 +ASSIGNMENT_EXPR: Final[Tag] = 208 STAR_EXPR: Final[Tag] = 209 BYTES_EXPR: Final[Tag] = 210 GLOBAL_DECL: Final[Tag] = 211 From a050ea0457f291583b11ca08d3b9b1d9cceb45dc Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 1 Mar 2026 15:16:26 +0000 Subject: [PATCH 2/4] Lint --- mypy/nativeparse.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index 6ff43e6075f59..650dfdfdd3bb7 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -30,7 +30,6 @@ ) from mypy import errorcodes as codes, message_registry, nodes, types -from mypy.errorcodes import ErrorCode from mypy.cache import ( DICT_STR_GEN, END_TAG, @@ -48,6 +47,7 @@ read_str_opt, read_tag, ) +from mypy.errorcodes import ErrorCode from mypy.nodes import ( ARG_KINDS, ARG_POS, @@ -1753,10 +1753,7 @@ def is_stripped_if_stmt(stmt: Statement) -> bool: def fail_merge_overload(state: State, node: IfStmt) -> None: """Report an error when overloads cannot be merged due to unknown condition.""" state.add_error( - message_registry.FAILED_TO_MERGE_OVERLOADS.value, - node.line, - node.column, - blocker=False, + message_registry.FAILED_TO_MERGE_OVERLOADS.value, node.line, node.column, blocker=False ) From 922835b086e693813603fbf2063bb467f8f30197 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 1 Mar 2026 15:38:28 +0000 Subject: [PATCH 3/4] Simplify overly verbose or pointless LLM-generated comments --- mypy/nativeparse.py | 101 ++++++-------------------------------------- 1 file changed, 12 insertions(+), 89 deletions(-) diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index 650dfdfdd3bb7..3a6bc757df2b6 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -192,23 +192,11 @@ def native_parse( ) -> tuple[MypyFile, list[dict[str, Any]], TypeIgnores]: """Parse a Python file using the native Rust-based parser. - Uses the ast_serialize Rust extension to parse Python code and deserialize - the resulting AST directly into mypy's native AST representation. - - Args: - filename: Path to the Python source file to parse - options: Mypy options affecting parsing behavior (e.g., Python version) - skip_function_bodies: If True, many function and method bodies are omitted from - the AST, useful for parsing stubs or extracting signatures without full - implementation details - imports_only: If True create an empty MypyFile with actual serialized defs - stored in binary_data. - - Returns: - A tuple containing: - - MypyFile: The parsed AST as a mypy AST node - - list[dict[str, Any]]: List of parse errors and deserialization errors - - TypeIgnores: List of (line_number, ignored_codes) tuples for type: ignore comments + Return (MypyFile, errors, type_ignores). + + The caller should set these additional attributes on the returned MypyFile: + - ignored_lines: dict of type ignore comments (from the TypeIgnores return value) + - is_stub: whether the file is a .pyi stub """ # If the path is a directory, return empty AST (matching fastparse behavior) # This can happen for packages that only contain .pyc files without source @@ -238,7 +226,6 @@ def native_parse( b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings ) node.uses_template_strings = uses_template_strings - # Merge deserialization errors with parsing errors all_errors = errors + state.errors return node, all_errors, ignores @@ -332,22 +319,17 @@ def read_statement(state: State, data: ReadBuffer) -> Statement: expect_end_tag(data) return a elif tag == nodes.OPERATOR_ASSIGNMENT_STMT: - # Read operator string op = read_str(data) - # Read lvalue (target) lvalue = read_expression(state, data) - # Read rvalue (value) rvalue = read_expression(state, data) stmt = OperatorAssignmentStmt(op, lvalue, rvalue) read_loc(data, stmt) expect_end_tag(data) return stmt elif tag == nodes.IF_STMT: - # Read the main if condition and body expr = read_expression(state, data) body = read_block(state, data) - # Read elif clauses num_elif = read_int(data) elif_exprs = [] elif_bodies = [] @@ -570,10 +552,9 @@ def read_statement(state: State, data: ReadBuffer) -> Statement: def read_parameters(state: State, data: ReadBuffer) -> tuple[list[Argument], bool]: - """Read function/lambda parameters from the buffer. + """Read function/lambda parameters. - Returns: - A tuple of (arguments list, has_annotations flag) + Return (parameters, has_annotations). """ expect_tag(data, LIST_GEN) n_args = read_int_bare(data) @@ -596,7 +577,6 @@ def read_parameters(state: State, data: ReadBuffer) -> tuple[list[Argument], boo default = None pos_only = read_bool(data) - # Apply implicit_optional if enabled and default is None if state.options.implicit_optional and ann is not None: optional = isinstance(default, NameExpr) and default.name == "None" if isinstance(ann, UnboundType): @@ -712,7 +692,6 @@ def read_class_def(state: State, data: ReadBuffer) -> ClassDef: else: type_params = None - # Keywords (all keyword arguments including metaclass) expect_tag(data, DICT_STR_GEN) n_keywords = read_int_bare(data) keywords = [] @@ -721,9 +700,7 @@ def read_class_def(state: State, data: ReadBuffer) -> ClassDef: value = read_expression(state, data) keywords.append((key, value)) - # Extract metaclass from keywords if present metaclass = dict(keywords).get("metaclass") if keywords else None - # Remove metaclass from keywords since it's passed as a separate field filtered_keywords = [(k, v) for k, v in keywords if k != "metaclass"] if keywords else None class_def = ClassDef( @@ -835,7 +812,7 @@ def read_try_stmt(state: State, data: ReadBuffer) -> TryStmt: else: finally_body = None - # Read is_star flag (for except* in Python 3.11+) + # except* (Python 3.11+) is_star = read_bool(data) stmt = TryStmt(body, vars_list, types_list, handlers, else_body, finally_body) @@ -853,7 +830,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: n = read_int_bare(data) args = tuple(read_type(state, data) for i in range(n)) empty_tuple_index = read_bool(data) - # Read optional original_str_expr t = read_tag(data) if t == LITERAL_NONE: original_str_expr = None @@ -861,7 +837,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: original_str_expr = read_str_bare(data) else: assert False, f"Unexpected tag for original_str_expr: {t}" - # Read optional original_str_fallback t = read_tag(data) if t == LITERAL_NONE: original_str_fallback = None @@ -880,13 +855,10 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return unbound elif tag == types.UNION_TYPE: - # Read items list expect_tag(data, LIST_GEN) n = read_int_bare(data) items = [read_type(state, data) for i in range(n)] - # Read uses_pep604_syntax flag uses_pep604_syntax = read_bool(data) - # Read optional original_str_expr t = read_tag(data) if t == LITERAL_NONE: original_str_expr = None @@ -894,7 +866,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: original_str_expr = read_str_bare(data) else: assert False, f"Unexpected tag for original_str_expr: {t}" - # Read optional original_str_fallback t = read_tag(data) if t == LITERAL_NONE: original_str_fallback = None @@ -910,7 +881,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return union elif tag == types.LIST_TYPE: - # Read items list expect_tag(data, LIST_GEN) n = read_int_bare(data) items = [read_type(state, data) for i in range(n)] @@ -919,7 +889,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return type_list elif tag == types.TUPLE_TYPE: - # Read items list expect_tag(data, LIST_GEN) n = read_int_bare(data) items = [read_type(state, data) for i in range(n)] @@ -949,7 +918,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return typeddict_type elif tag == types.ELLIPSIS_TYPE: - # EllipsisType has no attributes ellipsis_type = EllipsisType() read_loc(data, ellipsis_type) expect_end_tag(data) @@ -991,31 +959,23 @@ def read_type(state: State, data: ReadBuffer) -> Type: def stringify_type_name(typ: Type) -> str | None: - """Extract qualified name from a type (for Arg constructor detection).""" if isinstance(typ, UnboundType): return typ.name return None def extract_arg_name(typ: Type) -> str | None: - """Extract argument name from a type (for Arg name parameter).""" if isinstance(typ, RawExpressionType) and typ.base_type_name == "builtins.str": return typ.literal_value # type: ignore[return-value] elif isinstance(typ, UnboundType): - # String literals in type context are parsed as UnboundType (forward references) - # For Arg names, these are typically simple names without dots if typ.name == "None": return None - # Return the name as-is (it's the argument name) return typ.name return None # Invalid, but let validation handle it def read_call_type(state: State, data: ReadBuffer) -> Type: - """Read Call in type context - check if it's an Arg/DefaultArg/VarArg/KwArg constructor. - - This performs validation and error reporting similar to mypy/fastparse.py. - """ + """Read Call in type context (Arg/DefaultArg/VarArg/KwArg constructor).""" callee_type = read_type(state, data) # Read positional arguments @@ -1038,16 +998,13 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: kw_value = read_type(state, data) kwargs.append((kw_name, kw_value)) - # Try to detect Arg/DefaultArg/VarArg/KwArg pattern constructor = stringify_type_name(callee_type) - # We'll read location before processing errors so we can report them correctly invalid = AnyType(TypeOfAny.from_error) read_loc(data, invalid) expect_end_tag(data) if not constructor: - # ARG_CONSTRUCTOR_NAME_EXPECTED state.add_error( message_registry.ARG_CONSTRUCTOR_NAME_EXPECTED.value, invalid.line, @@ -1072,7 +1029,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: name = extract_arg_name(arg) name_set_from_positional = True else: - # ARG_CONSTRUCTOR_TOO_MANY_ARGS state.add_error( message_registry.ARG_CONSTRUCTOR_TOO_MANY_ARGS.value, invalid.line, @@ -1083,7 +1039,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: # Process keyword arguments for kw_name, kw_value in kwargs: if kw_name == "name": - # MULTIPLE_VALUES_FOR_NAME_KWARG if name is not None and name_set_from_positional: state.add_error( message_registry.MULTIPLE_VALUES_FOR_NAME_KWARG.format(constructor).value, @@ -1093,7 +1048,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: ) name = extract_arg_name(kw_value) elif kw_name == "type": - # MULTIPLE_VALUES_FOR_TYPE_KWARG if typ is not default_type and typ_set_from_positional: state.add_error( message_registry.MULTIPLE_VALUES_FOR_TYPE_KWARG.format(constructor).value, @@ -1103,7 +1057,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: ) typ = kw_value else: - # ARG_CONSTRUCTOR_UNEXPECTED_ARG state.add_error( message_registry.ARG_CONSTRUCTOR_UNEXPECTED_ARG.format(kw_name).value, invalid.line, @@ -1111,14 +1064,12 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: blocker=True, ) - # Create CallableArgument call_arg = CallableArgument(typ, name, constructor) set_line_column_range(call_arg, invalid) return call_arg def read_pattern(state: State, data: ReadBuffer) -> Pattern: - """Read a pattern node from the buffer.""" tag = read_tag(data) if tag == nodes.AS_PATTERN: has_pattern = read_bool(data) @@ -1169,7 +1120,6 @@ def read_pattern(state: State, data: ReadBuffer) -> Pattern: expect_end_tag(data) return sequence_pattern elif tag == nodes.STARRED_PATTERN: - # Read optional capture name has_name = read_bool(data) if has_name: name_str = read_str(data) @@ -1356,7 +1306,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: generator = read_generator_expr(state, data) expr = ListComprehension(generator) read_loc(data, expr) - # Also copy location to the inner generator set_line_column_range(generator, expr) expect_end_tag(data) return expr @@ -1364,7 +1313,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: generator = read_generator_expr(state, data) expr = SetComprehension(generator) read_loc(data, expr) - # Also copy location to the inner generator set_line_column_range(generator, expr) expect_end_tag(data) return expr @@ -1460,7 +1408,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: else: keys.append(None) values = read_expression_list(state, data) - # Zip keys and values into items items = list(zip(keys, values)) expr = DictExpr(items) read_loc(data, expr) @@ -1488,7 +1435,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: expect_end_tag(data) return expr elif tag == nodes.TEMP_NODE: - # TempNode with no attributes temp = TempNode(AnyType(TypeOfAny.special_form), no_rhs=True) expect_end_tag(data) return temp @@ -1574,13 +1520,7 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: elif tag == nodes.ASSIGNMENT_EXPR: target = read_expression(state, data) value = read_expression(state, data) - # AssignmentExpr expects target to be a NameExpr - if not isinstance(target, NameExpr): - # In case target is not a NameExpr, we need to handle this - # For now, we'll assert since the grammar should ensure it's a NameExpr - assert isinstance( - target, NameExpr - ), f"Expected NameExpr for target, got {type(target)}" + assert isinstance(target, NameExpr), f"Expected NameExpr for target, got {type(target)}" expr = AssignmentExpr(target, value) read_loc(data, expr) expect_end_tag(data) @@ -1592,7 +1532,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: expect_end_tag(data) return expr elif tag == nodes.BYTES_EXPR: - # Read bytes literal as string value = read_str(data) expr = BytesExpr(value) read_loc(data, expr) @@ -1835,10 +1774,7 @@ def get_executable_if_block_with_overloads( def fix_function_overloads(state: State, stmts: list[Statement]) -> list[Statement]: """Merge consecutive function overloads into OverloadedFuncDef nodes. - This function processes a list of statements and combines function overloads - (marked with @overload decorator) that have the same name into a single - OverloadedFuncDef node. It also handles conditional overloads (overloads - inside if statements) when the condition can be evaluated. + Also handles conditional overloads (overloads inside if statements). """ ret: list[Statement] = [] current_overload: list[OverloadPart] = [] @@ -1984,14 +1920,7 @@ def fix_function_overloads(state: State, stmts: list[Statement]) -> list[Stateme def deserialize_imports(import_bytes: bytes) -> list[ImportBase]: - """Deserialize import metadata from bytes into mypy AST nodes. - - Args: - import_bytes: Serialized import metadata from the Rust parser - - Returns: - List of Import and ImportFrom AST nodes with location and metadata - """ + """Deserialize import metadata from bytes into mypy AST nodes.""" if not import_bytes: return [] @@ -2056,12 +1985,6 @@ def deserialize_imports(import_bytes: bytes) -> list[ImportBase]: def _read_and_set_import_metadata(data: ReadBuffer, stmt: Import | ImportFrom | ImportAll) -> None: - """Read location and metadata flags from buffer and set them on the import statement. - - Args: - data: Buffer containing serialized data - stmt: Import, ImportFrom, or ImportAll statement to populate with location and metadata - """ read_loc(data, stmt) # Metadata flags as a single integer bitfield From e5a1bade1c11e4e43d4c9d8713b915167bbfa2b8 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 13 Apr 2026 15:46:53 +0100 Subject: [PATCH 4/4] Revert unrelated changes --- mypy/nativeparse.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index 3a6bc757df2b6..ab0ff9a1be099 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -29,7 +29,7 @@ read_str as read_str_bare, ) -from mypy import errorcodes as codes, message_registry, nodes, types +from mypy import message_registry, nodes, types from mypy.cache import ( DICT_STR_GEN, END_TAG, @@ -47,7 +47,6 @@ read_str_opt, read_tag, ) -from mypy.errorcodes import ErrorCode from mypy.nodes import ( ARG_KINDS, ARG_POS, @@ -179,7 +178,7 @@ def add_error( column: int, *, blocker: bool = False, - code: ErrorCode = codes.MISC, + code: str | None = None, ) -> None: """Report an error at a specific location.""" self.errors.append( @@ -1010,6 +1009,7 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, + code="misc", ) return invalid @@ -1034,6 +1034,7 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, + code="misc", ) # Process keyword arguments @@ -1045,6 +1046,7 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, + code="misc", ) name = extract_arg_name(kw_value) elif kw_name == "type": @@ -1054,6 +1056,7 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, + code="misc", ) typ = kw_value else: @@ -1062,6 +1065,7 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: invalid.line, invalid.column, blocker=True, + code="misc", ) call_arg = CallableArgument(typ, name, constructor) @@ -1692,7 +1696,11 @@ def is_stripped_if_stmt(stmt: Statement) -> bool: def fail_merge_overload(state: State, node: IfStmt) -> None: """Report an error when overloads cannot be merged due to unknown condition.""" state.add_error( - message_registry.FAILED_TO_MERGE_OVERLOADS.value, node.line, node.column, blocker=False + message_registry.FAILED_TO_MERGE_OVERLOADS.value, + node.line, + node.column, + blocker=False, + code="misc", )