diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index 6061ac3048dde..445355807f421 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -23,7 +23,7 @@ import time from typing import Final, cast -import ast_serialize # type: ignore[import-untyped, import-not-found, unused-ignore] +import ast_serialize # type: ignore[import-not-found, unused-ignore] from librt.internal import ( read_float as read_float_bare, read_int as read_int_bare, @@ -176,15 +176,7 @@ def __init__(self, options: Options) -> None: def add_error( self, message: str, line: int, column: int, *, blocker: bool = False, code: str ) -> None: - """Report an error at a specific location. - - Args: - message: Error message to display - line: Line number where error occurred - column: Column number where error occurred - blocker: If True, this error blocks further analysis - code: Error code for categorization - """ + """Report an error at a specific location.""" self.errors.append( {"line": line, "column": column, "message": message, "blocker": blocker, "code": code} ) @@ -195,23 +187,11 @@ def native_parse( ) -> tuple[MypyFile, list[ParseError], TypeIgnores]: """Parse a Python file using the native Rust-based parser. - Uses the ast_serialize Rust extension to parse Python code and deserialize - the resulting AST directly into mypy's native AST representation. - - Args: - filename: Path to the Python source file to parse - options: Mypy options affecting parsing behavior (e.g., Python version) - skip_function_bodies: If True, many function and method bodies are omitted from - the AST, useful for parsing stubs or extracting signatures without full - implementation details - imports_only: If True create an empty MypyFile with actual serialized defs - stored in binary_data. - - Returns: - A tuple containing: - - MypyFile: The parsed AST as a mypy AST node - - list[ParseError]: List of parse errors and deserialization errors - - TypeIgnores: List of (line_number, ignored_codes) tuples for type: ignore comments + Return (MypyFile, errors, type_ignores). + + The caller should set these additional attributes on the returned MypyFile: + - ignored_lines: dict of type ignore comments (from the TypeIgnores return value) + - is_stub: whether the file is a .pyi stub """ # If the path is a directory, return empty AST (matching fastparse behavior) # This can happen for packages that only contain .pyc files without source @@ -241,7 +221,6 @@ def native_parse( b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings ) node.uses_template_strings = uses_template_strings - # Merge deserialization errors with parsing errors all_errors = errors + state.errors return node, all_errors, ignores @@ -342,22 +321,17 @@ def read_statement(state: State, data: ReadBuffer) -> Statement: expect_end_tag(data) return a elif tag == nodes.OPERATOR_ASSIGNMENT_STMT: - # Read operator string op = read_str(data) - # Read lvalue (target) lvalue = read_expression(state, data) - # Read rvalue (value) rvalue = read_expression(state, data) stmt = OperatorAssignmentStmt(op, lvalue, rvalue) read_loc(data, stmt) expect_end_tag(data) return stmt elif tag == nodes.IF_STMT: - # Read the main if condition and body expr = read_expression(state, data) body = read_block(state, data) - # Read elif clauses num_elif = read_int(data) elif_exprs = [] elif_bodies = [] @@ -375,21 +349,17 @@ def read_statement(state: State, data: ReadBuffer) -> Statement: # Build from the bottom up, starting with the final else body current_else = else_body - # Process elif clauses in reverse order - for i in range(len(elif_exprs) - 1, -1, -1): - elif_stmt = IfStmt([elif_exprs[i]], [elif_bodies[i]], current_else) - # Set location from the elif expression - elif_stmt.line = elif_exprs[i].line - elif_stmt.column = elif_exprs[i].column - # Set end location based on what follows + for elif_expr, elif_body in reversed(list(zip(elif_exprs, elif_bodies))): + elif_stmt = IfStmt([elif_expr], [elif_body], current_else) + elif_stmt.line = elif_expr.line + elif_stmt.column = elif_expr.column if current_else is not None: elif_stmt.end_line = current_else.end_line elif_stmt.end_column = current_else.end_column else: - elif_stmt.end_line = elif_bodies[i].end_line - elif_stmt.end_column = elif_bodies[i].end_column + elif_stmt.end_line = elif_body.end_line + elif_stmt.end_column = elif_body.end_column - # Wrap in a Block to become the else clause for the outer if current_else = Block([elif_stmt]) set_line_column_range(current_else, elif_stmt) @@ -584,10 +554,9 @@ def read_statement(state: State, data: ReadBuffer) -> Statement: def read_parameters(state: State, data: ReadBuffer) -> tuple[list[Argument], bool]: - """Read function/lambda parameters from the buffer. + """Read function/lambda parameters. - Returns: - A tuple of (arguments list, has_annotations flag) + Return (parameters, has_annotations). """ expect_tag(data, LIST_GEN) n_args = read_int_bare(data) @@ -610,7 +579,6 @@ def read_parameters(state: State, data: ReadBuffer) -> tuple[list[Argument], boo default = None pos_only = read_bool(data) - # Apply implicit_optional if enabled and default is None if state.options.implicit_optional and ann is not None: optional = isinstance(default, NameExpr) and default.name == "None" if isinstance(ann, UnboundType): @@ -726,7 +694,6 @@ def read_class_def(state: State, data: ReadBuffer) -> ClassDef: else: type_params = None - # Keywords (all keyword arguments including metaclass) expect_tag(data, DICT_STR_GEN) n_keywords = read_int_bare(data) keywords = [] @@ -735,9 +702,7 @@ def read_class_def(state: State, data: ReadBuffer) -> ClassDef: value = read_expression(state, data) keywords.append((key, value)) - # Extract metaclass from keywords if present metaclass = dict(keywords).get("metaclass") if keywords else None - # Remove metaclass from keywords since it's passed as a separate field filtered_keywords = [(k, v) for k, v in keywords if k != "metaclass"] if keywords else None class_def = ClassDef( @@ -849,7 +814,7 @@ def read_try_stmt(state: State, data: ReadBuffer) -> TryStmt: else: finally_body = None - # Read is_star flag (for except* in Python 3.11+) + # except* (Python 3.11+) is_star = read_bool(data) stmt = TryStmt(body, vars_list, types_list, handlers, else_body, finally_body) @@ -867,7 +832,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: n = read_int_bare(data) args = tuple(read_type(state, data) for i in range(n)) empty_tuple_index = read_bool(data) - # Read optional original_str_expr t = read_tag(data) if t == LITERAL_NONE: original_str_expr = None @@ -875,7 +839,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: original_str_expr = read_str_bare(data) else: assert False, f"Unexpected tag for original_str_expr: {t}" - # Read optional original_str_fallback t = read_tag(data) if t == LITERAL_NONE: original_str_fallback = None @@ -894,13 +857,10 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return unbound elif tag == types.UNION_TYPE: - # Read items list expect_tag(data, LIST_GEN) n = read_int_bare(data) items = [read_type(state, data) for i in range(n)] - # Read uses_pep604_syntax flag uses_pep604_syntax = read_bool(data) - # Read optional original_str_expr t = read_tag(data) if t == LITERAL_NONE: original_str_expr = None @@ -908,7 +868,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: original_str_expr = read_str_bare(data) else: assert False, f"Unexpected tag for original_str_expr: {t}" - # Read optional original_str_fallback t = read_tag(data) if t == LITERAL_NONE: original_str_fallback = None @@ -924,7 +883,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return union elif tag == types.LIST_TYPE: - # Read items list expect_tag(data, LIST_GEN) n = read_int_bare(data) items = [read_type(state, data) for i in range(n)] @@ -933,7 +891,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return type_list elif tag == types.TUPLE_TYPE: - # Read items list expect_tag(data, LIST_GEN) n = read_int_bare(data) items = [read_type(state, data) for i in range(n)] @@ -963,7 +920,6 @@ def read_type(state: State, data: ReadBuffer) -> Type: expect_end_tag(data) return typeddict_type elif tag == types.ELLIPSIS_TYPE: - # EllipsisType has no attributes ellipsis_type = EllipsisType() read_loc(data, ellipsis_type) expect_end_tag(data) @@ -1005,31 +961,23 @@ def read_type(state: State, data: ReadBuffer) -> Type: def stringify_type_name(typ: Type) -> str | None: - """Extract qualified name from a type (for Arg constructor detection).""" if isinstance(typ, UnboundType): return typ.name return None def extract_arg_name(typ: Type) -> str | None: - """Extract argument name from a type (for Arg name parameter).""" if isinstance(typ, RawExpressionType) and typ.base_type_name == "builtins.str": return typ.literal_value # type: ignore[return-value] elif isinstance(typ, UnboundType): - # String literals in type context are parsed as UnboundType (forward references) - # For Arg names, these are typically simple names without dots if typ.name == "None": return None - # Return the name as-is (it's the argument name) return typ.name return None # Invalid, but let validation handle it def read_call_type(state: State, data: ReadBuffer) -> Type: - """Read Call in type context - check if it's an Arg/DefaultArg/VarArg/KwArg constructor. - - This performs validation and error reporting similar to mypy/fastparse.py. - """ + """Read Call in type context (Arg/DefaultArg/VarArg/KwArg constructor).""" callee_type = read_type(state, data) # Read positional arguments @@ -1052,16 +1000,13 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: kw_value = read_type(state, data) kwargs.append((kw_name, kw_value)) - # Try to detect Arg/DefaultArg/VarArg/KwArg pattern constructor = stringify_type_name(callee_type) - # We'll read location before processing errors so we can report them correctly invalid = AnyType(TypeOfAny.from_error) read_loc(data, invalid) expect_end_tag(data) if not constructor: - # ARG_CONSTRUCTOR_NAME_EXPECTED state.add_error( message_registry.ARG_CONSTRUCTOR_NAME_EXPECTED.value, invalid.line, @@ -1087,7 +1032,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: name = extract_arg_name(arg) name_set_from_positional = True else: - # ARG_CONSTRUCTOR_TOO_MANY_ARGS state.add_error( message_registry.ARG_CONSTRUCTOR_TOO_MANY_ARGS.value, invalid.line, @@ -1099,7 +1043,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: # Process keyword arguments for kw_name, kw_value in kwargs: if kw_name == "name": - # MULTIPLE_VALUES_FOR_NAME_KWARG if name is not None and name_set_from_positional: state.add_error( message_registry.MULTIPLE_VALUES_FOR_NAME_KWARG.format(constructor).value, @@ -1110,7 +1053,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: ) name = extract_arg_name(kw_value) elif kw_name == "type": - # MULTIPLE_VALUES_FOR_TYPE_KWARG if typ is not default_type and typ_set_from_positional: state.add_error( message_registry.MULTIPLE_VALUES_FOR_TYPE_KWARG.format(constructor).value, @@ -1121,7 +1063,6 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: ) typ = kw_value else: - # ARG_CONSTRUCTOR_UNEXPECTED_ARG state.add_error( message_registry.ARG_CONSTRUCTOR_UNEXPECTED_ARG.format(kw_name).value, invalid.line, @@ -1130,14 +1071,12 @@ def read_call_type(state: State, data: ReadBuffer) -> Type: code="misc", ) - # Create CallableArgument call_arg = CallableArgument(typ, name, constructor) set_line_column_range(call_arg, invalid) return call_arg def read_pattern(state: State, data: ReadBuffer) -> Pattern: - """Read a pattern node from the buffer.""" tag = read_tag(data) if tag == nodes.AS_PATTERN: has_pattern = read_bool(data) @@ -1188,7 +1127,6 @@ def read_pattern(state: State, data: ReadBuffer) -> Pattern: expect_end_tag(data) return sequence_pattern elif tag == nodes.STARRED_PATTERN: - # Read optional capture name has_name = read_bool(data) if has_name: name_str = read_str(data) @@ -1375,7 +1313,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: generator = read_generator_expr(state, data) expr = ListComprehension(generator) read_loc(data, expr) - # Also copy location to the inner generator set_line_column_range(generator, expr) expect_end_tag(data) return expr @@ -1383,7 +1320,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: generator = read_generator_expr(state, data) expr = SetComprehension(generator) read_loc(data, expr) - # Also copy location to the inner generator set_line_column_range(generator, expr) expect_end_tag(data) return expr @@ -1479,7 +1415,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: else: keys.append(None) values = read_expression_list(state, data) - # Zip keys and values into items items = list(zip(keys, values)) expr = DictExpr(items) read_loc(data, expr) @@ -1507,7 +1442,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: expect_end_tag(data) return expr elif tag == nodes.TEMP_NODE: - # TempNode with no attributes temp = TempNode(AnyType(TypeOfAny.special_form), no_rhs=True) expect_end_tag(data) return temp @@ -1590,16 +1524,10 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: read_loc(data, expr) expect_end_tag(data) return expr - elif tag == nodes.NAMED_EXPR: + elif tag == nodes.ASSIGNMENT_EXPR: target = read_expression(state, data) value = read_expression(state, data) - # AssignmentExpr expects target to be a NameExpr - if not isinstance(target, NameExpr): - # In case target is not a NameExpr, we need to handle this - # For now, we'll assert since the grammar should ensure it's a NameExpr - assert isinstance( - target, NameExpr - ), f"Expected NameExpr for target, got {type(target)}" + assert isinstance(target, NameExpr), f"Expected NameExpr for target, got {type(target)}" expr = AssignmentExpr(target, value) read_loc(data, expr) expect_end_tag(data) @@ -1611,7 +1539,6 @@ def read_expression(state: State, data: ReadBuffer) -> Expression: expect_end_tag(data) return expr elif tag == nodes.BYTES_EXPR: - # Read bytes literal as string value = read_str(data) expr = BytesExpr(value) read_loc(data, expr) @@ -1858,10 +1785,7 @@ def get_executable_if_block_with_overloads( def fix_function_overloads(state: State, stmts: list[Statement]) -> list[Statement]: """Merge consecutive function overloads into OverloadedFuncDef nodes. - This function processes a list of statements and combines function overloads - (marked with @overload decorator) that have the same name into a single - OverloadedFuncDef node. It also handles conditional overloads (overloads - inside if statements) when the condition can be evaluated. + Also handles conditional overloads (overloads inside if statements). """ ret: list[Statement] = [] current_overload: list[OverloadPart] = [] @@ -2007,14 +1931,7 @@ def fix_function_overloads(state: State, stmts: list[Statement]) -> list[Stateme def deserialize_imports(import_bytes: bytes) -> list[ImportBase]: - """Deserialize import metadata from bytes into mypy AST nodes. - - Args: - import_bytes: Serialized import metadata from the Rust parser - - Returns: - List of Import and ImportFrom AST nodes with location and metadata - """ + """Deserialize import metadata from bytes into mypy AST nodes.""" if not import_bytes: return [] @@ -2079,12 +1996,6 @@ def deserialize_imports(import_bytes: bytes) -> list[ImportBase]: def _read_and_set_import_metadata(data: ReadBuffer, stmt: Import | ImportFrom | ImportAll) -> None: - """Read location and metadata flags from buffer and set them on the import statement. - - Args: - data: Buffer containing serialized data - stmt: Import, ImportFrom, or ImportAll statement to populate with location and metadata - """ read_loc(data, stmt) # Metadata flags as a single integer bitfield diff --git a/mypy/nodes.py b/mypy/nodes.py index a8dd366c0e3ee..46c603c60e475 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -5351,6 +5351,7 @@ def set_info(node: SymbolNode, info: TypeInfo) -> None: CLASS_DEF: Final[Tag] = 60 SYMBOL_TABLE_NODE: Final[Tag] = 61 +# Tags 160+ are shared with the ast_serialize Rust extension and must be kept in sync. EXPR_STMT: Final[Tag] = 160 CALL_EXPR: Final[Tag] = 161 NAME_EXPR: Final[Tag] = 162 @@ -5399,7 +5400,7 @@ def set_info(node: SymbolNode, info: TypeInfo) -> None: FSTRING_EXPR: Final[Tag] = 205 FSTRING_INTERPOLATION: Final[Tag] = 206 LAMBDA_EXPR: Final[Tag] = 207 -NAMED_EXPR: Final[Tag] = 208 +ASSIGNMENT_EXPR: Final[Tag] = 208 STAR_EXPR: Final[Tag] = 209 BYTES_EXPR: Final[Tag] = 210 GLOBAL_DECL: Final[Tag] = 211