From 86fcb2a1ad0d99d4985c216a9149c795021b397d Mon Sep 17 00:00:00 2001
From: jenkins
Date: Tue, 28 Apr 2026 18:43:54 +0530
Subject: [PATCH] Add support for C-style block comments (/* */) in tokenizer

Teach tok_get_normal_mode() to skip C-style block comments in two places:

* while measuring indentation at the beginning of a line, and
* between tokens, right after the '#' comment handling.

In both places a '/' that is not followed by '*' is pushed back and is
tokenized as before.  A block comment that reaches EOF without a closing
star-slash is reported through _PyTokenizer_syntaxerror().
---
Review notes (ignored by git-am):

* The error message is now ASCII-only (the em dash was replaced by '-').
* The manual tok->lineno++ on '\n' inside comments was removed from both
  hunks, and tok->atbol = 1 was removed from the second hunk.
  NOTE(review): this assumes tok_nextc() already maintains the line
  counter -- confirm against the underflow helpers.
* The indentation-time scan now raises the same syntax error as the
  between-tokens scan instead of returning ERRORTOKEN silently.
* An already pending exception at EOF is no longer replaced.
* NOTE(review): tok->done = E_EOFS is still stored before the call to
  _PyTokenizer_syntaxerror(); if that helper overwrites tok->done the
  store has no effect -- confirm which final state is intended.
* NOTE(review): the .gitignore hunk only appends blank lines and looks
  unrelated to this change -- consider dropping it.
* The blob ids on the lexer.c "index" line were not regenerated.

 .gitignore           |  6 +++++
 Parser/lexer/lexer.c | 76 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/.gitignore b/.gitignore
index 3bf5187d531c23..ad40f75b72b62b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,3 +179,9 @@ CLAUDE.local.md
 #### main branch only stuff below this line, things to backport go above. ####
 # main branch only: ABI files are not checked/maintained.
 Doc/data/python*.abi
+
+
+
+
+
+
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 7f25afec302c22..9cb7d73efa744d 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -542,11 +542,46 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             else if (c == EOF && PyErr_Occurred()) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
+            else if (c == '/') {
+                int c2 = tok_nextc(tok);
+                if (c2 != '*') {
+                    /* Not a comment opener: un-read the lookahead here;
+                       c itself is pushed back once the loop exits. */
+                    tok_backup(tok, c2);
+                    break;
+                }
+                /* Swallow the block comment as if it were indentation
+                   whitespace.  tok->lineno is not modified here.
+                   NOTE(review): presumably tok_nextc() keeps tok->lineno
+                   up to date on its own -- confirm. */
+                int prev = 0;
+                for (;;) {
+                    int ch = tok_nextc(tok);
+                    if (ch == EOF) {
+                        if (PyErr_Occurred()) {
+                            /* Keep the pending exception, as the enclosing
+                               loop does on EOF. */
+                            return MAKE_TOKEN(ERRORTOKEN);
+                        }
+                        tok->done = E_EOFS;
+                        _PyTokenizer_syntaxerror(tok,
+                            "unterminated C-style block comment (/* ... */) "
+                            "- missing closing */");
+                        return MAKE_TOKEN(ERRORTOKEN);
+                    }
+                    if (prev == '*' && ch == '/') {
+                        break;
+                    }
+                    prev = ch;
+                }
+                /* Keep scanning for more whitespace/comments. */
+                continue;
+            }
             else {
                 break;
             }
         }
         tok_backup(tok, c);
         if (c == '#' || c == '\n' || c == '\r') {
             /* Lines with only whitespace and/or comments
                shouldn't affect the indentation and are
@@ -726,6 +761,47 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         }
     }
 
+    /* Skip C-style block comment (slash-star ... star-slash). */
+    if (c == '/') {
+        int c2 = tok_nextc(tok);
+        if (c2 == '*') {
+            /* Consume everything up to the closing star-slash.  Neither
+               tok->lineno nor tok->atbol is modified here: the comment is
+               treated as whitespace, so the token after it must not be
+               re-scanned for indentation.
+               NOTE(review): presumably tok_nextc() keeps tok->lineno up
+               to date on its own -- confirm. */
+            int prev = 0;
+            for (;;) {
+                int ch = tok_nextc(tok);
+                if (ch == EOF) {
+                    if (PyErr_Occurred()) {
+                        /* Keep the pending exception instead of replacing
+                           it with a syntax error. */
+                        return MAKE_TOKEN(ERRORTOKEN);
+                    }
+                    tok->done = E_EOFS;
+                    /* The message is ASCII-only.  NOTE(review): presumably
+                       it is used as a printf-style format, which must not
+                       contain non-ASCII bytes -- confirm. */
+                    _PyTokenizer_syntaxerror(tok,
+                        "unterminated C-style block comment (/* ... */) "
+                        "- missing closing */");
+                    return MAKE_TOKEN(ERRORTOKEN);
+                }
+                if (prev == '*' && ch == '/') {
+                    break;  /* found closing star-slash */
+                }
+                prev = ch;
+            }
+            /* Restart token loop: treat block comment as whitespace. */
+            goto again;
+        }
+        /* Plain '/' operator: put c2 back and fall through to the
+           operator handling. */
+        tok_backup(tok, c2);
+    }
+
     if (tok->done == E_INTERACT_STOP) {
         return MAKE_TOKEN(ENDMARKER);
     }