Skip to content

Commit 86fcb2a

Browse files
author
jenkins
committed
Add support for C-style block comments (/* */) in tokenizer
1 parent 9a57179 commit 86fcb2a

2 files changed

Lines changed: 68 additions & 0 deletions

File tree

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,3 +179,9 @@ CLAUDE.local.md
179179
#### main branch only stuff below this line, things to backport go above. ####
180180
# main branch only: ABI files are not checked/maintained.
181181
Doc/data/python*.abi
182+
183+
184+
185+
186+
187+

Parser/lexer/lexer.c

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -542,11 +542,39 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
542542
else if (c == EOF && PyErr_Occurred()) {
543543
return MAKE_TOKEN(ERRORTOKEN);
544544
}
545+
else if (c == '/') {
546+
int c2 = tok_nextc(tok);
547+
if (c2 == '*') {
548+
/* Consume block comment as part of indentation/whitespace */
549+
int prev = 0;
550+
int ch;
551+
while (1) {
552+
ch = tok_nextc(tok);
553+
if (ch == EOF) {
554+
tok->done = E_EOFS;
555+
return MAKE_TOKEN(ERRORTOKEN);
556+
}
557+
if (ch == '\n') {
558+
tok->lineno++;
559+
}
560+
if (prev == '*' && ch == '/') {
561+
break;
562+
}
563+
prev = ch;
564+
}
565+
/* Continue looking for more whitespace/comments */
566+
continue;
567+
} else {
568+
tok_backup(tok, c2);
569+
break;
570+
}
571+
}
545572
else {
546573
break;
547574
}
548575
}
549576
tok_backup(tok, c);
577+
550578
if (c == '#' || c == '\n' || c == '\r') {
551579
/* Lines with only whitespace and/or comments
552580
shouldn't affect the indentation and are
@@ -726,6 +754,40 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
726754
}
727755
}
728756

757+
/* Skip C-style block comment (slash-star ... star-slash) */
758+
if (c == '/') {
759+
int c2 = tok_nextc(tok);
760+
if (c2 == '*') {
761+
/* Consume everything until the closing star-slash sequence */
762+
int prev = 0;
763+
int ch;
764+
while (1) {
765+
ch = tok_nextc(tok);
766+
if (ch == EOF) {
767+
tok->done = E_EOFS;
768+
_PyTokenizer_syntaxerror(tok,
769+
"unterminated C-style block comment (/* ... */) "
770+
"— missing closing */");
771+
return MAKE_TOKEN(ERRORTOKEN);
772+
}
773+
if (ch == '\n') {
774+
tok->lineno++;
775+
tok->atbol = 1;
776+
}
777+
if (prev == '*' && ch == '/') {
778+
break; /* found closing star-slash */
779+
}
780+
prev = ch;
781+
}
782+
/* Restart token loop: treat block comment as whitespace */
783+
goto again;
784+
}
785+
else {
786+
/* Plain '/' operator: put c2 back, fall through to operator handling */
787+
tok_backup(tok, c2);
788+
}
789+
}
790+
729791
if (tok->done == E_INTERACT_STOP) {
730792
return MAKE_TOKEN(ENDMARKER);
731793
}

0 commit comments

Comments
 (0)