Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 37 additions & 13 deletions src/frontc/cparser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,18 @@ let applyPointer (ptspecs: attribute list list) (dt: decl_type)
loop ptspecs

let doDeclaration (loc: cabsloc) (specs: spec_elem list) (nl: init_name list) : definition =
if isTypedef specs then begin
(* Tell the lexer about the new type names *)
List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_type n) nl;
Lexerhack.is_typedef_decl := false;
(* Lexer registrations (add_type / add_identifier) are done per-declarator in
declarator_no_init / declarator_init_start as each declarator is parsed
(C11 6.2.1.7: scope begins just after the completion of the declarator), so
nothing to do here. *)
if isTypedef specs then
TYPEDEF ((specs, List.map (fun (n, _) -> n) nl), loc)
end else
else
if nl = [] then
ONLYTYPEDEF (specs, loc)
else begin
(* Tell the lexer about the new variable names *)
List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_identifier n) nl;
else
DECDEF ((specs, nl), loc)
end


let doFunctionDef (loc: cabsloc)
Expand Down Expand Up @@ -388,7 +388,7 @@ let transformOffsetOf (speclist, dtype) member =

%type <Cabs.init_name> init_declarator
%type <Cabs.init_name list> init_declarator_list
%type <Cabs.name> declarator
%type <Cabs.name> declarator declarator_no_init declarator_init_start
%type <Cabs.name * expression option> field_decl
%type <(Cabs.name * expression option) list> field_decl_list
%type <string * Cabs.decl_type> direct_decl
Expand Down Expand Up @@ -1036,14 +1036,38 @@ init_declarator_attr:

;
init_declarator: /* ISO 6.7 */
declarator { ($1, NO_INIT) }
| declarator EQ init_expression location
{ let (n, d, a, l) = $1 in ((n, d, a, joinLoc l $4), $3) }
declarator_no_init { ($1, NO_INIT) }
| declarator_init_start init_expression location
{ let (n, d, a, l) = $1 in ((n, d, a, joinLoc l $3), $2) }
;

/* (* Parses "declarator" (without initializer) and immediately registers the
declared name in the lexer hack, per C11 6.2.1.7 (scope begins just after
the completion of its declarator).
- For non-typedef declarations: calls add_identifier so subsequent
declarators in the same list see the name as a variable (not a type).
- For typedef declarations (Lexerhack.is_typedef_decl = true): calls add_type so
subsequent declarators see the name as a type.
In both cases every declarator is registered at the right time without
going through doDeclaration. *) */
declarator_no_init:
declarator { let (n, _, _, _) = $1 in
if !Lexerhack.is_typedef_decl then !Lexerhack.add_type n
else !Lexerhack.add_identifier n;
$1 }
;

/* (* Parses "declarator =" and adds the declared name as a variable identifier
in the lexer hack, so that in the initializer the name shadows any typedef
with the same name (C11 6.2.1.7: scope begins just after the completion of the
declarator). *) */
declarator_init_start:
declarator EQ { let (n, _, _, _) = $1 in !Lexerhack.add_identifier n; $1 }
Comment thread
michael-schwarz marked this conversation as resolved.
Comment on lines +1053 to +1065
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not do the lexer hack in declarator, instead of duplicating it at both new rules?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving the registration into declarator itself would fire in every context where declarator is used — including field_decl (struct members), parameter_decl (function parameters), function_def_start (function definitions), and old_pardef (K&R-style parameters). All of those must not trigger lexer registration. The two helper rules are the minimal way to isolate the registration to the init_declarator context, while also handling the semantic difference: declarator_no_init must conditionally call add_type or add_identifier (based on is_typedef_decl), whereas declarator_init_start always calls add_identifier (typedef declarations cannot have initializers).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An interesting question here is: where exactly in the C standard is it stated that typedefs cannot have initializers?
Of course, I don't know what it would mean, but it'd also be strange if it isn't forbidden.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently, the grammar doesn't explicitly ban it, but one can get some kind of semantic ban out of

6.7.9 (8) An initializer specifies the initial value stored in an object.

C11 draft

as typedefs do not define objects.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still, there's no need for two new grammar rules. The declarator_no_init would suffice and could be reused also in the EQ case. It just needs a suitable name then.

Having declarator_init_start hardcode add_identifier is odd because the grammar doesn't forbid typedef initializers. As is, the parser accepts such a thing, but adds it to the lexer hack with the wrong kind which will screw up all the following parsing.
It seems more reasonable to me to just use the same logic for both and not try to be clever about it if the grammar was never intended to be so precise.

Instead, I would expect Cabs2cil to reject a typedef with initializer. I haven't checked if it does but surely it cannot use it for anything meaningful.
Either way, then the following lexer hack state wouldn't be affected by the strange but valid grammatical construct.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what exactly you suggest here. If you're happy with the rest of it feel free to push directly to this branch. I'll have a look at it and then we can merge.

;

decl_spec_list_common: /* ISO 6.7 */
/* ISO 6.7.1 */
| TYPEDEF decl_spec_list_opt { SpecTypedef :: $2, $1 }
| TYPEDEF decl_spec_list_opt { Lexerhack.is_typedef_decl := true; SpecTypedef :: $2, $1 }
| EXTERN decl_spec_list_opt { SpecStorage EXTERN :: $2, $1 }
| STATIC decl_spec_list_opt { SpecStorage STATIC :: $2, $1 }
| AUTO decl_spec_list_opt { SpecStorage AUTO :: $2, $1 }
Expand Down
6 changes: 5 additions & 1 deletion src/frontc/lexerhack.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ let push_context: (unit -> unit) ref =
ref (fun _ -> E.s (E.bug "You called an uninitialized push_context"))

let pop_context: (unit -> unit) ref =
ref (fun _ -> E.s (E.bug "You called an uninitialized pop_context"))
ref (fun _ -> E.s (E.bug "You called an uninitialized pop_context"))

(* Set to true while parsing a typedef declaration's declarator list, so that
declarator_no_init calls add_type (not add_identifier) for typedef names. *)
let is_typedef_decl : bool ref = ref false


(* Keep here the current pattern for formatparse *)
Expand Down
54 changes: 54 additions & 0 deletions test/small1/typedef_varname.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* Test for typedef and variable name conflict (issue #114).
A variable with the same name as a typedef should shadow it in its initializer,
per C11 6.2.1.7: "Any other identifier has scope that begins just after the
completion of its declarator." */
#include "testharness.h"
#include <stdint.h>
#include <stdlib.h>

typedef struct raxNode {
uint32_t iskey:1;
uint32_t isnull:1;
uint32_t iscompr:1;
uint32_t size:29;
unsigned char data[];
} raxNode;

typedef struct rax {
raxNode *head;
uint64_t numele;
uint64_t numnodes;
} rax;

typedef int mytype;
typedef int T;

int main() {
rax *rax = malloc(sizeof(*rax)); /* variable rax shadows typedef rax in initializer */
if (rax == 0) E(1);
free(rax);

/* NO_INIT variable in the middle of a declaration list shadows a typedef.
After "mytype a = 1, mytype", the name "mytype" is an identifier (variable).
So "b = sizeof mytype" (without parens) is only valid when mytype is a variable
(types require parens: "sizeof(type)"). This fails to parse without the fix. */
{
mytype a = 1, mytype, b = sizeof mytype;
mytype = 5;
if (a != 1) E(2);
if (mytype != 5) E(3);
if (b != sizeof(int)) E(4);
}

/* Chain initialization: after "T T = 1", T is registered as a variable by the
lexer hack, so the subsequent initializer "U = T + 1" sees T as a variable
(value 1), not as a type name. This fails to parse without the fix because
"T + 1" would be a syntax error when T is a NAMED_TYPE. */
{
T T = 1, U = T + 1;
if (T != 1) E(5);
if (U != 2) E(6);
}

SUCCESS;
}
Comment thread
michael-schwarz marked this conversation as resolved.
1 change: 1 addition & 0 deletions test/testcil.pl
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ sub addToGroup {
addTest("testrun/for1 ");
addTest("testrun/void _GNUCC=1");
addTest("test/voidtypedef ");
addTest("testrun/typedef_varname ");
addTest("testrun/wrongnumargs ");
addBadComment("testrun/wrongnumargs",
"Notbug. Should fail since we don't pad argument lists");
Expand Down
Loading