2377 lines
88 KiB
Python
2377 lines
88 KiB
Python
# ------------------------------------------------------------------------------
|
|
# pycparser: c_parser.py
|
|
#
|
|
# Recursive-descent parser for the C language.
|
|
#
|
|
# Eli Bendersky [https://eli.thegreenplace.net/]
|
|
# License: BSD
|
|
# ------------------------------------------------------------------------------
|
|
from dataclasses import dataclass
|
|
from typing import (
|
|
Any,
|
|
Dict,
|
|
List,
|
|
Literal,
|
|
NoReturn,
|
|
Optional,
|
|
Tuple,
|
|
TypedDict,
|
|
cast,
|
|
)
|
|
|
|
from . import c_ast
|
|
from .c_lexer import CLexer, _Token
|
|
from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
|
|
|
|
|
|
@dataclass
class Coord:
    """Location of a syntactic element: file name, line number and an
    optional column number.
    """

    file: str
    line: int
    column: Optional[int] = None

    def __str__(self) -> str:
        # The column is only shown when it is present and non-zero.
        parts = [self.file, f"{self.line}"]
        if self.column:
            parts.append(f"{self.column}")
        return ":".join(parts)
|
|
|
|
|
|
class ParseError(Exception):
    """Raised when the parser encounters invalid C source."""
|
|
|
|
|
|
class CParser:
|
|
"""Recursive-descent C parser.
|
|
|
|
Usage:
|
|
parser = CParser()
|
|
ast = parser.parse(text, filename)
|
|
|
|
The `lexer` parameter lets you inject a lexer class (defaults to CLexer).
|
|
The parameters after `lexer` are accepted for backward compatibility with
|
|
the old PLY-based parser and are otherwise unused.
|
|
"""
|
|
|
|
def __init__(
    self,
    lex_optimize: bool = True,
    lexer: type[CLexer] = CLexer,
    lextab: str = "pycparser.lextab",
    yacc_optimize: bool = True,
    yacctab: str = "pycparser.yacctab",
    yacc_debug: bool = False,
    taboutputdir: str = "",
) -> None:
    """Create a new parser.

    lexer:
        Lexer class to instantiate (defaults to CLexer).

    All other parameters are accepted only for backward compatibility
    with the old PLY-based parser and are otherwise unused.
    """
    # The lexer calls back into the parser for: error reporting, scope
    # push/pop on '{'/'}', and deciding whether an identifier is a
    # typedef name (TYPEID) in the current scope.
    self.clex: CLexer = lexer(
        error_func=self._lex_error_func,
        on_lbrace_func=self._lex_on_lbrace_func,
        on_rbrace_func=self._lex_on_rbrace_func,
        type_lookup_func=self._lex_type_lookup_func,
    )

    # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
    # the current (topmost) scope. Each scope is a dictionary that
    # specifies whether a name is a type. If _scope_stack[n][name] is
    # True, 'name' is currently a type in the scope. If it's False,
    # 'name' is used in the scope but not as a type (for instance, if we
    # saw: int name;
    # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
    # in this scope at all.
    self._scope_stack: List[Dict[str, bool]] = [dict()]
    # Buffered token stream; supports mark()/reset() for backtracking.
    self._tokens: _TokenStream = _TokenStream(self.clex)
|
|
|
|
def parse(
    self, text: str, filename: str = "", debug: bool = False
) -> c_ast.FileAST:
    """Parses C code and returns an AST.

    text:
        A string containing the C source code

    filename:
        Name of the file being parsed (for meaningful
        error messages)

    debug:
        Deprecated debug flag (unused); for backwards compatibility.
    """
    # Reset parser state so the same instance can be reused: a fresh
    # global scope and a fresh token stream over the new input.
    self._scope_stack = [dict()]
    self.clex.input(text, filename)
    self._tokens = _TokenStream(self.clex)

    ast = self._parse_translation_unit_or_empty()
    # Any token left after a complete translation unit is a syntax error.
    tok = self._peek()
    if tok is not None:
        self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
    return ast
|
|
|
|
# ------------------------------------------------------------------
|
|
# Scope and declaration helpers
|
|
# ------------------------------------------------------------------
|
|
def _coord(self, lineno: int, column: Optional[int] = None) -> Coord:
    """Build a Coord located in the file the lexer is processing."""
    return Coord(file=self.clex.filename, line=lineno, column=column)
|
|
|
|
def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn:
    """Abort parsing with a location-prefixed ParseError."""
    raise ParseError("{}: {}".format(coord, msg))
|
|
|
|
def _push_scope(self) -> None:
|
|
self._scope_stack.append(dict())
|
|
|
|
def _pop_scope(self) -> None:
|
|
assert len(self._scope_stack) > 1
|
|
self._scope_stack.pop()
|
|
|
|
def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None:
|
|
"""Add a new typedef name (ie a TYPEID) to the current scope"""
|
|
if not self._scope_stack[-1].get(name, True):
|
|
self._parse_error(
|
|
f"Typedef {name!r} previously declared as non-typedef in this scope",
|
|
coord,
|
|
)
|
|
self._scope_stack[-1][name] = True
|
|
|
|
def _add_identifier(self, name: str, coord: Optional[Coord]) -> None:
|
|
"""Add a new object, function, or enum member name (ie an ID) to the
|
|
current scope
|
|
"""
|
|
if self._scope_stack[-1].get(name, False):
|
|
self._parse_error(
|
|
f"Non-typedef {name!r} previously declared as typedef in this scope",
|
|
coord,
|
|
)
|
|
self._scope_stack[-1][name] = False
|
|
|
|
def _is_type_in_scope(self, name: str) -> bool:
|
|
"""Is *name* a typedef-name in the current scope?"""
|
|
for scope in reversed(self._scope_stack):
|
|
# If name is an identifier in this scope it shadows typedefs in
|
|
# higher scopes.
|
|
in_scope = scope.get(name)
|
|
if in_scope is not None:
|
|
return in_scope
|
|
return False
|
|
|
|
def _lex_error_func(self, msg: str, line: int, column: int) -> None:
    """Lexer callback: surface a lexing error as a parse error."""
    coord = self._coord(line, column)
    self._parse_error(msg, coord)
|
|
|
|
def _lex_on_lbrace_func(self) -> None:
|
|
self._push_scope()
|
|
|
|
def _lex_on_rbrace_func(self) -> None:
|
|
self._pop_scope()
|
|
|
|
def _lex_type_lookup_func(self, name: str) -> bool:
|
|
"""Looks up types that were previously defined with
|
|
typedef.
|
|
Passed to the lexer for recognizing identifiers that
|
|
are types.
|
|
"""
|
|
return self._is_type_in_scope(name)
|
|
|
|
# To understand what's going on here, read sections A.8.5 and
|
|
# A.8.6 of K&R2 very carefully.
|
|
#
|
|
# A C type consists of a basic type declaration, with a list
|
|
# of modifiers. For example:
|
|
#
|
|
# int *c[5];
|
|
#
|
|
# The basic declaration here is 'int c', and the pointer and
|
|
# the array are the modifiers.
|
|
#
|
|
# Basic declarations are represented by TypeDecl (from module c_ast) and the
|
|
# modifiers are FuncDecl, PtrDecl and ArrayDecl.
|
|
#
|
|
# The standard states that whenever a new modifier is parsed, it should be
|
|
# added to the end of the list of modifiers. For example:
|
|
#
|
|
# K&R2 A.8.6.2: Array Declarators
|
|
#
|
|
# In a declaration T D where D has the form
|
|
# D1 [constant-expression-opt]
|
|
# and the type of the identifier in the declaration T D1 is
|
|
# "type-modifier T", the type of the
|
|
# identifier of D is "type-modifier array of T"
|
|
#
|
|
# This is what this method does. The declarator it receives
|
|
# can be a list of declarators ending with TypeDecl. It
|
|
# tacks the modifier to the end of this list, just before
|
|
# the TypeDecl.
|
|
#
|
|
# Additionally, the modifier may be a list itself. This is
|
|
# useful for pointers, that can come as a chain from the rule
|
|
# p_pointer. In this case, the whole modifier list is spliced
|
|
# into the new location.
|
|
def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node:
    """Tacks a type modifier on a declarator, and returns
    the modified declarator.

    decl:
        A declarator: either a TypeDecl or a chain of modifier nodes
        (FuncDecl/PtrDecl/ArrayDecl) ending in a TypeDecl.

    modifier:
        The modifier (possibly itself a chain) to append just before
        the innermost TypeDecl, per K&R2 A.8.6.

    Note: the declarator and modifier may be modified
    """
    modifier_head = modifier
    modifier_tail = modifier

    # The modifier may be a nested list. Reach its tail.
    while modifier_tail.type:
        modifier_tail = modifier_tail.type

    # If the decl is a basic type, just tack the modifier onto it.
    if isinstance(decl, c_ast.TypeDecl):
        modifier_tail.type = decl
        return modifier
    else:
        # Otherwise, the decl is a list of modifiers. Reach
        # its tail and splice the modifier onto the tail,
        # pointing to the underlying basic type.
        decl_tail = decl
        while not isinstance(decl_tail.type, c_ast.TypeDecl):
            decl_tail = decl_tail.type

        # Splice: tail of the modifier chain takes over the TypeDecl,
        # and the decl chain now points at the modifier chain's head.
        modifier_tail.type = decl_tail.type
        decl_tail.type = modifier_head
        return decl
|
|
|
|
# Due to the order in which declarators are constructed,
|
|
# they have to be fixed in order to look like a normal AST.
|
|
#
|
|
# When a declaration arrives from syntax construction, it has
|
|
# these problems:
|
|
# * The innermost TypeDecl has no type (because the basic
|
|
# type is only known at the uppermost declaration level)
|
|
# * The declaration has no variable name, since that is saved
|
|
# in the innermost TypeDecl
|
|
# * The typename of the declaration is a list of type
|
|
# specifiers, and not a node. Here, basic identifier types
|
|
# should be separated from more complex types like enums
|
|
# and structs.
|
|
#
|
|
# This method fixes these problems.
|
|
def _fix_decl_name_type(
    self,
    decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename,
    typename: List[Any],
) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename:
    """Fixes a declaration. Modifies decl.

    Moves the name from the innermost TypeDecl up to the declaration,
    copies the declaration's qualifiers onto the TypeDecl, and turns
    the raw list of type specifiers in *typename* into a proper type
    node on the TypeDecl.
    """
    # Reach the underlying basic type
    typ = decl
    while not isinstance(typ, c_ast.TypeDecl):
        typ = typ.type

    decl.name = typ.declname
    typ.quals = decl.quals[:]

    # The typename is a list of types. If any type in this
    # list isn't an IdentifierType, it must be the only
    # type in the list (it's illegal to declare "int enum ..")
    # If all the types are basic, they're collected in the
    # IdentifierType holder.
    for tn in typename:
        if not isinstance(tn, c_ast.IdentifierType):
            if len(typename) > 1:
                self._parse_error("Invalid multiple types specified", tn.coord)
            else:
                typ.type = tn
                return decl

    if not typename:
        # Functions default to returning int
        if not isinstance(decl.type, c_ast.FuncDecl):
            self._parse_error("Missing type in declaration", decl.coord)
        typ.type = c_ast.IdentifierType(["int"], coord=decl.coord)
    else:
        # At this point, we know that typename is a list of IdentifierType
        # nodes. Concatenate all the names into a single list.
        # (Loop variable renamed from `id`, which shadowed the builtin.)
        typ.type = c_ast.IdentifierType(
            [name for id_type in typename for name in id_type.names],
            coord=typename[0].coord,
        )
    return decl
|
|
|
|
def _add_declaration_specifier(
|
|
self,
|
|
declspec: Optional["_DeclSpec"],
|
|
newspec: Any,
|
|
kind: "_DeclSpecKind",
|
|
append: bool = False,
|
|
) -> "_DeclSpec":
|
|
"""See _DeclSpec for the specifier dictionary layout."""
|
|
if declspec is None:
|
|
spec: _DeclSpec = dict(
|
|
qual=[], storage=[], type=[], function=[], alignment=[]
|
|
)
|
|
else:
|
|
spec = declspec
|
|
|
|
if append:
|
|
spec[kind].append(newspec)
|
|
else:
|
|
spec[kind].insert(0, newspec)
|
|
|
|
return spec
|
|
|
|
def _build_declarations(
    self,
    spec: "_DeclSpec",
    decls: List["_DeclInfo"],
    typedef_namespace: bool = False,
) -> List[c_ast.Node]:
    """Builds a list of declarations all sharing the given specifiers.
    If typedef_namespace is true, each declared name is added
    to the "typedef namespace", which also includes objects,
    functions, and enum constants.
    """
    is_typedef = "typedef" in spec["storage"]
    declarations = []

    # Bit-fields are allowed to be unnamed.
    if decls[0].get("bitsize") is None:
        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None. This can only occur for the
        # first declarator.
        if decls[0]["decl"] is None:
            # Only recoverable if the last "type" is really a known
            # typedef name with a single name; otherwise it's an error.
            if (
                len(spec["type"]) < 2
                or len(spec["type"][-1].names) != 1
                or not self._is_type_in_scope(spec["type"][-1].names[0])
            ):
                # Report at the first type specifier that carries a coord.
                coord = "?"
                for t in spec["type"]:
                    if hasattr(t, "coord"):
                        coord = t.coord
                        break
                self._parse_error("Invalid declaration", coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]["decl"] = c_ast.TypeDecl(
                declname=spec["type"][-1].names[0],
                type=None,
                quals=None,
                align=spec["alignment"],
                coord=spec["type"][-1].coord,
            )
            # Remove the "new" type's name from the end of spec['type']
            del spec["type"][-1]
        # A similar problem can occur where the declaration ends up
        # looking like an abstract declarator. Give it a name if this is
        # the case.
        elif not isinstance(
            decls[0]["decl"],
            (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
        ):
            # Walk down to the innermost TypeDecl of the first declarator.
            decls_0_tail = cast(Any, decls[0]["decl"])
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                # Steal the name from the last type specifier.
                decls_0_tail.declname = spec["type"][-1].names[0]
                del spec["type"][-1]

    for decl in decls:
        assert decl["decl"] is not None
        # Typedefs and ordinary declarations use different AST nodes.
        if is_typedef:
            declaration = c_ast.Typedef(
                name=None,
                quals=spec["qual"],
                storage=spec["storage"],
                type=decl["decl"],
                coord=decl["decl"].coord,
            )
        else:
            declaration = c_ast.Decl(
                name=None,
                quals=spec["qual"],
                align=spec["alignment"],
                storage=spec["storage"],
                funcspec=spec["function"],
                type=decl["decl"],
                init=decl.get("init"),
                bitsize=decl.get("bitsize"),
                coord=decl["decl"].coord,
            )

        # Composite/basic type nodes need no name/type fixup; everything
        # else goes through _fix_decl_name_type.
        if isinstance(
            declaration.type,
            (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
        ):
            fixed_decl = declaration
        else:
            fixed_decl = self._fix_decl_name_type(declaration, spec["type"])

        # Add the type name defined by typedef to a
        # symbol table (for usage in the lexer)
        if typedef_namespace:
            if is_typedef:
                self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
            else:
                self._add_identifier(fixed_decl.name, fixed_decl.coord)

        fixed_decl = fix_atomic_specifiers(
            cast(c_ast.Decl | c_ast.Typedef, fixed_decl)
        )
        declarations.append(fixed_decl)

    return declarations
|
|
|
|
def _build_function_definition(
    self,
    spec: "_DeclSpec",
    decl: c_ast.Node,
    param_decls: Optional[List[c_ast.Node]],
    body: c_ast.Node,
) -> c_ast.Node:
    """Builds a function definition node from its specifiers, declarator,
    optional K&R-style parameter declarations, and compound body.
    """
    # 'typedef' can never appear on a function definition.
    if "typedef" in spec["storage"]:
        self._parse_error("Invalid typedef", decl.coord)

    func_decl = self._build_declarations(
        spec=spec,
        decls=[dict(decl=decl, init=None, bitsize=None)],
        typedef_namespace=True,
    )[0]

    return c_ast.FuncDef(
        decl=func_decl,
        param_decls=param_decls,
        body=body,
        coord=decl.coord,
    )
|
|
|
|
def _select_struct_union_class(self, token: str) -> type:
    """Map a 'struct'/'union' keyword value to its AST node class."""
    return c_ast.Struct if token == "struct" else c_ast.Union
|
|
|
|
# ------------------------------------------------------------------
|
|
# Token helpers
|
|
# ------------------------------------------------------------------
|
|
def _peek(self, k: int = 1) -> Optional[_Token]:
    """Return the k-th upcoming token (1-based) without consuming it."""
    return self._tokens.peek(k)
|
|
|
|
def _peek_type(self, k: int = 1) -> Optional[str]:
    """Type of the k-th upcoming token (1-based), or None past EOF."""
    tok = self._peek(k)
    if tok is None:
        return None
    return tok.type
|
|
|
|
def _advance(self) -> _Token:
    """Consume and return the next token; error at end of input."""
    tok = self._tokens.next()
    if tok is None:
        self._parse_error("At end of input", self.clex.filename)
    return tok
|
|
|
|
def _accept(self, token_type: str) -> Optional[_Token]:
    """Conditionally consume next token, only if it's of token_type.

    If it is of the expected type, consume and return it.
    Otherwise, leaves the token intact and returns None.
    """
    upcoming = self._peek()
    if upcoming is None or upcoming.type != token_type:
        return None
    return self._advance()
|
|
|
|
def _expect(self, token_type: str) -> _Token:
    """Consume the next token, erroring unless it has the given type."""
    tok = self._advance()
    if tok.type == token_type:
        return tok
    self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
|
|
|
|
def _mark(self) -> int:
    """Current token-stream position, for a later _reset."""
    return self._tokens.mark()
|
|
|
|
def _reset(self, mark: int) -> None:
    """Rewind the token stream to a position obtained from _mark."""
    self._tokens.reset(mark)
|
|
|
|
def _tok_coord(self, tok: _Token) -> Coord:
    """Coord for a token's source position."""
    return self._coord(tok.lineno, tok.column)
|
|
|
|
def _starts_declaration(self, tok: Optional[_Token] = None) -> bool:
    """Can *tok* (default: the next token) begin a declaration?"""
    tok = tok or self._peek()
    return tok is not None and tok.type in _DECL_START
|
|
|
|
def _starts_expression(self, tok: Optional[_Token] = None) -> bool:
    """Can *tok* (default: the next token) begin an expression?"""
    tok = tok or self._peek()
    return tok is not None and tok.type in _STARTS_EXPRESSION
|
|
|
|
def _starts_statement(self) -> bool:
    """Can the next token begin a statement?"""
    tok_type = self._peek_type()
    if tok_type is None:
        return False
    # A statement is either one of the statement-leading tokens or an
    # expression statement.
    return tok_type in _STARTS_STATEMENT or self._starts_expression()
|
|
|
|
def _starts_declarator(self, id_only: bool = False) -> bool:
    """Can the next token begin a declarator?

    With id_only, only a plain identifier (not a TYPEID) may name it.
    """
    tok_type = self._peek_type()
    if tok_type is None:
        return False
    # Pointer and parenthesized declarators may always start here.
    if tok_type in {"TIMES", "LPAREN"}:
        return True
    name_tokens = {"ID"} if id_only else {"ID", "TYPEID"}
    return tok_type in name_tokens
|
|
|
|
def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]:
    """Pure-lookahead wrapper around _scan_declarator_name_info:
    scans, then rewinds the token stream.
    """
    saved = self._mark()
    info = self._scan_declarator_name_info()
    self._reset(saved)
    return info
|
|
|
|
def _parse_any_declarator(
    self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False
) -> Tuple[Optional[c_ast.Node], bool]:
    """Parse a named or abstract declarator.

    Returns (declarator, is_named): is_named is True when a concrete
    (named) declarator was parsed, False for an abstract one (which
    may be None).
    """
    # C declarators are ambiguous without lookahead. For example:
    #   int foo(int (aa)); -> aa is a name (ID)
    #   typedef char TT;
    #   int bar(int (TT)); -> TT is a type (TYPEID) in parens
    name_type, saw_paren = self._peek_declarator_name_info()
    if name_type is None or (
        typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren
    ):
        # No declarator name found (or a parenthesized TYPEID that the
        # caller wants treated as a type): only an abstract declarator
        # is possible here.
        if not allow_abstract:
            tok = self._peek()
            coord = self._tok_coord(tok) if tok is not None else self.clex.filename
            self._parse_error("Invalid declarator", coord)
        decl = self._parse_abstract_declarator_opt()
        return decl, False

    if name_type == "TYPEID":
        if typeid_paren_as_abstract:
            decl = self._parse_typeid_noparen_declarator()
        else:
            decl = self._parse_typeid_declarator()
    else:
        decl = self._parse_id_declarator()
    return decl, True
|
|
|
|
def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]:
    """Scan forward over a declarator prefix, CONSUMING tokens.

    Returns (name_token_type, saw_paren): name_token_type is "ID" or
    "TYPEID" for the declarator's name token (None if no name is found),
    and saw_paren reports whether the name sat inside parentheses.
    Callers needing pure lookahead wrap this in mark/reset (see
    _peek_declarator_name_info).
    """
    saw_paren = False
    # Skip a pointer prefix: '*' optionally followed by type qualifiers.
    while self._accept("TIMES"):
        while self._peek_type() in _TYPE_QUALIFIER:
            self._advance()

    tok = self._peek()
    if tok is None:
        return None, saw_paren
    if tok.type in {"ID", "TYPEID"}:
        self._advance()
        return tok.type, saw_paren
    if tok.type == "LPAREN":
        # Parenthesized declarator: find the name recursively, then
        # skip to the matching ')' balancing nested parentheses.
        saw_paren = True
        self._advance()
        tok_type, nested_paren = self._scan_declarator_name_info()
        if nested_paren:
            saw_paren = True
        depth = 1
        while True:
            tok = self._peek()
            if tok is None:
                return None, saw_paren
            if tok.type == "LPAREN":
                depth += 1
            elif tok.type == "RPAREN":
                depth -= 1
                self._advance()
                if depth == 0:
                    break
                continue
            self._advance()
        return tok_type, saw_paren
    return None, saw_paren
|
|
|
|
def _starts_direct_abstract_declarator(self) -> bool:
    """Does a direct abstract declarator ('(' or '[') start here?"""
    return self._peek_type() in {"LPAREN", "LBRACKET"}
|
|
|
|
def _is_assignment_op(self) -> bool:
    """Is the next token an assignment operator?"""
    upcoming = self._peek()
    if upcoming is None:
        return False
    return upcoming.type in _ASSIGNMENT_OPS
|
|
|
|
def _try_parse_paren_type_name(
    self,
) -> Optional[Tuple[c_ast.Typename, int, _Token]]:
    """Parse and return a parenthesized type name if present.

    Returns (typ, mark, lparen_tok) when the next tokens look like
    '(' type_name ')', where typ is the parsed type name, mark is the
    token-stream position before parsing, and lparen_tok is the LPAREN
    token. Returns None if no parenthesized type name is present.
    """
    mark = self._mark()
    lparen_tok = self._accept("LPAREN")
    if lparen_tok is None:
        return None
    # '(' not followed by declaration-start: rewind, it's something else
    # (e.g. a parenthesized expression).
    if not self._starts_declaration():
        self._reset(mark)
        return None
    typ = self._parse_type_name()
    # Missing ')' after the type name: rewind completely and report "no
    # parenthesized type name here" rather than erroring.
    if self._accept("RPAREN") is None:
        self._reset(mark)
        return None
    return typ, mark, lparen_tok
|
|
|
|
# ------------------------------------------------------------------
|
|
# Top-level
|
|
# ------------------------------------------------------------------
|
|
# BNF: translation_unit_or_empty : translation_unit | empty
|
|
def _parse_translation_unit_or_empty(self) -> c_ast.FileAST:
    """BNF: translation_unit_or_empty : translation_unit | empty"""
    ext = [] if self._peek() is None else self._parse_translation_unit()
    return c_ast.FileAST(ext)
|
|
|
|
# BNF: translation_unit : external_declaration+
|
|
def _parse_translation_unit(self) -> List[c_ast.Node]:
    """BNF: translation_unit : external_declaration+"""
    externals: List[c_ast.Node] = []
    while self._peek() is not None:
        externals += self._parse_external_declaration()
    return externals
|
|
|
|
# BNF: external_declaration : function_definition
|
|
# | declaration
|
|
# | pp_directive
|
|
# | pppragma_directive
|
|
# | static_assert
|
|
# | ';'
|
|
def _parse_external_declaration(self) -> List[c_ast.Node]:
    """Parse one external declaration.

    Returns a list of AST nodes, since a single declaration may declare
    several names; directives and stray ';' produce an empty list.
    """
    tok = self._peek()
    if tok is None:
        return []
    if tok.type == "PPHASH":
        # Preprocessor artifact: consumed, yields no AST node.
        self._parse_pp_directive()
        return []
    if tok.type in {"PPPRAGMA", "_PRAGMA"}:
        return [self._parse_pppragma_directive()]
    if self._accept("SEMI"):
        # Stray ';' at file scope.
        return []
    if tok.type == "_STATIC_ASSERT":
        return self._parse_static_assert()

    if not self._starts_declaration(tok):
        # Special handling for old-style function definitions that have an
        # implicit return type, e.g.
        #
        #   foo() {
        #       return 5;
        #   }
        #
        # These get an implicit 'int' return type.
        decl = self._parse_id_declarator()
        param_decls = None
        if self._peek_type() != "LBRACE":
            self._parse_error("Invalid function definition", decl.coord)
        spec: _DeclSpec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(["int"], coord=decl.coord)],
            function=[],
        )
        func = self._build_function_definition(
            spec=spec,
            decl=decl,
            param_decls=param_decls,
            body=self._parse_compound_statement(),
        )
        return [func]

    # From here on, parsing a standard declaration/definition.
    spec, saw_type, spec_coord = self._parse_declaration_specifiers(
        allow_no_type=True
    )

    # Only a declarator named by a plain ID can start a function
    # definition; anything else is an ordinary declaration.
    name_type, _ = self._peek_declarator_name_info()
    if name_type != "ID":
        decls = self._parse_decl_body_with_spec(spec, saw_type)
        self._expect("SEMI")
        return decls

    decl = self._parse_id_declarator()

    # A '{' (function body), or a K&R-style parameter declaration list
    # followed by '{', makes this a function definition.
    if self._peek_type() == "LBRACE" or self._starts_declaration():
        param_decls = None
        if self._starts_declaration():
            param_decls = self._parse_declaration_list()
        if self._peek_type() != "LBRACE":
            self._parse_error("Invalid function definition", decl.coord)
        if not spec["type"]:
            # Implicit 'int' return type.
            spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]
        func = self._build_function_definition(
            spec=spec,
            decl=decl,
            param_decls=param_decls,
            body=self._parse_compound_statement(),
        )
        return [func]

    # Ordinary declaration: optional initializer, then possibly more
    # declarators after commas, then ';'.
    decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None)
    if self._accept("EQUALS"):
        decl_dict["init"] = self._parse_initializer()
    decls = self._parse_init_declarator_list(first=decl_dict)
    decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True)
    self._expect("SEMI")
    return decls
|
|
|
|
# ------------------------------------------------------------------
|
|
# Declarations
|
|
#
|
|
# Declarations always come as lists (because they can be several in one
|
|
# line). When returning parsed declarations, a list is always returned -
|
|
# even if it contains a single element.
|
|
# ------------------------------------------------------------------
|
|
def _parse_declaration(self) -> List[c_ast.Node]:
    """Parse one declaration terminated by ';'."""
    result = self._parse_decl_body()
    self._expect("SEMI")
    return result
|
|
|
|
# BNF: decl_body : declaration_specifiers decl_body_with_spec
|
|
def _parse_decl_body(self) -> List[c_ast.Node]:
    """BNF: decl_body : declaration_specifiers decl_body_with_spec"""
    specifiers, has_type, _ = self._parse_declaration_specifiers(allow_no_type=True)
    return self._parse_decl_body_with_spec(specifiers, has_type)
|
|
|
|
# BNF: decl_body_with_spec : init_declarator_list
|
|
# | struct_or_union_or_enum_only
|
|
def _parse_decl_body_with_spec(
    self, spec: "_DeclSpec", saw_type: bool
) -> List[c_ast.Node]:
    """Parse the declarator part of a declaration, given already-parsed
    specifiers; also handles bare 'struct S;'-style declarations.
    """
    decls = None
    if saw_type:
        if self._starts_declarator():
            decls = self._parse_init_declarator_list()
    else:
        # Without a type specifier only a plain ID may be declared
        # (a TYPEID here belongs to the specifier list instead).
        if self._starts_declarator(id_only=True):
            decls = self._parse_init_declarator_list(id_only=True)

    if decls is None:
        ty = spec["type"]
        s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
        if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
            # A declaration with no declarators, e.g. 'struct S { ... };'
            decls = [
                c_ast.Decl(
                    name=None,
                    quals=spec["qual"],
                    align=spec["alignment"],
                    storage=spec["storage"],
                    funcspec=spec["function"],
                    type=ty[0],
                    init=None,
                    bitsize=None,
                    coord=ty[0].coord,
                )
            ]
        else:
            # No declarator parsed: let _build_declarations recover a
            # name from spec['type'] (typedef-redeclaration case) or
            # report an error.
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None, bitsize=None)],
                typedef_namespace=True,
            )
    else:
        decls = self._build_declarations(
            spec=spec, decls=decls, typedef_namespace=True
        )

    return decls
|
|
|
|
# BNF: declaration_list : declaration+
|
|
def _parse_declaration_list(self) -> List[c_ast.Node]:
    """BNF: declaration_list : declaration+"""
    collected: List[c_ast.Node] = []
    while self._starts_declaration():
        collected += self._parse_declaration()
    return collected
|
|
|
|
# BNF: declaration_specifiers : (storage_class_specifier
|
|
# | type_specifier
|
|
# | type_qualifier
|
|
# | function_specifier
|
|
# | alignment_specifier)+
|
|
def _parse_declaration_specifiers(
    self, allow_no_type: bool = False
) -> Tuple["_DeclSpec", bool, Optional[Coord]]:
    """Parse declaration-specifier sequence.

    allow_no_type:
        If True, allow a missing type specifier without error.

    Returns:
        (spec, saw_type, first_coord) where spec is a dict with
        qual/storage/type/function/alignment entries, saw_type is True
        if a type specifier was consumed, and first_coord is the coord
        of the first specifier token (used for diagnostics).
    """
    spec = None
    saw_type = False
    first_coord = None

    # Consume specifier tokens one at a time until something that is
    # not a declaration specifier is seen.
    while True:
        tok = self._peek()
        if tok is None:
            break

        if tok.type == "_ALIGNAS":
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_alignment_specifier(), "alignment", append=True
            )
            continue

        # '_Atomic(' is the atomic type specifier; a bare '_Atomic'
        # (no paren) falls through to the qualifier case below.
        if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_atomic_specifier(), "type", append=True
            )
            saw_type = True
            continue

        if tok.type in _TYPE_QUALIFIER:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._advance().value, "qual", append=True
            )
            continue

        if tok.type in _STORAGE_CLASS:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._advance().value, "storage", append=True
            )
            continue

        if tok.type in _FUNCTION_SPEC:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._advance().value, "function", append=True
            )
            continue

        if tok.type in _TYPE_SPEC_SIMPLE:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            tok = self._advance()
            spec = self._add_declaration_specifier(
                spec,
                c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                "type",
                append=True,
            )
            saw_type = True
            continue

        if tok.type == "TYPEID":
            # A TYPEID after a type has been seen starts the declarator
            # (e.g. redeclaring a typedef name), so stop here.
            if saw_type:
                break
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            tok = self._advance()
            spec = self._add_declaration_specifier(
                spec,
                c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                "type",
                append=True,
            )
            saw_type = True
            continue

        if tok.type in {"STRUCT", "UNION"}:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_struct_or_union_specifier(), "type", append=True
            )
            saw_type = True
            continue

        if tok.type == "ENUM":
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_enum_specifier(), "type", append=True
            )
            saw_type = True
            continue

        break

    # Not a single specifier was consumed.
    if spec is None:
        self._parse_error("Invalid declaration", self.clex.filename)

    if not saw_type and not allow_no_type:
        self._parse_error("Missing type in declaration", first_coord)

    return spec, saw_type, first_coord
|
|
|
|
# BNF: specifier_qualifier_list : (type_specifier
|
|
# | type_qualifier
|
|
# | alignment_specifier)+
|
|
def _parse_specifier_qualifier_list(self) -> "_DeclSpec":
    """Parse a specifier-qualifier list (used in struct declarations and
    type names): type specifiers, type qualifiers and alignment
    specifiers only — no storage classes or function specifiers.
    """
    spec = None
    saw_type = False
    saw_alignment = False
    first_coord = None

    # Consume tokens until something that is not a specifier/qualifier.
    while True:
        tok = self._peek()
        if tok is None:
            break

        if tok.type == "_ALIGNAS":
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_alignment_specifier(), "alignment", append=True
            )
            saw_alignment = True
            continue

        # '_Atomic(' is the atomic type specifier; a bare '_Atomic'
        # falls through to the qualifier case below.
        if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_atomic_specifier(), "type", append=True
            )
            saw_type = True
            continue

        if tok.type in _TYPE_QUALIFIER:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._advance().value, "qual", append=True
            )
            continue

        if tok.type in _TYPE_SPEC_SIMPLE:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            tok = self._advance()
            spec = self._add_declaration_specifier(
                spec,
                c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                "type",
                append=True,
            )
            saw_type = True
            continue

        if tok.type == "TYPEID":
            # A TYPEID after a type has been seen belongs to the
            # declarator, so stop here.
            if saw_type:
                break
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            tok = self._advance()
            spec = self._add_declaration_specifier(
                spec,
                c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
                "type",
                append=True,
            )
            saw_type = True
            continue

        if tok.type in {"STRUCT", "UNION"}:
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_struct_or_union_specifier(), "type", append=True
            )
            saw_type = True
            continue

        if tok.type == "ENUM":
            if first_coord is None:
                first_coord = self._tok_coord(tok)
            spec = self._add_declaration_specifier(
                spec, self._parse_enum_specifier(), "type", append=True
            )
            saw_type = True
            continue

        break

    if spec is None:
        self._parse_error("Invalid specifier list", self.clex.filename)

    # An alignment specifier alone is accepted here (matches the
    # original grammar); otherwise a type is required.
    if not saw_type and not saw_alignment:
        self._parse_error("Missing type in declaration", first_coord)

    if spec.get("storage") is None:
        spec["storage"] = []
    if spec.get("function") is None:
        spec["function"] = []

    return spec
|
|
|
|
# BNF: type_qualifier_list : type_qualifier+
|
|
def _parse_type_qualifier_list(self) -> List[str]:
    """BNF: type_qualifier_list : type_qualifier+"""
    qualifiers: List[str] = []
    while self._peek_type() in _TYPE_QUALIFIER:
        qualifiers.append(self._advance().value)
    return qualifiers
|
|
|
|
# BNF: alignment_specifier : _ALIGNAS '(' type_name | constant_expression ')'
|
|
def _parse_alignment_specifier(self) -> c_ast.Node:
    """BNF: alignment_specifier : _ALIGNAS '(' (type_name | constant_expression) ')'"""
    alignas_tok = self._expect("_ALIGNAS")
    self._expect("LPAREN")

    # The parenthesized argument is either a type name or a constant
    # expression, distinguished by the token that starts it.
    if self._starts_declaration():
        argument = self._parse_type_name()
    else:
        argument = self._parse_constant_expression()

    self._expect("RPAREN")
    return c_ast.Alignas(argument, self._tok_coord(alignas_tok))
|
|
|
|
# BNF: atomic_specifier : _ATOMIC '(' type_name ')'
|
|
def _parse_atomic_specifier(self) -> c_ast.Node:
    """Parse `_Atomic(type-name)`; the qualifier is folded into the type's quals."""
    self._expect("_ATOMIC")
    self._expect("LPAREN")
    inner = self._parse_type_name()
    self._expect("RPAREN")
    inner.quals.append("_Atomic")
    return inner
|
|
|
|
# BNF: init_declarator_list : init_declarator (',' init_declarator)*
|
|
def _parse_init_declarator_list(
    self, first: Optional["_DeclInfo"] = None, id_only: bool = False
) -> List["_DeclInfo"]:
    """Parse a comma-separated list of init-declarators.

    `first` may supply an already-parsed leading declarator; `id_only`
    restricts declarator names to plain identifiers.
    """
    if first is None:
        first = self._parse_init_declarator(id_only=id_only)
    result = [first]
    while self._accept("COMMA"):
        result.append(self._parse_init_declarator(id_only=id_only))
    return result
|
|
|
|
# BNF: init_declarator : declarator ('=' initializer)?
|
|
def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo":
    """Parse one declarator with an optional `= initializer` part."""
    if id_only:
        declarator = self._parse_id_declarator()
    else:
        declarator = self._parse_declarator()
    initializer = self._parse_initializer() if self._accept("EQUALS") else None
    return dict(decl=declarator, init=initializer, bitsize=None)
|
|
|
|
# ------------------------------------------------------------------
|
|
# Structs/unions/enums
|
|
# ------------------------------------------------------------------
|
|
# BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}'
|
|
# | struct_or_union ID
|
|
def _parse_struct_or_union_specifier(self) -> c_ast.Node:
    """Parse a struct/union specifier: optional tag, optional member body.

    Returns a Struct or Union node (chosen by _select_struct_union_class).
    `decls` is None for a bare tag reference, [] for an explicitly empty
    body, and a list of member declarations otherwise.
    """
    tok = self._advance()  # the STRUCT or UNION keyword itself
    klass = self._select_struct_union_class(tok.value)

    if self._peek_type() in {"ID", "TYPEID"}:
        # Tagged form: `struct NAME` optionally followed by `{ ... }`.
        name_tok = self._advance()
        if self._peek_type() == "LBRACE":
            self._advance()
            if self._accept("RBRACE"):
                # Empty body -- decls=[] distinguishes it from a bare reference.
                return klass(
                    name=name_tok.value, decls=[], coord=self._tok_coord(name_tok)
                )
            decls = self._parse_struct_declaration_list()
            self._expect("RBRACE")
            return klass(
                name=name_tok.value, decls=decls, coord=self._tok_coord(name_tok)
            )

        # Bare reference to a (possibly forward-declared) tag: decls=None.
        return klass(
            name=name_tok.value, decls=None, coord=self._tok_coord(name_tok)
        )

    if self._peek_type() == "LBRACE":
        # Anonymous struct/union with a body.
        brace_tok = self._advance()
        if self._accept("RBRACE"):
            return klass(name=None, decls=[], coord=self._tok_coord(brace_tok))
        decls = self._parse_struct_declaration_list()
        self._expect("RBRACE")
        return klass(name=None, decls=decls, coord=self._tok_coord(brace_tok))

    self._parse_error("Invalid struct/union declaration", self._tok_coord(tok))
|
|
|
|
# BNF: struct_declaration_list : struct_declaration+
|
|
def _parse_struct_declaration_list(self) -> List[c_ast.Node]:
    """Parse member declarations until the closing brace (or end of input)."""
    members: List[c_ast.Node] = []
    while self._peek_type() not in {None, "RBRACE"}:
        parsed = self._parse_struct_declaration()
        if parsed is not None:
            members.extend(parsed)
    return members
|
|
|
|
# BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';'
|
|
# | static_assert
|
|
# | pppragma_directive
|
|
def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]:
    """Parse one struct/union member declaration.

    Returns a list of Decl nodes, a single-pragma list, or None when the
    member is a stray ';' (empty declaration).
    """
    if self._peek_type() == "SEMI":
        self._advance()
        return None
    if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
        return [self._parse_pppragma_directive()]

    spec = self._parse_specifier_qualifier_list()
    # A specifier-qualifier list cannot contain storage specifiers, so a
    # 'typedef' here would indicate a parser bug rather than bad input.
    assert "typedef" not in spec.get("storage", [])

    decls = None
    if self._starts_declarator() or self._peek_type() == "COLON":
        decls = self._parse_struct_declarator_list()
    if decls is not None:
        self._expect("SEMI")
        return self._build_declarations(spec=spec, decls=decls)

    if len(spec["type"]) == 1:
        # No declarator and a single type specifier: an anonymous
        # struct/union member (or a lone named type).
        node = spec["type"][0]
        if isinstance(node, c_ast.Node):
            decl_type = node
        else:
            # Raw name list -- wrap it so _build_declarations gets a node.
            decl_type = c_ast.IdentifierType(node)
        self._expect("SEMI")
        return self._build_declarations(
            spec=spec, decls=[dict(decl=decl_type, init=None, bitsize=None)]
        )

    # Fallback: specifiers with no usable declarator at all.
    self._expect("SEMI")
    return self._build_declarations(
        spec=spec, decls=[dict(decl=None, init=None, bitsize=None)]
    )
|
|
|
|
# BNF: struct_declarator_list : struct_declarator (',' struct_declarator)*
|
|
def _parse_struct_declarator_list(self) -> List["_DeclInfo"]:
    """Parse one or more comma-separated struct declarators."""
    items = [self._parse_struct_declarator()]
    while self._accept("COMMA"):
        items.append(self._parse_struct_declarator())
    return items
|
|
|
|
# BNF: struct_declarator : declarator? ':' constant_expression
|
|
# | declarator (':' constant_expression)?
|
|
def _parse_struct_declarator(self) -> "_DeclInfo":
    """Parse a struct declarator, which may carry a `: width` bit-field."""
    if self._accept("COLON"):
        # Anonymous bit-field: no declarator at all, only a width.
        width = self._parse_constant_expression()
        return dict(
            decl=c_ast.TypeDecl(None, None, None, None),
            init=None,
            bitsize=width,
        )

    declarator = self._parse_declarator()
    width = self._parse_constant_expression() if self._accept("COLON") else None
    return dict(decl=declarator, init=None, bitsize=width)
|
|
|
|
# BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}'
|
|
# | ENUM ID
|
|
def _parse_enum_specifier(self) -> c_ast.Node:
    """Parse `enum NAME`, `enum NAME {...}`, or `enum {...}`."""
    enum_tok = self._expect("ENUM")
    coord = self._tok_coord(enum_tok)

    name = None
    if self._peek_type() in {"ID", "TYPEID"}:
        name = self._advance().value
        if self._peek_type() != "LBRACE":
            # Bare reference to a previously declared enum tag.
            return c_ast.Enum(name, None, coord)
        self._advance()
    else:
        self._expect("LBRACE")

    values = self._parse_enumerator_list()
    self._expect("RBRACE")
    return c_ast.Enum(name, values, coord)
|
|
|
|
# BNF: enumerator_list : enumerator (',' enumerator)* ','?
|
|
def _parse_enumerator_list(self) -> c_ast.Node:
    """Parse enumerators, allowing a trailing comma before '}'."""
    first = self._parse_enumerator()
    result = c_ast.EnumeratorList([first], first.coord)
    while self._accept("COMMA"):
        if self._peek_type() == "RBRACE":
            break  # trailing comma before the closing brace
        result.enumerators.append(self._parse_enumerator())
    return result
|
|
|
|
# BNF: enumerator : ID ('=' constant_expression)?
|
|
def _parse_enumerator(self) -> c_ast.Node:
    """Parse one enumerator and register its name in the current scope."""
    name_tok = self._expect("ID")
    value = self._parse_constant_expression() if self._accept("EQUALS") else None
    node = c_ast.Enumerator(name_tok.value, value, self._tok_coord(name_tok))
    self._add_identifier(node.name, node.coord)
    return node
|
|
|
|
# ------------------------------------------------------------------
|
|
# Declarators
|
|
# ------------------------------------------------------------------
|
|
# BNF: declarator : pointer? direct_declarator
|
|
def _parse_declarator(self) -> c_ast.Node:
    """Parse a concrete (named) declarator; abstract forms are not allowed."""
    node, _ = self._parse_any_declarator(
        allow_abstract=False, typeid_paren_as_abstract=False
    )
    assert node is not None
    return node
|
|
|
|
# BNF: id_declarator : declarator with ID name
|
|
def _parse_id_declarator(self) -> c_ast.Node:
    """Parse a declarator whose name must be a plain identifier."""
    return self._parse_declarator_kind(kind="id", allow_paren=True)
|
|
|
|
# BNF: typeid_declarator : declarator with TYPEID name
|
|
def _parse_typeid_declarator(self) -> c_ast.Node:
    """Parse a declarator whose name is a typedef-name token."""
    return self._parse_declarator_kind(kind="typeid", allow_paren=True)
|
|
|
|
# BNF: typeid_noparen_declarator : declarator without parenthesized name
|
|
def _parse_typeid_noparen_declarator(self) -> c_ast.Node:
    """Parse a typedef-name declarator that may not be parenthesized."""
    return self._parse_declarator_kind(kind="typeid", allow_paren=False)
|
|
|
|
# BNF: declarator_kind : pointer? direct_declarator(kind)
|
|
def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node:
    """Parse `pointer? direct_declarator`, wrapping the pointer (if any)
    around the direct declarator."""
    pointer = self._parse_pointer() if self._peek_type() == "TIMES" else None
    base = self._parse_direct_declarator(kind, allow_paren=allow_paren)
    if pointer is None:
        return base
    return self._type_modify_decl(base, pointer)
|
|
|
|
# BNF: direct_declarator : ID | TYPEID | '(' declarator ')'
|
|
# | direct_declarator '[' ... ']'
|
|
# | direct_declarator '(' ... ')'
|
|
def _parse_direct_declarator(
    self, kind: str, allow_paren: bool = True
) -> c_ast.Node:
    """Parse the core of a declarator -- a name or a parenthesized
    declarator -- then attach any array/function suffixes."""
    if allow_paren and self._accept("LPAREN"):
        core = self._parse_declarator_kind(kind, allow_paren=True)
        self._expect("RPAREN")
    else:
        expected = "ID" if kind == "id" else "TYPEID"
        name_tok = self._expect(expected)
        core = c_ast.TypeDecl(
            declname=name_tok.value,
            type=None,
            quals=None,
            align=None,
            coord=self._tok_coord(name_tok),
        )
    return self._parse_decl_suffixes(core)
|
|
|
|
def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node:
    """Attach any chain of `[...]` / `(...)` suffixes to `decl`."""
    while True:
        nxt = self._peek_type()
        if nxt == "LBRACKET":
            decl = self._type_modify_decl(decl, self._parse_array_decl(decl))
        elif nxt == "LPAREN":
            decl = self._type_modify_decl(decl, self._parse_function_decl(decl))
        else:
            return decl
|
|
|
|
# BNF: array_decl : '[' array_specifiers? assignment_expression? ']'
|
|
def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
    """Parse an array suffix, borrowing the base declarator's coordinate."""
    return self._parse_array_decl_common(base_type=None, coord=base_decl.coord)
|
|
|
|
def _parse_array_decl_common(
    self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None
) -> c_ast.Node:
    """Parse an array declarator suffix and return an ArrayDecl node.

    base_type:
        Base declarator node to attach (None for direct-declarator parsing,
        TypeDecl for abstract declarators).

    coord:
        Coordinate to use for the ArrayDecl. If None, uses the '[' token.
    """
    lbrack_tok = self._expect("LBRACKET")
    if coord is None:
        coord = self._tok_coord(lbrack_tok)

    def make_array_decl(dim, dim_quals):
        # Local helper so every exit point builds the node identically.
        return c_ast.ArrayDecl(
            type=base_type, dim=dim, dim_quals=dim_quals, coord=coord
        )

    # Form: [static qual* expr] -- 'static' first; size expression required.
    if self._accept("STATIC"):
        dim_quals = ["static"] + (self._parse_type_qualifier_list() or [])
        dim = self._parse_assignment_expression()
        self._expect("RBRACKET")
        return make_array_decl(dim, dim_quals)

    # Forms that start with qualifiers: [qual+ static? ...]
    if self._peek_type() in _TYPE_QUALIFIER:
        dim_quals = self._parse_type_qualifier_list() or []
        if self._accept("STATIC"):
            # [qual+ static expr] -- size expression required after 'static'.
            dim_quals = dim_quals + ["static"]
            dim = self._parse_assignment_expression()
            self._expect("RBRACKET")
            return make_array_decl(dim, dim_quals)
        times_tok = self._accept("TIMES")
        if times_tok:
            # [qual+ *] -- the '*' dimension is represented as an ID node.
            self._expect("RBRACKET")
            dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
            return make_array_decl(dim, dim_quals)
        # [qual+ expr?] -- size expression is optional here.
        dim = None
        if self._starts_expression():
            dim = self._parse_assignment_expression()
        self._expect("RBRACKET")
        return make_array_decl(dim, dim_quals)

    # [*] with no qualifiers.
    times_tok = self._accept("TIMES")
    if times_tok:
        self._expect("RBRACKET")
        dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
        return make_array_decl(dim, [])

    # Plain [expr?].
    dim = None
    if self._starts_expression():
        dim = self._parse_assignment_expression()
    self._expect("RBRACKET")
    return make_array_decl(dim, [])
|
|
|
|
# BNF: function_decl : '(' parameter_type_list_opt | identifier_list_opt ')'
|
|
def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
    """Parse a function-declarator suffix `( params? )`.

    When a function body immediately follows, the parameter names are
    registered in the current scope so the body can reference them.
    """
    self._expect("LPAREN")
    if self._accept("RPAREN"):
        args = None
    else:
        if self._starts_declaration():
            args = self._parse_parameter_type_list()
        else:
            args = self._parse_identifier_list_opt()
        self._expect("RPAREN")

    func = c_ast.FuncDecl(args=args, type=None, coord=base_decl.coord)

    if self._peek_type() == "LBRACE" and func.args is not None:
        for param in func.args.params:
            if isinstance(param, c_ast.EllipsisParam):
                break
            param_name = getattr(param, "name", None)
            if param_name:
                self._add_identifier(param_name, param.coord)

    return func
|
|
|
|
# BNF: pointer : '*' type_qualifier_list? pointer?
|
|
def _parse_pointer(self) -> Optional[c_ast.Node]:
    """Parse `('*' type_qualifier*)*`; returns None when no '*' is present.

    The first '*' read becomes the innermost PtrDecl.
    """
    levels = []
    star = self._accept("TIMES")
    while star:
        level_quals = self._parse_type_qualifier_list() or []
        levels.append((level_quals, self._tok_coord(star)))
        star = self._accept("TIMES")

    ptr: Optional[c_ast.Node] = None
    for quals, coord in levels:
        ptr = c_ast.PtrDecl(quals=quals, type=ptr, coord=coord)
    return ptr
|
|
|
|
# BNF: parameter_type_list : parameter_list (',' ELLIPSIS)?
|
|
def _parse_parameter_type_list(self) -> c_ast.ParamList:
    """Parse a parameter list, optionally terminated by `, ...`."""
    plist = self._parse_parameter_list()
    if self._peek_type() == "COMMA" and self._peek_type(2) == "ELLIPSIS":
        self._advance()  # consume the ','
        ellipsis_tok = self._advance()
        plist.params.append(c_ast.EllipsisParam(self._tok_coord(ellipsis_tok)))
    return plist
|
|
|
|
# BNF: parameter_list : parameter_declaration (',' parameter_declaration)*
|
|
def _parse_parameter_list(self) -> c_ast.ParamList:
    """Parse `parameter_declaration (',' parameter_declaration)*`.

    Stops before a `, ...` tail, which the caller handles.
    """
    head = self._parse_parameter_declaration()
    plist = c_ast.ParamList([head], head.coord)
    while self._peek_type() == "COMMA" and self._peek_type(2) != "ELLIPSIS":
        self._advance()
        plist.params.append(self._parse_parameter_declaration())
    return plist
|
|
|
|
# BNF: parameter_declaration : declaration_specifiers declarator?
|
|
# | declaration_specifiers abstract_declarator_opt
|
|
def _parse_parameter_declaration(self) -> c_ast.Node:
    """Parse one parameter: specifiers plus a named or abstract declarator."""
    spec, _, spec_coord = self._parse_declaration_specifiers(allow_no_type=True)

    # No type specifier at all: default to int (pre-C99 style parameters).
    if not spec["type"]:
        spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]

    if not self._starts_declarator():
        abstract = self._parse_abstract_declarator_opt()
        return self._build_parameter_declaration(spec, abstract, spec_coord)

    declarator, is_named = self._parse_any_declarator(
        allow_abstract=True, typeid_paren_as_abstract=True
    )
    if is_named:
        return self._build_declarations(
            spec=spec, decls=[dict(decl=declarator, init=None, bitsize=None)]
        )[0]
    return self._build_parameter_declaration(spec, declarator, spec_coord)
|
|
|
|
def _build_parameter_declaration(
    self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord]
) -> c_ast.Node:
    """Build the AST node for a parameter that has no declared name.

    NOTE(review): when the specifier list has more than one entry and its
    last entry is a single name that is a known type in scope, the
    parameter is rebuilt as a full declaration instead of an abstract
    Typename -- this appears to mirror pycparser's old grammar action for
    ambiguous parameter declarations; confirm against the original grammar.
    """
    if (
        len(spec["type"]) > 1
        and len(spec["type"][-1].names) == 1
        and self._is_type_in_scope(spec["type"][-1].names[0])
    ):
        return self._build_declarations(
            spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
        )[0]

    # Abstract (unnamed) parameter: wrap whatever declarator we have (or a
    # bare TypeDecl) in a Typename and let _fix_decl_name_type fill in the
    # inner type from the specifier list.
    decl = c_ast.Typename(
        name="",
        quals=spec["qual"],
        align=None,
        type=decl or c_ast.TypeDecl(None, None, None, None),
        coord=spec_coord,
    )
    return self._fix_decl_name_type(decl, spec["type"])
|
|
|
|
# BNF: identifier_list_opt : identifier_list | empty
|
|
def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]:
    """Parse a K&R-style identifier list, or None when the list is empty."""
    return (
        None if self._peek_type() == "RPAREN" else self._parse_identifier_list()
    )
|
|
|
|
# BNF: identifier_list : identifier (',' identifier)*
|
|
def _parse_identifier_list(self) -> c_ast.Node:
    """Parse `identifier (',' identifier)*` into a ParamList."""
    head = self._parse_identifier()
    plist = c_ast.ParamList([head], head.coord)
    while self._accept("COMMA"):
        plist.params.append(self._parse_identifier())
    return plist
|
|
|
|
# ------------------------------------------------------------------
|
|
# Abstract declarators
|
|
# ------------------------------------------------------------------
|
|
# BNF: type_name : specifier_qualifier_list abstract_declarator_opt
|
|
def _parse_type_name(self) -> c_ast.Typename:
    """Parse a type name (as used in casts/sizeof):
    specifier-qualifier list plus an optional abstract declarator."""
    spec = self._parse_specifier_qualifier_list()
    decl = self._parse_abstract_declarator_opt()

    if decl is not None:
        coord = decl.coord
    elif spec["type"]:
        coord = spec["type"][0].coord
    else:
        coord = None

    typename = c_ast.Typename(
        name="",
        quals=spec["qual"][:],
        align=None,
        type=decl or c_ast.TypeDecl(None, None, None, None),
        coord=coord,
    )
    return cast(c_ast.Typename, self._fix_decl_name_type(typename, spec["type"]))
|
|
|
|
# BNF: abstract_declarator_opt : pointer? direct_abstract_declarator?
|
|
def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]:
    """Parse an optional abstract declarator (pointer and/or direct part)."""
    if self._peek_type() != "TIMES":
        if self._starts_direct_abstract_declarator():
            return self._parse_direct_abstract_declarator()
        return None

    ptr = self._parse_pointer()
    if self._starts_direct_abstract_declarator():
        inner = self._parse_direct_abstract_declarator()
    else:
        inner = c_ast.TypeDecl(None, None, None, None)
    assert ptr is not None
    return self._type_modify_decl(inner, ptr)
|
|
|
|
# BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')'
|
|
# | '(' abstract_declarator ')'
|
|
# | '[' ... ']'
|
|
def _parse_direct_abstract_declarator(self) -> c_ast.Node:
    """Parse the direct part of an abstract declarator.

    Handles three heads -- `(params)`, `(abstract-declarator)` and `[...]`
    -- then attaches any trailing array/function suffixes.
    """
    lparen_tok = self._accept("LPAREN")
    if lparen_tok:
        if self._starts_declaration() or self._peek_type() == "RPAREN":
            # `( parameter-type-list? )` -- an unnamed function type.
            params = self._parse_parameter_type_list_opt()
            self._expect("RPAREN")
            decl = c_ast.FuncDecl(
                args=params,
                type=c_ast.TypeDecl(None, None, None, None),
                coord=self._tok_coord(lparen_tok),
            )
        else:
            # Parenthesized abstract declarator, e.g. `(*)[3]`.
            decl = self._parse_abstract_declarator_opt()
            self._expect("RPAREN")
            assert decl is not None
    elif self._peek_type() == "LBRACKET":
        decl = self._parse_abstract_array_base()
    else:
        self._parse_error("Invalid abstract declarator", self.clex.filename)

    return self._parse_decl_suffixes(decl)
|
|
|
|
# BNF: parameter_type_list_opt : parameter_type_list | empty
|
|
def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]:
    """Parse a parameter type list, or None for an empty `()`."""
    return (
        None
        if self._peek_type() == "RPAREN"
        else self._parse_parameter_type_list()
    )
|
|
|
|
# BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']'
|
|
def _parse_abstract_array_base(self) -> c_ast.Node:
    """Parse a leading `[...]` of an abstract declarator over a fresh TypeDecl."""
    empty_base = c_ast.TypeDecl(None, None, None, None)
    return self._parse_array_decl_common(base_type=empty_base, coord=None)
|
|
|
|
# ------------------------------------------------------------------
|
|
# Statements
|
|
# ------------------------------------------------------------------
|
|
# BNF: statement : labeled_statement | compound_statement
|
|
# | selection_statement | iteration_statement
|
|
# | jump_statement | expression_statement
|
|
# | static_assert | pppragma_directive
|
|
def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]:
    """Dispatch on the next token to the appropriate statement parser."""
    tok_type = self._peek_type()
    if tok_type in {"CASE", "DEFAULT"}:
        return self._parse_labeled_statement()
    if tok_type == "ID" and self._peek_type(2) == "COLON":
        # `name:` -- a goto label.
        return self._parse_labeled_statement()
    if tok_type == "LBRACE":
        return self._parse_compound_statement()
    if tok_type in {"IF", "SWITCH"}:
        return self._parse_selection_statement()
    if tok_type in {"WHILE", "DO", "FOR"}:
        return self._parse_iteration_statement()
    if tok_type in {"GOTO", "BREAK", "CONTINUE", "RETURN"}:
        return self._parse_jump_statement()
    if tok_type in {"PPPRAGMA", "_PRAGMA"}:
        return self._parse_pppragma_directive()
    if tok_type == "_STATIC_ASSERT":
        return self._parse_static_assert()
    return self._parse_expression_statement()
|
|
|
|
# BNF: pragmacomp_or_statement : pppragma_directive* statement
|
|
def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]:
    """Parse a statement, folding any leading pragma directives and the
    statement into a single Compound."""
    if self._peek_type() not in {"PPPRAGMA", "_PRAGMA"}:
        return self._parse_statement()
    pragmas = self._parse_pppragma_directive_list()
    stmt = self._parse_statement()
    return c_ast.Compound(block_items=pragmas + [stmt], coord=pragmas[0].coord)
|
|
|
|
# BNF: block_item : declaration | statement
|
|
def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]:
    """Parse a declaration (yields a list) or a statement (single node)."""
    if self._starts_declaration():
        return self._parse_declaration()
    return self._parse_statement()
|
|
|
|
# BNF: block_item_list : block_item+
|
|
def _parse_block_item_list(self) -> List[c_ast.Node]:
    """Collect block items until '}' or end of input."""
    items: List[c_ast.Node] = []
    while self._peek_type() not in {"RBRACE", None}:
        parsed = self._parse_block_item()
        if not isinstance(parsed, list):
            items.append(parsed)
        elif parsed != [None]:  # [None] marks an empty declaration
            items.extend(parsed)
    return items
|
|
|
|
# BNF: compound_statement : '{' block_item_list? '}'
|
|
def _parse_compound_statement(self) -> c_ast.Node:
    """Parse `{ block_item* }`; an empty body yields block_items=None."""
    lbrace_tok = self._expect("LBRACE")
    coord = self._tok_coord(lbrace_tok)
    if self._accept("RBRACE"):
        return c_ast.Compound(block_items=None, coord=coord)
    body = self._parse_block_item_list()
    self._expect("RBRACE")
    return c_ast.Compound(block_items=body, coord=coord)
|
|
|
|
# BNF: labeled_statement : ID ':' statement
|
|
# | CASE constant_expression ':' statement
|
|
# | DEFAULT ':' statement
|
|
def _parse_labeled_statement(self) -> c_ast.Node:
    """Parse `label:`, `case expr:` or `default:` plus the labeled statement.

    A label immediately followed by something that cannot start a statement
    (e.g. a closing brace) gets an EmptyStatement body.
    """
    tok_type = self._peek_type()
    match tok_type:
        case "ID":
            name_tok = self._advance()
            self._expect("COLON")
            if self._starts_statement():
                stmt = self._parse_pragmacomp_or_statement()
            else:
                stmt = c_ast.EmptyStatement(self._tok_coord(name_tok))
            return c_ast.Label(name_tok.value, stmt, self._tok_coord(name_tok))
        case "CASE":
            case_tok = self._advance()
            expr = self._parse_constant_expression()
            self._expect("COLON")
            if self._starts_statement():
                stmt = self._parse_pragmacomp_or_statement()
            else:
                stmt = c_ast.EmptyStatement(self._tok_coord(case_tok))
            # Case holds a statement list; fix_switch_cases regroups later.
            return c_ast.Case(expr, [stmt], self._tok_coord(case_tok))
        case "DEFAULT":
            def_tok = self._advance()
            self._expect("COLON")
            if self._starts_statement():
                stmt = self._parse_pragmacomp_or_statement()
            else:
                stmt = c_ast.EmptyStatement(self._tok_coord(def_tok))
            return c_ast.Default([stmt], self._tok_coord(def_tok))
        case _:
            self._parse_error("Invalid labeled statement", self.clex.filename)
|
|
|
|
# BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)?
|
|
# | SWITCH '(' expression ')' statement
|
|
def _parse_selection_statement(self) -> c_ast.Node:
    """Parse `if (...) ... [else ...]` or `switch (...) ...`."""
    tok = self._advance()
    coord = self._tok_coord(tok)
    if tok.type == "IF":
        self._expect("LPAREN")
        cond = self._parse_expression()
        self._expect("RPAREN")
        then_branch = self._parse_pragmacomp_or_statement()
        else_branch = (
            self._parse_pragmacomp_or_statement() if self._accept("ELSE") else None
        )
        return c_ast.If(cond, then_branch, else_branch, coord)
    if tok.type == "SWITCH":
        self._expect("LPAREN")
        subject = self._parse_expression()
        self._expect("RPAREN")
        body = self._parse_pragmacomp_or_statement()
        return fix_switch_cases(c_ast.Switch(subject, body, coord))
    self._parse_error("Invalid selection statement", coord)
|
|
|
|
# BNF: iteration_statement : WHILE '(' expression ')' statement
|
|
# | DO statement WHILE '(' expression ')' ';'
|
|
# | FOR '(' (declaration | expression_opt) ';'
|
|
# expression_opt ';' expression_opt ')' statement
|
|
def _parse_iteration_statement(self) -> c_ast.Node:
    """Parse a while, do-while, or for statement."""
    tok = self._advance()
    match tok.type:
        case "WHILE":
            self._expect("LPAREN")
            cond = self._parse_expression()
            self._expect("RPAREN")
            stmt = self._parse_pragmacomp_or_statement()
            return c_ast.While(cond, stmt, self._tok_coord(tok))
        case "DO":
            stmt = self._parse_pragmacomp_or_statement()
            self._expect("WHILE")
            self._expect("LPAREN")
            cond = self._parse_expression()
            self._expect("RPAREN")
            self._expect("SEMI")
            return c_ast.DoWhile(cond, stmt, self._tok_coord(tok))
        case "FOR":
            self._expect("LPAREN")
            if self._starts_declaration():
                # C99 for-loop declaration; the declaration parser consumes
                # its own terminating ';', so only two more are expected.
                decls = self._parse_declaration()
                init = c_ast.DeclList(decls, self._tok_coord(tok))
                cond = self._parse_expression_opt()
                self._expect("SEMI")
                next_expr = self._parse_expression_opt()
                self._expect("RPAREN")
                stmt = self._parse_pragmacomp_or_statement()
                return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))

            # Classic `for (init?; cond?; next?)` with expression clauses.
            init = self._parse_expression_opt()
            self._expect("SEMI")
            cond = self._parse_expression_opt()
            self._expect("SEMI")
            next_expr = self._parse_expression_opt()
            self._expect("RPAREN")
            stmt = self._parse_pragmacomp_or_statement()
            return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))
        case _:
            self._parse_error("Invalid iteration statement", self._tok_coord(tok))
|
|
|
|
# BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';'
|
|
# | RETURN expression? ';'
|
|
def _parse_jump_statement(self) -> c_ast.Node:
    """Parse goto/break/continue/return, each terminated by ';'."""
    tok = self._advance()
    coord = self._tok_coord(tok)
    if tok.type == "GOTO":
        target = self._expect("ID")
        self._expect("SEMI")
        return c_ast.Goto(target.value, coord)
    if tok.type == "BREAK":
        self._expect("SEMI")
        return c_ast.Break(coord)
    if tok.type == "CONTINUE":
        self._expect("SEMI")
        return c_ast.Continue(coord)
    if tok.type == "RETURN":
        if self._accept("SEMI"):
            return c_ast.Return(None, coord)
        value = self._parse_expression()
        self._expect("SEMI")
        return c_ast.Return(value, coord)
    self._parse_error("Invalid jump statement", coord)
|
|
|
|
# BNF: expression_statement : expression_opt ';'
|
|
def _parse_expression_statement(self) -> c_ast.Node:
    """Parse `expression? ;`; a bare ';' becomes an EmptyStatement."""
    expr = self._parse_expression_opt()
    semi_tok = self._expect("SEMI")
    if expr is None:
        return c_ast.EmptyStatement(self._tok_coord(semi_tok))
    return expr
|
|
|
|
# ------------------------------------------------------------------
|
|
# Expressions
|
|
# ------------------------------------------------------------------
|
|
# BNF: expression_opt : expression | empty
|
|
def _parse_expression_opt(self) -> Optional[c_ast.Node]:
    """Parse an expression if one can start here, else return None."""
    return self._parse_expression() if self._starts_expression() else None
|
|
|
|
# BNF: expression : assignment_expression (',' assignment_expression)*
|
|
def _parse_expression(self) -> c_ast.Node:
    """Parse a (possibly comma-separated) expression.

    A single assignment expression is returned as-is; two or more are
    wrapped in an ExprList.
    """
    first = self._parse_assignment_expression()
    if not self._accept("COMMA"):
        return first
    parts = [first, self._parse_assignment_expression()]
    while self._accept("COMMA"):
        parts.append(self._parse_assignment_expression())
    return c_ast.ExprList(parts, first.coord)
|
|
|
|
# BNF: assignment_expression : conditional_expression
|
|
# | unary_expression assignment_op assignment_expression
|
|
def _parse_assignment_expression(self) -> c_ast.Node:
    """Parse an assignment expression; also accepts `({ ... })` blocks
    (GCC statement-expression form)."""
    if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE":
        self._advance()
        body = self._parse_compound_statement()
        self._expect("RPAREN")
        return body

    lhs = self._parse_conditional_expression()
    if not self._is_assignment_op():
        return lhs
    op = self._advance().value
    rhs = self._parse_assignment_expression()
    return c_ast.Assignment(op, lhs, rhs, lhs.coord)
|
|
|
|
# BNF: conditional_expression : binary_expression
|
|
# | binary_expression '?' expression ':' conditional_expression
|
|
def _parse_conditional_expression(self) -> c_ast.Node:
    """Parse `binary-expr ('?' expr ':' conditional-expr)?`."""
    cond = self._parse_binary_expression()
    if not self._accept("CONDOP"):
        return cond
    iftrue = self._parse_expression()
    self._expect("COLON")
    iffalse = self._parse_conditional_expression()
    return c_ast.TernaryOp(cond, iftrue, iffalse, cond.coord)
|
|
|
|
# BNF: binary_expression : cast_expression (binary_op cast_expression)*
|
|
def _parse_binary_expression(
    self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None
) -> c_ast.Node:
    """Precedence-climbing parser for binary operators.

    min_prec: only operators whose precedence is >= min_prec are consumed
        at this level; lower-precedence operators are left to the caller.
    lhs: an already-parsed left operand (supplied on recursive calls).
    """
    if lhs is None:
        lhs = self._parse_cast_expression()

    while True:
        tok = self._peek()
        if tok is None or tok.type not in _BINARY_PRECEDENCE:
            break
        prec = _BINARY_PRECEDENCE[tok.type]
        if prec < min_prec:
            # Binds less tightly than our caller allows; stop here.
            break

        op = tok.value
        self._advance()
        rhs = self._parse_cast_expression()

        # Give any tighter-binding operators the right operand first; the
        # strict '>' keeps same-precedence operators left-associative.
        while True:
            next_tok = self._peek()
            if next_tok is None or next_tok.type not in _BINARY_PRECEDENCE:
                break
            next_prec = _BINARY_PRECEDENCE[next_tok.type]
            if next_prec > prec:
                rhs = self._parse_binary_expression(next_prec, rhs)
            else:
                break

        lhs = c_ast.BinaryOp(op, lhs, rhs, lhs.coord)

    return lhs
|
|
|
|
# BNF: cast_expression : '(' type_name ')' cast_expression
|
|
# | unary_expression
|
|
def _parse_cast_expression(self) -> c_ast.Node:
    """Parse `(type) expr` casts; otherwise fall through to unary expressions."""
    paren_type = self._try_parse_paren_type_name()
    if paren_type is not None:
        typ, mark, lparen_tok = paren_type
        if self._peek_type() != "LBRACE":
            operand = self._parse_cast_expression()
            return c_ast.Cast(typ, operand, self._tok_coord(lparen_tok))
        # `(type){...}` is a compound literal, not a cast -- rewind and
        # let the postfix-expression parser handle it.
        self._reset(mark)
    return self._parse_unary_expression()
|
|
|
|
# BNF: unary_expression : postfix_expression
|
|
# | '++' unary_expression
|
|
# | '--' unary_expression
|
|
# | unary_op cast_expression
|
|
# | 'sizeof' unary_expression
|
|
# | 'sizeof' '(' type_name ')'
|
|
# | '_Alignof' '(' type_name ')'
|
|
def _parse_unary_expression(self) -> c_ast.Node:
    """Parse prefix operators, sizeof/_Alignof, or a postfix expression."""
    tok_type = self._peek_type()

    if tok_type in {"PLUSPLUS", "MINUSMINUS"}:
        # Prefix ++/-- recurse into another unary expression.
        op_tok = self._advance()
        operand = self._parse_unary_expression()
        return c_ast.UnaryOp(op_tok.value, operand, operand.coord)

    if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}:
        # Other unary operators take a cast expression.
        op_tok = self._advance()
        operand = self._parse_cast_expression()
        return c_ast.UnaryOp(op_tok.value, operand, operand.coord)

    if tok_type == "SIZEOF":
        sizeof_tok = self._advance()
        parenthesized = self._try_parse_paren_type_name()
        if parenthesized is not None:
            # sizeof(type-name)
            typ, _, _ = parenthesized
            return c_ast.UnaryOp(sizeof_tok.value, typ, self._tok_coord(sizeof_tok))
        # sizeof expr
        operand = self._parse_unary_expression()
        return c_ast.UnaryOp(sizeof_tok.value, operand, self._tok_coord(sizeof_tok))

    if tok_type == "_ALIGNOF":
        align_tok = self._advance()
        self._expect("LPAREN")
        typ = self._parse_type_name()
        self._expect("RPAREN")
        return c_ast.UnaryOp(align_tok.value, typ, self._tok_coord(align_tok))

    return self._parse_postfix_expression()
|
|
|
|
# BNF: postfix_expression : primary_expression postfix_suffix*
|
|
# | '(' type_name ')' '{' initializer_list ','? '}'
|
|
def _parse_postfix_expression(self) -> c_ast.Node:
    """Parse a postfix expression: a compound literal, or a primary
    expression followed by any chain of [], (), ./->, ++/-- suffixes."""
    result = self._try_parse_paren_type_name()
    if result is not None:
        typ, mark, _ = result
        # Disambiguate between casts and compound literals:
        # (int) x -> cast
        # (int) {1} -> compound literal
        if self._accept("LBRACE"):
            init = self._parse_initializer_list()
            self._accept("COMMA")  # optional trailing comma
            self._expect("RBRACE")
            return c_ast.CompoundLiteral(typ, init)
        else:
            # Not a compound literal: rewind so the '(' is re-parsed below.
            self._reset(mark)

    expr = self._parse_primary_expression()
    while True:
        if self._accept("LBRACKET"):
            # Array subscript.
            sub = self._parse_expression()
            self._expect("RBRACKET")
            expr = c_ast.ArrayRef(expr, sub, expr.coord)
            continue
        if self._accept("LPAREN"):
            # Function call; `()` yields args=None.
            if self._peek_type() == "RPAREN":
                self._advance()
                args = None
            else:
                args = self._parse_argument_expression_list()
                self._expect("RPAREN")
            expr = c_ast.FuncCall(expr, args, expr.coord)
            continue
        if self._peek_type() in {"PERIOD", "ARROW"}:
            # Struct/union member access; the member may lex as a TYPEID.
            op_tok = self._advance()
            name_tok = self._advance()
            if name_tok.type not in {"ID", "TYPEID"}:
                self._parse_error(
                    "Invalid struct reference", self._tok_coord(name_tok)
                )
            field = c_ast.ID(name_tok.value, self._tok_coord(name_tok))
            expr = c_ast.StructRef(expr, op_tok.value, field, expr.coord)
            continue
        if self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}:
            # Postfix increment/decrement is encoded as "p++" / "p--".
            tok = self._advance()
            expr = c_ast.UnaryOp("p" + tok.value, expr, expr.coord)
            continue
        break
    return expr
|
|
|
|
# BNF: primary_expression : ID | constant | string_literal
|
|
# | '(' expression ')' | offsetof
|
|
def _parse_primary_expression(self) -> c_ast.Node:
|
|
tok_type = self._peek_type()
|
|
if tok_type == "ID":
|
|
return self._parse_identifier()
|
|
if (
|
|
tok_type in _INT_CONST
|
|
or tok_type in _FLOAT_CONST
|
|
or tok_type in _CHAR_CONST
|
|
):
|
|
return self._parse_constant()
|
|
if tok_type in _STRING_LITERAL:
|
|
return self._parse_unified_string_literal()
|
|
if tok_type in _WSTR_LITERAL:
|
|
return self._parse_unified_wstring_literal()
|
|
if tok_type == "LPAREN":
|
|
self._advance()
|
|
expr = self._parse_expression()
|
|
self._expect("RPAREN")
|
|
return expr
|
|
if tok_type == "OFFSETOF":
|
|
off_tok = self._advance()
|
|
self._expect("LPAREN")
|
|
typ = self._parse_type_name()
|
|
self._expect("COMMA")
|
|
designator = self._parse_offsetof_member_designator()
|
|
self._expect("RPAREN")
|
|
coord = self._tok_coord(off_tok)
|
|
return c_ast.FuncCall(
|
|
c_ast.ID(off_tok.value, coord),
|
|
c_ast.ExprList([typ, designator], coord),
|
|
coord,
|
|
)
|
|
|
|
self._parse_error("Invalid expression", self.clex.filename)
|
|
|
|
# BNF: offsetof_member_designator : identifier_or_typeid
|
|
# ('.' identifier_or_typeid | '[' expression ']')*
|
|
def _parse_offsetof_member_designator(self) -> c_ast.Node:
|
|
node = self._parse_identifier_or_typeid()
|
|
while True:
|
|
if self._accept("PERIOD"):
|
|
field = self._parse_identifier_or_typeid()
|
|
node = c_ast.StructRef(node, ".", field, node.coord)
|
|
continue
|
|
if self._accept("LBRACKET"):
|
|
expr = self._parse_expression()
|
|
self._expect("RBRACKET")
|
|
node = c_ast.ArrayRef(node, expr, node.coord)
|
|
continue
|
|
break
|
|
return node
|
|
|
|
# BNF: argument_expression_list : assignment_expression (',' assignment_expression)*
|
|
def _parse_argument_expression_list(self) -> c_ast.Node:
|
|
expr = self._parse_assignment_expression()
|
|
exprs = [expr]
|
|
while self._accept("COMMA"):
|
|
exprs.append(self._parse_assignment_expression())
|
|
return c_ast.ExprList(exprs, expr.coord)
|
|
|
|
# BNF: constant_expression : conditional_expression
|
|
def _parse_constant_expression(self) -> c_ast.Node:
|
|
return self._parse_conditional_expression()
|
|
|
|
# ------------------------------------------------------------------
|
|
# Terminals
|
|
# ------------------------------------------------------------------
|
|
# BNF: identifier : ID
|
|
def _parse_identifier(self) -> c_ast.Node:
|
|
tok = self._expect("ID")
|
|
return c_ast.ID(tok.value, self._tok_coord(tok))
|
|
|
|
# BNF: identifier_or_typeid : ID | TYPEID
|
|
def _parse_identifier_or_typeid(self) -> c_ast.Node:
|
|
tok = self._advance()
|
|
if tok.type not in {"ID", "TYPEID"}:
|
|
self._parse_error("Expected identifier", self._tok_coord(tok))
|
|
return c_ast.ID(tok.value, self._tok_coord(tok))
|
|
|
|
# BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST
|
|
def _parse_constant(self) -> c_ast.Node:
|
|
tok = self._advance()
|
|
if tok.type in _INT_CONST:
|
|
u_count = 0
|
|
l_count = 0
|
|
for ch in tok.value[-3:]:
|
|
if ch in ("l", "L"):
|
|
l_count += 1
|
|
elif ch in ("u", "U"):
|
|
u_count += 1
|
|
if u_count > 1:
|
|
raise ValueError("Constant cannot have more than one u/U suffix.")
|
|
if l_count > 2:
|
|
raise ValueError("Constant cannot have more than two l/L suffix.")
|
|
prefix = "unsigned " * u_count + "long " * l_count
|
|
return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok))
|
|
|
|
if tok.type in _FLOAT_CONST:
|
|
if tok.value[-1] in ("f", "F"):
|
|
t = "float"
|
|
elif tok.value[-1] in ("l", "L"):
|
|
t = "long double"
|
|
else:
|
|
t = "double"
|
|
return c_ast.Constant(t, tok.value, self._tok_coord(tok))
|
|
|
|
if tok.type in _CHAR_CONST:
|
|
return c_ast.Constant("char", tok.value, self._tok_coord(tok))
|
|
|
|
self._parse_error("Invalid constant", self._tok_coord(tok))
|
|
|
|
# BNF: unified_string_literal : STRING_LITERAL+
|
|
def _parse_unified_string_literal(self) -> c_ast.Node:
|
|
tok = self._expect("STRING_LITERAL")
|
|
node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
|
|
while self._peek_type() == "STRING_LITERAL":
|
|
tok2 = self._advance()
|
|
node.value = node.value[:-1] + tok2.value[1:]
|
|
return node
|
|
|
|
# BNF: unified_wstring_literal : WSTRING_LITERAL+
|
|
def _parse_unified_wstring_literal(self) -> c_ast.Node:
|
|
tok = self._advance()
|
|
if tok.type not in _WSTR_LITERAL:
|
|
self._parse_error("Invalid string literal", self._tok_coord(tok))
|
|
node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
|
|
while self._peek_type() in _WSTR_LITERAL:
|
|
tok2 = self._advance()
|
|
node.value = node.value.rstrip()[:-1] + tok2.value[2:]
|
|
return node
|
|
|
|
# ------------------------------------------------------------------
|
|
# Initializers
|
|
# ------------------------------------------------------------------
|
|
# BNF: initializer : assignment_expression
|
|
# | '{' initializer_list ','? '}'
|
|
# | '{' '}'
|
|
def _parse_initializer(self) -> c_ast.Node:
|
|
lbrace_tok = self._accept("LBRACE")
|
|
if lbrace_tok:
|
|
if self._accept("RBRACE"):
|
|
return c_ast.InitList([], self._tok_coord(lbrace_tok))
|
|
init_list = self._parse_initializer_list()
|
|
self._accept("COMMA")
|
|
self._expect("RBRACE")
|
|
return init_list
|
|
|
|
return self._parse_assignment_expression()
|
|
|
|
# BNF: initializer_list : initializer_item (',' initializer_item)* ','?
|
|
def _parse_initializer_list(self) -> c_ast.Node:
|
|
items = [self._parse_initializer_item()]
|
|
while self._accept("COMMA"):
|
|
if self._peek_type() == "RBRACE":
|
|
break
|
|
items.append(self._parse_initializer_item())
|
|
return c_ast.InitList(items, items[0].coord)
|
|
|
|
# BNF: initializer_item : designation? initializer
|
|
def _parse_initializer_item(self) -> c_ast.Node:
|
|
designation = None
|
|
if self._peek_type() in {"LBRACKET", "PERIOD"}:
|
|
designation = self._parse_designation()
|
|
init = self._parse_initializer()
|
|
if designation is not None:
|
|
return c_ast.NamedInitializer(designation, init)
|
|
return init
|
|
|
|
# BNF: designation : designator_list '='
|
|
def _parse_designation(self) -> List[c_ast.Node]:
|
|
designators = self._parse_designator_list()
|
|
self._expect("EQUALS")
|
|
return designators
|
|
|
|
# BNF: designator_list : designator+
|
|
def _parse_designator_list(self) -> List[c_ast.Node]:
|
|
designators = []
|
|
while self._peek_type() in {"LBRACKET", "PERIOD"}:
|
|
designators.append(self._parse_designator())
|
|
return designators
|
|
|
|
# BNF: designator : '[' constant_expression ']'
|
|
# | '.' identifier_or_typeid
|
|
def _parse_designator(self) -> c_ast.Node:
|
|
if self._accept("LBRACKET"):
|
|
expr = self._parse_constant_expression()
|
|
self._expect("RBRACKET")
|
|
return expr
|
|
if self._accept("PERIOD"):
|
|
return self._parse_identifier_or_typeid()
|
|
self._parse_error("Invalid designator", self.clex.filename)
|
|
|
|
# ------------------------------------------------------------------
|
|
# Preprocessor-like directives
|
|
# ------------------------------------------------------------------
|
|
# BNF: pp_directive : '#' ... (unsupported)
|
|
def _parse_pp_directive(self) -> NoReturn:
|
|
tok = self._expect("PPHASH")
|
|
self._parse_error("Directives not supported yet", self._tok_coord(tok))
|
|
|
|
# BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR?
|
|
# | _PRAGMA '(' string_literal ')'
|
|
def _parse_pppragma_directive(self) -> c_ast.Node:
|
|
if self._peek_type() == "PPPRAGMA":
|
|
tok = self._advance()
|
|
if self._peek_type() == "PPPRAGMASTR":
|
|
str_tok = self._advance()
|
|
return c_ast.Pragma(str_tok.value, self._tok_coord(str_tok))
|
|
return c_ast.Pragma("", self._tok_coord(tok))
|
|
|
|
if self._peek_type() == "_PRAGMA":
|
|
tok = self._advance()
|
|
lparen = self._expect("LPAREN")
|
|
literal = self._parse_unified_string_literal()
|
|
self._expect("RPAREN")
|
|
return c_ast.Pragma(literal, self._tok_coord(lparen))
|
|
|
|
self._parse_error("Invalid pragma", self.clex.filename)
|
|
|
|
# BNF: pppragma_directive_list : pppragma_directive+
|
|
def _parse_pppragma_directive_list(self) -> List[c_ast.Node]:
|
|
pragmas = []
|
|
while self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
|
|
pragmas.append(self._parse_pppragma_directive())
|
|
return pragmas
|
|
|
|
# BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')'
|
|
def _parse_static_assert(self) -> List[c_ast.Node]:
|
|
tok = self._expect("_STATIC_ASSERT")
|
|
self._expect("LPAREN")
|
|
cond = self._parse_constant_expression()
|
|
msg = None
|
|
if self._accept("COMMA"):
|
|
msg = self._parse_unified_string_literal()
|
|
self._expect("RPAREN")
|
|
return [c_ast.StaticAssert(cond, msg, self._tok_coord(tok))]
|
|
|
|
|
|
# Token types of all assignment operators ('=', '^=', '*=', '/=', ...).
_ASSIGNMENT_OPS = {
    "EQUALS",
    "XOREQUAL",
    "TIMESEQUAL",
    "DIVEQUAL",
    "MODEQUAL",
    "PLUSEQUAL",
    "MINUSEQUAL",
    "LSHIFTEQUAL",
    "RSHIFTEQUAL",
    "ANDEQUAL",
    "OREQUAL",
}
|
|
|
|
# Precedence of operators (lower number = weaker binding)
# If this changes, c_generator.CGenerator.precedence_map needs to change as
# well
_BINARY_PRECEDENCE = {
    "LOR": 0,
    "LAND": 1,
    "OR": 2,
    "XOR": 3,
    "AND": 4,
    "EQ": 5,
    "NE": 5,
    "GT": 6,
    "GE": 6,
    "LT": 6,
    "LE": 6,
    "RSHIFT": 7,
    "LSHIFT": 7,
    "PLUS": 8,
    "MINUS": 8,
    "TIMES": 9,
    "DIVIDE": 9,
    "MOD": 9,
}
|
|
|
|
# Storage-class specifier token types.
_STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"}

# Function specifier token types.
_FUNCTION_SPEC = {"INLINE", "_NORETURN"}

# Type qualifier token types.
_TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"}

# Single-keyword (built-in) type specifier token types.
_TYPE_SPEC_SIMPLE = {
    "VOID",
    "_BOOL",
    "CHAR",
    "SHORT",
    "INT",
    "LONG",
    "FLOAT",
    "DOUBLE",
    "_COMPLEX",
    "SIGNED",
    "UNSIGNED",
    "__INT128",
}

# All token types that can begin a declaration.
_DECL_START = (
    _STORAGE_CLASS
    | _FUNCTION_SPEC
    | _TYPE_QUALIFIER
    | _TYPE_SPEC_SIMPLE
    | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"}
)
|
|
|
|
# Token types (other than constants and string literals) that can begin an
# expression: identifiers, parens, and prefix/unary operators.
_EXPR_START = {
    "ID",
    "LPAREN",
    "PLUSPLUS",
    "MINUSMINUS",
    "PLUS",
    "MINUS",
    "TIMES",
    "AND",
    "NOT",
    "LNOT",
    "SIZEOF",
    "_ALIGNOF",
    "OFFSETOF",
}

# Integer-constant token types (decimal, octal, hex, binary, char-valued).
_INT_CONST = {
    "INT_CONST_DEC",
    "INT_CONST_OCT",
    "INT_CONST_HEX",
    "INT_CONST_BIN",
    "INT_CONST_CHAR",
}

# Floating-constant token types.
_FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"}

# Character-constant token types (narrow, wide, and unicode variants).
_CHAR_CONST = {
    "CHAR_CONST",
    "WCHAR_CONST",
    "U8CHAR_CONST",
    "U16CHAR_CONST",
    "U32CHAR_CONST",
}

# Narrow string literal token types.
_STRING_LITERAL = {"STRING_LITERAL"}

# Wide/unicode string literal token types.
_WSTR_LITERAL = {
    "WSTRING_LITERAL",
    "U8STRING_LITERAL",
    "U16STRING_LITERAL",
    "U32STRING_LITERAL",
}

# Union of all token types that can begin an expression.
_STARTS_EXPRESSION = (
    _EXPR_START
    | _INT_CONST
    | _FLOAT_CONST
    | _CHAR_CONST
    | _STRING_LITERAL
    | _WSTR_LITERAL
)

# Token types that begin a (non-expression) statement.
_STARTS_STATEMENT = {
    "LBRACE",
    "IF",
    "SWITCH",
    "WHILE",
    "DO",
    "FOR",
    "GOTO",
    "BREAK",
    "CONTINUE",
    "RETURN",
    "CASE",
    "DEFAULT",
    "PPPRAGMA",
    "_PRAGMA",
    "_STATIC_ASSERT",
    "SEMI",
}
|
|
|
|
|
|
class _TokenStream:
    """Buffered access to the tokens produced by a lexer.

    The lexer is expected to be initialized with the input string already.
    Tokens are accumulated in an internal buffer, which makes arbitrary
    lookahead and backtracking (via mark/reset) possible.
    """

    def __init__(self, lexer: CLexer) -> None:
        self._lexer = lexer
        self._buffer: List[Optional[_Token]] = []
        self._index = 0

    def peek(self, k: int = 1) -> Optional[_Token]:
        """Return the k-th upcoming token without consuming it.

        k=1 is the immediate next token, k=2 the one after it, and so on.
        Returns None for non-positive k or when the stream is exhausted
        before the k-th token.
        """
        if k < 1:
            return None
        self._fill(k)
        return self._buffer[self._index + k - 1]

    def next(self) -> Optional[_Token]:
        """Consume and return the next token (None at end of input)."""
        self._fill(1)
        tok = self._buffer[self._index]
        self._index += 1
        return tok

    # 'mark' and 'reset' support speculative parsing with backtracking:
    # record the position with mark(), try one parse path, and if it fails,
    # restore the recorded position with reset(pos) to try another path on
    # the same token sequence.
    def mark(self) -> int:
        """Return an opaque marker for the current stream position."""
        return self._index

    def reset(self, mark: int) -> None:
        """Rewind the stream to a position previously returned by mark()."""
        self._index = mark

    def _fill(self, n: int) -> None:
        """Buffer tokens until n tokens past the current position are
        available, or the lexer reports end of input (a None token).
        """
        needed = self._index + n
        while len(self._buffer) < needed:
            tok = self._lexer.token()
            self._buffer.append(tok)
            if tok is None:
                break
|
|
|
|
|
|
# Declaration specifiers are represented by a dictionary with entries:
# - qual: a list of type qualifiers
# - storage: a list of storage class specifiers
# - type: a list of type specifiers
# - function: a list of function specifiers
# - alignment: a list of alignment specifiers
class _DeclSpec(TypedDict):
    """Accumulated declaration specifiers, grouped by specifier kind."""

    qual: List[Any]
    storage: List[Any]
    type: List[Any]
    function: List[Any]
    alignment: List[Any]
|
|
|
|
|
|
# Name of one of the _DeclSpec entries.
_DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"]
|
|
|
|
|
|
class _DeclInfo(TypedDict):
    """Declarator payload used by declaration/initializer parsing."""

    # The declarator node (may be None for abstract/implicit cases).
    decl: Optional[c_ast.Node]
    # Optional initializer expression.
    init: Optional[c_ast.Node]
    # Optional bit-field width expression (for struct declarators).
    bitsize: Optional[c_ast.Node]
|