Update dashboard, memory, root +2 more (+3 ~5)
venv/lib/python3.12/site-packages/pycparser/__init__.py (new file, 99 lines)
@@ -0,0 +1,99 @@
# -----------------------------------------------------------------
# pycparser: __init__.py
#
# This package file exports some convenience functions for
# interacting with pycparser
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
# -----------------------------------------------------------------
__all__ = ["c_lexer", "c_parser", "c_ast"]
__version__ = "3.00"

import io
from subprocess import check_output

from . import c_parser

CParser = c_parser.CParser


def preprocess_file(filename, cpp_path="cpp", cpp_args=""):
    """Preprocess a file using cpp.

    filename:
        Name of the file you want to preprocess.

    cpp_path:
    cpp_args:
        Refer to the documentation of parse_file for the meaning of these
        arguments.

    When successful, returns the preprocessed file's contents.
    Errors from cpp will be printed out.
    """
    path_list = [cpp_path]
    if isinstance(cpp_args, list):
        path_list += cpp_args
    elif cpp_args != "":
        path_list += [cpp_args]
    path_list += [filename]

    try:
        # Note the use of universal_newlines to treat all newlines
        # as \n for Python's purpose
        text = check_output(path_list, universal_newlines=True)
    except OSError as e:
        raise RuntimeError(
            "Unable to invoke 'cpp'. "
            + "Make sure its path was passed correctly\n"
            + f"Original error: {e}"
        )

    return text


def parse_file(
    filename, use_cpp=False, cpp_path="cpp", cpp_args="", parser=None, encoding=None
):
    """Parse a C file using pycparser.

    filename:
        Name of the file you want to parse.

    use_cpp:
        Set to True if you want to execute the C pre-processor
        on the file prior to parsing it.

    cpp_path:
        If use_cpp is True, this is the path to 'cpp' on your
        system. If no path is provided, it attempts to just
        execute 'cpp', so it must be in your PATH.

    cpp_args:
        If use_cpp is True, set this to the command line arguments strings
        to cpp. Be careful with quotes - it's best to pass a raw string
        (r'') here. For example:
            r'-I../utils/fake_libc_include'
        If several arguments are required, pass a list of strings.

    encoding:
        Encoding to use for the file to parse

    parser:
        Optional parser object to be used instead of the default CParser

    When successful, an AST is returned. ParseError can be
    thrown if the file doesn't parse successfully.

    Errors from cpp will be printed out.
    """
    if use_cpp:
        text = preprocess_file(filename, cpp_path, cpp_args)
    else:
        with io.open(filename, encoding=encoding) as f:
            text = f.read()

    if parser is None:
        parser = CParser()
    return parser.parse(text, filename)
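For reference, the two entry points this file exports are used like this (a minimal sketch; the C snippet and file name are made up for illustration):

```python
from pycparser import c_parser

parser = c_parser.CParser()
ast = parser.parse("int add(int a, int b) { return a + b; }", filename="<demo>")
ast.show()  # pretty-prints the AST nodes defined in c_ast.py

# parse_file("prog.c", use_cpp=True, cpp_args=r"-Ifake_libc_include")
# would run cpp over prog.c first, then parse the preprocessed output.
```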

venv/lib/python3.12/site-packages/pycparser/_ast_gen.py (new file, 355 lines)
@@ -0,0 +1,355 @@
# -----------------------------------------------------------------
# _ast_gen.py
#
# Generates the AST Node classes from a specification given in
# a configuration file. This module can also be run as a script to
# regenerate c_ast.py from _c_ast.cfg (from the repo root or the
# pycparser/ directory). Use 'make check' to reformat the generated
# file after running this script.
#
# The design of this module was inspired by astgen.py from the
# Python 2.5 code-base.
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
# -----------------------------------------------------------------
from string import Template
import os
from typing import IO


class ASTCodeGenerator:
    def __init__(self, cfg_filename="_c_ast.cfg"):
        """Initialize the code generator from a configuration
        file.
        """
        self.cfg_filename = cfg_filename
        self.node_cfg = [
            NodeCfg(name, contents)
            for (name, contents) in self.parse_cfgfile(cfg_filename)
        ]

    def generate(self, file: IO[str]) -> None:
        """Generates the code into file, an open file buffer."""
        src = Template(_PROLOGUE_COMMENT).substitute(cfg_filename=self.cfg_filename)

        src += _PROLOGUE_CODE
        for node_cfg in self.node_cfg:
            src += node_cfg.generate_source() + "\n\n"

        file.write(src)

    def parse_cfgfile(self, filename):
        """Parse the configuration file and yield pairs of
        (name, contents) for each node.
        """
        with open(filename, "r") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                colon_i = line.find(":")
                lbracket_i = line.find("[")
                rbracket_i = line.find("]")
                if colon_i < 1 or lbracket_i <= colon_i or rbracket_i <= lbracket_i:
                    raise RuntimeError(f"Invalid line in {filename}:\n{line}\n")

                name = line[:colon_i]
                val = line[lbracket_i + 1 : rbracket_i]
                vallist = [v.strip() for v in val.split(",")] if val else []
                yield name, vallist


class NodeCfg:
    """Node configuration.

    name: node name
    contents: a list of contents - attributes and child nodes
    See comment at the top of the configuration file for details.
    """

    def __init__(self, name, contents):
        self.name = name
        self.all_entries = []
        self.attr = []
        self.child = []
        self.seq_child = []

        for entry in contents:
            clean_entry = entry.rstrip("*")
            self.all_entries.append(clean_entry)

            if entry.endswith("**"):
                self.seq_child.append(clean_entry)
            elif entry.endswith("*"):
                self.child.append(clean_entry)
            else:
                self.attr.append(entry)

    def generate_source(self):
        src = self._gen_init()
        src += "\n" + self._gen_children()
        src += "\n" + self._gen_iter()
        src += "\n" + self._gen_attr_names()
        return src

    def _gen_init(self):
        src = f"class {self.name}(Node):\n"

        if self.all_entries:
            args = ", ".join(self.all_entries)
            slots = ", ".join(f"'{e}'" for e in self.all_entries)
            slots += ", 'coord', '__weakref__'"
            arglist = f"(self, {args}, coord=None)"
        else:
            slots = "'coord', '__weakref__'"
            arglist = "(self, coord=None)"

        src += f"    __slots__ = ({slots})\n"
        src += f"    def __init__{arglist}:\n"

        for name in self.all_entries + ["coord"]:
            src += f"        self.{name} = {name}\n"

        return src

    def _gen_children(self):
        src = "    def children(self):\n"

        if self.all_entries:
            src += "        nodelist = []\n"

            for child in self.child:
                src += f"        if self.{child} is not None:\n"
                src += f'            nodelist.append(("{child}", self.{child}))\n'

            for seq_child in self.seq_child:
                src += f"        for i, child in enumerate(self.{seq_child} or []):\n"
                src += f'            nodelist.append((f"{seq_child}[{{i}}]", child))\n'

            src += "        return tuple(nodelist)\n"
        else:
            src += "        return ()\n"

        return src

    def _gen_iter(self):
        src = "    def __iter__(self):\n"

        if self.all_entries:
            for child in self.child:
                src += f"        if self.{child} is not None:\n"
                src += f"            yield self.{child}\n"

            for seq_child in self.seq_child:
                src += f"        for child in (self.{seq_child} or []):\n"
                src += "            yield child\n"

            if not (self.child or self.seq_child):
                # Empty generator
                src += "        return\n" + "        yield\n"
        else:
            # Empty generator
            src += "        return\n" + "        yield\n"

        return src

    def _gen_attr_names(self):
        src = "    attr_names = (" + "".join(f"{nm!r}, " for nm in self.attr) + ")"
        return src


_PROLOGUE_COMMENT = r"""#-----------------------------------------------------------------
# ** ATTENTION **
# This code was automatically generated from $cfg_filename
#
# Do not modify it directly. Modify the configuration file and
# run the generator again.
# ** ** *** ** **
#
# pycparser: c_ast.py
#
# AST Node classes.
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#-----------------------------------------------------------------

"""
_PROLOGUE_CODE = r'''
import sys
from typing import Any, ClassVar, IO, Optional


def _repr(obj):
    """
    Get the representation of an object, with dedicated pprint-like format for lists.
    """
    if isinstance(obj, list):
        return '[' + (',\n '.join((_repr(e).replace('\n', '\n ') for e in obj))) + '\n]'
    else:
        return repr(obj)


class Node:
    __slots__ = ()
    """ Abstract base class for AST nodes.
    """
    attr_names: ClassVar[tuple[str, ...]] = ()
    coord: Optional[Any]
    def __repr__(self):
        """ Generates a python representation of the current node
        """
        result = self.__class__.__name__ + '('

        indent = ''
        separator = ''
        for name in self.__slots__[:-2]:
            result += separator
            result += indent
            result += name + '=' + (_repr(getattr(self, name)).replace('\n', '\n ' + (' ' * (len(name) + len(self.__class__.__name__)))))

            separator = ','
            indent = '\n ' + (' ' * len(self.__class__.__name__))

        result += indent + ')'

        return result

    def children(self):
        """ A sequence of all children that are Nodes
        """
        pass

    def show(
        self,
        buf: IO[str] = sys.stdout,
        offset: int = 0,
        attrnames: bool = False,
        showemptyattrs: bool = True,
        nodenames: bool = False,
        showcoord: bool = False,
        _my_node_name: Optional[str] = None,
    ):
        """ Pretty print the Node and all its attributes and
            children (recursively) to a buffer.

            buf:
                Open IO buffer into which the Node is printed.

            offset:
                Initial offset (amount of leading spaces)

            attrnames:
                True if you want to see the attribute names in
                name=value pairs. False to only see the values.

            showemptyattrs:
                False if you want to suppress printing empty attributes.

            nodenames:
                True if you want to see the actual node names
                within their parents.

            showcoord:
                Do you want the coordinates of each Node to be
                displayed.
        """
        lead = ' ' * offset
        if nodenames and _my_node_name is not None:
            buf.write(lead + self.__class__.__name__ + ' <' + _my_node_name + '>: ')
        else:
            buf.write(lead + self.__class__.__name__ + ': ')

        if self.attr_names:
            def is_empty(v):
                return v is None or (hasattr(v, '__len__') and len(v) == 0)
            nvlist = [(n, getattr(self, n)) for n in self.attr_names
                      if showemptyattrs or not is_empty(getattr(self, n))]
            if attrnames:
                attrstr = ', '.join(f'{name}={value}' for name, value in nvlist)
            else:
                attrstr = ', '.join(f'{value}' for _, value in nvlist)
            buf.write(attrstr)

        if showcoord:
            buf.write(f' (at {self.coord})')
        buf.write('\n')

        for (child_name, child) in self.children():
            child.show(
                buf,
                offset=offset + 2,
                attrnames=attrnames,
                showemptyattrs=showemptyattrs,
                nodenames=nodenames,
                showcoord=showcoord,
                _my_node_name=child_name)


class NodeVisitor:
    """ A base NodeVisitor class for visiting c_ast nodes.
        Subclass it and define your own visit_XXX methods, where
        XXX is the class name you want to visit with these
        methods.

        For example:

        class ConstantVisitor(NodeVisitor):
            def __init__(self):
                self.values = []

            def visit_Constant(self, node):
                self.values.append(node.value)

        Creates a list of values of all the constant nodes
        encountered below the given node. To use it:

        cv = ConstantVisitor()
        cv.visit(node)

        Notes:

        *   generic_visit() will be called for AST nodes for which
            no visit_XXX method was defined.
        *   The children of nodes for which a visit_XXX was
            defined will not be visited - if you need this, call
            generic_visit() on the node.
            You can use:
                NodeVisitor.generic_visit(self, node)
        *   Modeled after Python's own AST visiting facilities
            (the ast module of Python 3.0)
    """

    _method_cache = None

    def visit(self, node: Node):
        """ Visit a node.
        """

        if self._method_cache is None:
            self._method_cache = {}

        visitor = self._method_cache.get(node.__class__.__name__, None)
        if visitor is None:
            method = 'visit_' + node.__class__.__name__
            visitor = getattr(self, method, self.generic_visit)
            self._method_cache[node.__class__.__name__] = visitor

        return visitor(node)

    def generic_visit(self, node: Node):
        """ Called if no explicit visitor function exists for a
            node. Implements preorder visiting of the node.
        """
        for _, c in node.children():
            self.visit(c)

'''


if __name__ == "__main__":
    base_dir = os.path.dirname(os.path.abspath(__file__))
    cfg_path = os.path.join(base_dir, "_c_ast.cfg")
    out_path = os.path.join(base_dir, "c_ast.py")
    ast_gen = ASTCodeGenerator(cfg_path)
    with open(out_path, "w") as out:
        ast_gen.generate(out)
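The generator above can also be driven in-memory instead of writing c_ast.py; a minimal sketch, assuming _c_ast.cfg sits in the working directory:

```python
import io
from pycparser._ast_gen import ASTCodeGenerator

buf = io.StringIO()
ASTCodeGenerator("_c_ast.cfg").generate(buf)
print(buf.getvalue()[:200])  # starts with the generated-file prologue comment
```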

venv/lib/python3.12/site-packages/pycparser/_c_ast.cfg (new file, 195 lines)
@@ -0,0 +1,195 @@
#-----------------------------------------------------------------
# pycparser: _c_ast.cfg
#
# Defines the AST Node classes used in pycparser.
#
# Each entry is a Node sub-class name, listing the attributes
# and child nodes of the class:
#   <name>*     - a child node
#   <name>**    - a sequence of child nodes
#   <name>      - an attribute
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#-----------------------------------------------------------------

# ArrayDecl is a nested declaration of an array with the given type.
# dim: the dimension (for example, constant 42)
# dim_quals: list of dimension qualifiers, to support C99's allowing 'const'
#            and 'static' within the array dimension in function declarations.
ArrayDecl: [type*, dim*, dim_quals]

ArrayRef: [name*, subscript*]

# op: =, +=, /= etc.
#
Assignment: [op, lvalue*, rvalue*]

Alignas: [alignment*]

BinaryOp: [op, left*, right*]

Break: []

Case: [expr*, stmts**]

Cast: [to_type*, expr*]

# Compound statement in C99 is a list of block items (declarations or
# statements).
#
Compound: [block_items**]

# Compound literal (anonymous aggregate) for C99.
# (type-name) {initializer_list}
# type: the typename
# init: InitList for the initializer list
#
CompoundLiteral: [type*, init*]

# type: int, char, float, string, etc.
#
Constant: [type, value]

Continue: []

# name: the variable being declared
# quals: list of qualifiers (const, volatile)
# funcspec: list function specifiers (i.e. inline in C99)
# storage: list of storage specifiers (extern, register, etc.)
# type: declaration type (probably nested with all the modifiers)
# init: initialization value, or None
# bitsize: bit field size, or None
#
Decl: [name, quals, align, storage, funcspec, type*, init*, bitsize*]

DeclList: [decls**]

Default: [stmts**]

DoWhile: [cond*, stmt*]

# Represents the ellipsis (...) parameter in a function
# declaration
#
EllipsisParam: []

# An empty statement (a semicolon ';' on its own)
#
EmptyStatement: []

# Enumeration type specifier
# name: an optional ID
# values: an EnumeratorList
#
Enum: [name, values*]

# A name/value pair for enumeration values
#
Enumerator: [name, value*]

# A list of enumerators
#
EnumeratorList: [enumerators**]

# A list of expressions separated by the comma operator.
#
ExprList: [exprs**]

# This is the top of the AST, representing a single C file (a
# translation unit in K&R jargon). It contains a list of
# "external-declaration"s, which is either declarations (Decl),
# Typedef or function definitions (FuncDef).
#
FileAST: [ext**]

# for (init; cond; next) stmt
#
For: [init*, cond*, next*, stmt*]

# name: Id
# args: ExprList
#
FuncCall: [name*, args*]

# type <decl>(args)
#
FuncDecl: [args*, type*]

# Function definition: a declarator for the function name and
# a body, which is a compound statement.
# There's an optional list of parameter declarations for old
# K&R-style definitions
#
FuncDef: [decl*, param_decls**, body*]

Goto: [name]

ID: [name]

# Holder for types that are a simple identifier (e.g. the built
# ins void, char etc. and typedef-defined types)
#
IdentifierType: [names]

If: [cond*, iftrue*, iffalse*]

# An initialization list used for compound literals.
#
InitList: [exprs**]

Label: [name, stmt*]

# A named initializer for C99.
# The name of a NamedInitializer is a sequence of Nodes, because
# names can be hierarchical and contain constant expressions.
#
NamedInitializer: [name**, expr*]

# a list of comma separated function parameter declarations
#
ParamList: [params**]

PtrDecl: [quals, type*]

Return: [expr*]

StaticAssert: [cond*, message*]

# name: struct tag name
# decls: declaration of members
#
Struct: [name, decls**]

# type: . or ->
# name.field or name->field
#
StructRef: [name*, type, field*]

Switch: [cond*, stmt*]

# cond ? iftrue : iffalse
#
TernaryOp: [cond*, iftrue*, iffalse*]

# A base type declaration
#
TypeDecl: [declname, quals, align, type*]

# A typedef declaration.
# Very similar to Decl, but without some attributes
#
Typedef: [name, quals, storage, type*]

Typename: [name, quals, align, type*]

UnaryOp: [op, expr*]

# name: union tag name
# decls: declaration of members
#
Union: [name, decls**]

While: [cond*, stmt*]

Pragma: [string]
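Each entry expands, via _ast_gen.py, into a Node subclass: plain names become attributes, starred names become child nodes. A quick sketch with the BinaryOp entry above:

```python
from pycparser import c_ast

node = c_ast.BinaryOp(op="+", left=c_ast.ID("a"), right=c_ast.Constant("int", "1"))
print(node.attr_names)                   # ('op', )           <- plain attribute
print([name for name, _ in node.children()])  # ['left', 'right']  <- child nodes
```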

venv/lib/python3.12/site-packages/pycparser/ast_transforms.py (new file, 174 lines)
@@ -0,0 +1,174 @@
# ------------------------------------------------------------------------------
# pycparser: ast_transforms.py
#
# Some utilities used by the parser to create a friendlier AST.
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
# ------------------------------------------------------------------------------

from typing import Any, List, Tuple, cast

from . import c_ast


def fix_switch_cases(switch_node: c_ast.Switch) -> c_ast.Switch:
    """The 'case' statements in a 'switch' come out of parsing with one
    child node, so subsequent statements are just tucked to the parent
    Compound. Additionally, consecutive (fall-through) case statements
    come out messy. This is a peculiarity of the C grammar. The following:

        switch (myvar) {
            case 10:
                k = 10;
                p = k + 1;
                return 10;
            case 20:
            case 30:
                return 20;
            default:
                break;
        }

    Creates this tree (pseudo-dump):

        Switch
            ID: myvar
            Compound:
                Case 10:
                    k = 10
                p = k + 1
                return 10
                Case 20:
                    Case 30:
                        return 20
                Default:
                    break

    The goal of this transform is to fix this mess, turning it into the
    following:

        Switch
            ID: myvar
            Compound:
                Case 10:
                    k = 10
                    p = k + 1
                    return 10
                Case 20:
                Case 30:
                    return 20
                Default:
                    break

    A fixed AST node is returned. The argument may be modified.
    """
    assert isinstance(switch_node, c_ast.Switch)
    if not isinstance(switch_node.stmt, c_ast.Compound):
        return switch_node

    # The new Compound child for the Switch, which will collect children in the
    # correct order
    new_compound = c_ast.Compound([], switch_node.stmt.coord)

    # The last Case/Default node
    last_case: c_ast.Case | c_ast.Default | None = None

    # Goes over the children of the Compound below the Switch, adding them
    # either directly below new_compound or below the last Case as appropriate
    # (for `switch(cond) {}`, block_items would have been None)
    for child in switch_node.stmt.block_items or []:
        if isinstance(child, (c_ast.Case, c_ast.Default)):
            # If it's a Case/Default:
            # 1. Add it to the Compound and mark as "last case"
            # 2. If its immediate child is also a Case or Default, promote it
            #    to a sibling.
            new_compound.block_items.append(child)
            _extract_nested_case(child, new_compound.block_items)
            last_case = new_compound.block_items[-1]
        else:
            # Other statements are added as children to the last case, if it
            # exists.
            if last_case is None:
                new_compound.block_items.append(child)
            else:
                last_case.stmts.append(child)

    switch_node.stmt = new_compound
    return switch_node


def _extract_nested_case(
    case_node: c_ast.Case | c_ast.Default, stmts_list: List[c_ast.Node]
) -> None:
    """Recursively extract consecutive Case statements that are made nested
    by the parser and add them to the stmts_list.
    """
    if isinstance(case_node.stmts[0], (c_ast.Case, c_ast.Default)):
        nested = case_node.stmts.pop()
        stmts_list.append(nested)
        _extract_nested_case(cast(Any, nested), stmts_list)


def fix_atomic_specifiers(
    decl: c_ast.Decl | c_ast.Typedef,
) -> c_ast.Decl | c_ast.Typedef:
    """Atomic specifiers like _Atomic(type) are unusually structured,
    conferring a qualifier upon the contained type.

    This function fixes a decl with atomic specifiers to have a sane AST
    structure, by removing spurious Typename->TypeDecl pairs and attaching
    the _Atomic qualifier in the right place.
    """
    # There can be multiple levels of _Atomic in a decl; fix them until a
    # fixed point is reached.
    while True:
        decl, found = _fix_atomic_specifiers_once(decl)
        if not found:
            break

    # Make sure to add an _Atomic qual on the topmost decl if needed. Also
    # restore the declname on the innermost TypeDecl (it gets placed in the
    # wrong place during construction).
    typ: Any = decl
    while not isinstance(typ, c_ast.TypeDecl):
        try:
            typ = typ.type
        except AttributeError:
            return decl
    if "_Atomic" in typ.quals and "_Atomic" not in decl.quals:
        decl.quals.append("_Atomic")
    if typ.declname is None:
        typ.declname = decl.name

    return decl


def _fix_atomic_specifiers_once(
    decl: c_ast.Decl | c_ast.Typedef,
) -> Tuple[c_ast.Decl | c_ast.Typedef, bool]:
    """Performs one 'fix' round of atomic specifiers.
    Returns (modified_decl, found) where found is True iff a fix was made.
    """
    parent: Any = decl
    grandparent: Any = None
    node: Any = decl.type
    while node is not None:
        if isinstance(node, c_ast.Typename) and "_Atomic" in node.quals:
            break
        try:
            grandparent = parent
            parent = node
            node = node.type
        except AttributeError:
            # If we've reached a node without a `type` field, it means we won't
            # find what we're looking for at this point; give up the search
            # and return the original decl unmodified.
            return decl, False

    assert isinstance(parent, c_ast.TypeDecl)
    assert grandparent is not None
    cast(Any, grandparent).type = node.type
    if "_Atomic" not in node.type.quals:
        node.type.quals.append("_Atomic")
    return decl, True
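A short sketch of the switch transform in action. CParser applies fix_switch_cases while reducing the switch statement, so a parsed switch already comes out flattened:

```python
from pycparser import c_parser

src = "int f(int v) { switch (v) { case 1: case 2: return 9; default: break; } }"
ast = c_parser.CParser().parse(src)
ast.show()  # Case 1 and Case 2 appear as siblings inside the switch's Compound
```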

venv/lib/python3.12/site-packages/pycparser/c_ast.py (new file, 1341 lines)
File diff suppressed because it is too large.

venv/lib/python3.12/site-packages/pycparser/c_generator.py (new file, 573 lines)
@@ -0,0 +1,573 @@
# ------------------------------------------------------------------------------
# pycparser: c_generator.py
#
# C code generator from pycparser AST nodes.
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
# ------------------------------------------------------------------------------
from typing import Callable, List, Optional

from . import c_ast


class CGenerator:
    """Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
    return a value from each visit method, using string accumulation in
    generic_visit.
    """

    indent_level: int
    reduce_parentheses: bool

    def __init__(self, reduce_parentheses: bool = False) -> None:
        """Constructs C-code generator

        reduce_parentheses:
            if True, eliminates needless parentheses on binary operators
        """
        # Statements start with indentation of self.indent_level spaces, using
        # the _make_indent method.
        self.indent_level = 0
        self.reduce_parentheses = reduce_parentheses

    def _make_indent(self) -> str:
        return " " * self.indent_level

    def visit(self, node: c_ast.Node) -> str:
        method = "visit_" + node.__class__.__name__
        return getattr(self, method, self.generic_visit)(node)

    def generic_visit(self, node: Optional[c_ast.Node]) -> str:
        if node is None:
            return ""
        else:
            return "".join(self.visit(c) for c_name, c in node.children())

    def visit_Constant(self, n: c_ast.Constant) -> str:
        return n.value

    def visit_ID(self, n: c_ast.ID) -> str:
        return n.name

    def visit_Pragma(self, n: c_ast.Pragma) -> str:
        ret = "#pragma"
        if n.string:
            ret += " " + n.string
        return ret

    def visit_ArrayRef(self, n: c_ast.ArrayRef) -> str:
        arrref = self._parenthesize_unless_simple(n.name)
        return arrref + "[" + self.visit(n.subscript) + "]"

    def visit_StructRef(self, n: c_ast.StructRef) -> str:
        sref = self._parenthesize_unless_simple(n.name)
        return sref + n.type + self.visit(n.field)

    def visit_FuncCall(self, n: c_ast.FuncCall) -> str:
        fref = self._parenthesize_unless_simple(n.name)
        args = self.visit(n.args) if n.args is not None else ""
        return fref + "(" + args + ")"

    def visit_UnaryOp(self, n: c_ast.UnaryOp) -> str:
        match n.op:
            case "sizeof":
                # Always parenthesize the argument of sizeof since it can be
                # a name.
                return f"sizeof({self.visit(n.expr)})"
            case "p++":
                operand = self._parenthesize_unless_simple(n.expr)
                return f"{operand}++"
            case "p--":
                operand = self._parenthesize_unless_simple(n.expr)
                return f"{operand}--"
            case _:
                operand = self._parenthesize_unless_simple(n.expr)
                return f"{n.op}{operand}"

    # Precedence map of binary operators:
    precedence_map = {
        # Should be in sync with c_parser.CParser.precedence
        # Higher numbers are stronger binding
        "||": 0,  # weakest binding
        "&&": 1,
        "|": 2,
        "^": 3,
        "&": 4,
        "==": 5,
        "!=": 5,
        ">": 6,
        ">=": 6,
        "<": 6,
        "<=": 6,
        ">>": 7,
        "<<": 7,
        "+": 8,
        "-": 8,
        "*": 9,
        "/": 9,
        "%": 9,  # strongest binding
    }

    def visit_BinaryOp(self, n: c_ast.BinaryOp) -> str:
        # Note: all binary operators are left-to-right associative
        #
        # If `n.left.op` has a stronger or equally binding precedence in
        # comparison to `n.op`, no parenthesis are needed for the left:
        # e.g., `(a*b) + c` is equivalent to `a*b + c`, as well as
        # `(a+b) - c` is equivalent to `a+b - c` (same precedence).
        # If the left operator is weaker binding than the current, then
        # parentheses are necessary:
        # e.g., `(a+b) * c` is NOT equivalent to `a+b * c`.
        lval_str = self._parenthesize_if(
            n.left,
            lambda d: not (
                self._is_simple_node(d)
                or self.reduce_parentheses
                and isinstance(d, c_ast.BinaryOp)
                and self.precedence_map[d.op] >= self.precedence_map[n.op]
            ),
        )
        # If `n.right.op` has a stronger -but not equal- binding precedence,
        # parenthesis can be omitted on the right:
        # e.g., `a + (b*c)` is equivalent to `a + b*c`.
        # If the right operator is weaker or equally binding, then parentheses
        # are necessary:
        # e.g., `a * (b+c)` is NOT equivalent to `a * b+c` and
        # `a - (b+c)` is NOT equivalent to `a - b+c` (same precedence).
        rval_str = self._parenthesize_if(
            n.right,
            lambda d: not (
                self._is_simple_node(d)
                or self.reduce_parentheses
                and isinstance(d, c_ast.BinaryOp)
                and self.precedence_map[d.op] > self.precedence_map[n.op]
            ),
        )
        return f"{lval_str} {n.op} {rval_str}"

    def visit_Assignment(self, n: c_ast.Assignment) -> str:
        rval_str = self._parenthesize_if(
            n.rvalue, lambda n: isinstance(n, c_ast.Assignment)
        )
        return f"{self.visit(n.lvalue)} {n.op} {rval_str}"

    def visit_IdentifierType(self, n: c_ast.IdentifierType) -> str:
        return " ".join(n.names)

    def _visit_expr(self, n: c_ast.Node) -> str:
        match n:
            case c_ast.InitList():
                return "{" + self.visit(n) + "}"
            case c_ast.ExprList() | c_ast.Compound():
                return "(" + self.visit(n) + ")"
            case _:
                return self.visit(n)

    def visit_Decl(self, n: c_ast.Decl, no_type: bool = False) -> str:
        # no_type is used when a Decl is part of a DeclList, where the type is
        # explicitly only for the first declaration in a list.
        #
        s = n.name if no_type else self._generate_decl(n)
        if n.bitsize:
            s += " : " + self.visit(n.bitsize)
        if n.init:
            s += " = " + self._visit_expr(n.init)
        return s

    def visit_DeclList(self, n: c_ast.DeclList) -> str:
        s = self.visit(n.decls[0])
        if len(n.decls) > 1:
            s += ", " + ", ".join(
                self.visit_Decl(decl, no_type=True) for decl in n.decls[1:]
            )
        return s

    def visit_Typedef(self, n: c_ast.Typedef) -> str:
        s = ""
        if n.storage:
            s += " ".join(n.storage) + " "
        s += self._generate_type(n.type)
        return s

    def visit_Cast(self, n: c_ast.Cast) -> str:
        s = "(" + self._generate_type(n.to_type, emit_declname=False) + ")"
        return s + " " + self._parenthesize_unless_simple(n.expr)

    def visit_ExprList(self, n: c_ast.ExprList) -> str:
        visited_subexprs = []
        for expr in n.exprs:
            visited_subexprs.append(self._visit_expr(expr))
        return ", ".join(visited_subexprs)

    def visit_InitList(self, n: c_ast.InitList) -> str:
        visited_subexprs = []
        for expr in n.exprs:
            visited_subexprs.append(self._visit_expr(expr))
        return ", ".join(visited_subexprs)

    def visit_Enum(self, n: c_ast.Enum) -> str:
        return self._generate_struct_union_enum(n, name="enum")

    def visit_Alignas(self, n: c_ast.Alignas) -> str:
        return "_Alignas({})".format(self.visit(n.alignment))

    def visit_Enumerator(self, n: c_ast.Enumerator) -> str:
        if not n.value:
            return "{indent}{name},\n".format(
                indent=self._make_indent(),
                name=n.name,
            )
        else:
            return "{indent}{name} = {value},\n".format(
                indent=self._make_indent(),
                name=n.name,
                value=self.visit(n.value),
            )

    def visit_FuncDef(self, n: c_ast.FuncDef) -> str:
        decl = self.visit(n.decl)
        self.indent_level = 0
        body = self.visit(n.body)
        if n.param_decls:
            knrdecls = ";\n".join(self.visit(p) for p in n.param_decls)
            return decl + "\n" + knrdecls + ";\n" + body + "\n"
        else:
            return decl + "\n" + body + "\n"

    def visit_FileAST(self, n: c_ast.FileAST) -> str:
        s = ""
        for ext in n.ext:
            match ext:
                case c_ast.FuncDef():
                    s += self.visit(ext)
                case c_ast.Pragma():
                    s += self.visit(ext) + "\n"
                case _:
                    s += self.visit(ext) + ";\n"
        return s

    def visit_Compound(self, n: c_ast.Compound) -> str:
        s = self._make_indent() + "{\n"
        self.indent_level += 2
        if n.block_items:
            s += "".join(self._generate_stmt(stmt) for stmt in n.block_items)
        self.indent_level -= 2
        s += self._make_indent() + "}\n"
        return s

    def visit_CompoundLiteral(self, n: c_ast.CompoundLiteral) -> str:
        return "(" + self.visit(n.type) + "){" + self.visit(n.init) + "}"

    def visit_EmptyStatement(self, n: c_ast.EmptyStatement) -> str:
        return ";"

    def visit_ParamList(self, n: c_ast.ParamList) -> str:
        return ", ".join(self.visit(param) for param in n.params)

    def visit_Return(self, n: c_ast.Return) -> str:
        s = "return"
        if n.expr:
            s += " " + self.visit(n.expr)
        return s + ";"

    def visit_Break(self, n: c_ast.Break) -> str:
        return "break;"

    def visit_Continue(self, n: c_ast.Continue) -> str:
        return "continue;"

    def visit_TernaryOp(self, n: c_ast.TernaryOp) -> str:
        s = "(" + self._visit_expr(n.cond) + ") ? "
        s += "(" + self._visit_expr(n.iftrue) + ") : "
        s += "(" + self._visit_expr(n.iffalse) + ")"
        return s

    def visit_If(self, n: c_ast.If) -> str:
        s = "if ("
        if n.cond:
            s += self.visit(n.cond)
        s += ")\n"
        s += self._generate_stmt(n.iftrue, add_indent=True)
        if n.iffalse:
            s += self._make_indent() + "else\n"
            s += self._generate_stmt(n.iffalse, add_indent=True)
        return s

    def visit_For(self, n: c_ast.For) -> str:
        s = "for ("
        if n.init:
            s += self.visit(n.init)
        s += ";"
        if n.cond:
            s += " " + self.visit(n.cond)
        s += ";"
        if n.next:
            s += " " + self.visit(n.next)
        s += ")\n"
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_While(self, n: c_ast.While) -> str:
        s = "while ("
        if n.cond:
            s += self.visit(n.cond)
        s += ")\n"
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_DoWhile(self, n: c_ast.DoWhile) -> str:
        s = "do\n"
        s += self._generate_stmt(n.stmt, add_indent=True)
        s += self._make_indent() + "while ("
        if n.cond:
            s += self.visit(n.cond)
        s += ");"
        return s

    def visit_StaticAssert(self, n: c_ast.StaticAssert) -> str:
        s = "_Static_assert("
        s += self.visit(n.cond)
        if n.message:
            s += ","
            s += self.visit(n.message)
        s += ")"
        return s

    def visit_Switch(self, n: c_ast.Switch) -> str:
        s = "switch (" + self.visit(n.cond) + ")\n"
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_Case(self, n: c_ast.Case) -> str:
        s = "case " + self.visit(n.expr) + ":\n"
        for stmt in n.stmts:
            s += self._generate_stmt(stmt, add_indent=True)
        return s

    def visit_Default(self, n: c_ast.Default) -> str:
        s = "default:\n"
        for stmt in n.stmts:
            s += self._generate_stmt(stmt, add_indent=True)
        return s

    def visit_Label(self, n: c_ast.Label) -> str:
        return n.name + ":\n" + self._generate_stmt(n.stmt)

    def visit_Goto(self, n: c_ast.Goto) -> str:
        return "goto " + n.name + ";"

    def visit_EllipsisParam(self, n: c_ast.EllipsisParam) -> str:
        return "..."

    def visit_Struct(self, n: c_ast.Struct) -> str:
        return self._generate_struct_union_enum(n, "struct")

    def visit_Typename(self, n: c_ast.Typename) -> str:
        return self._generate_type(n.type)

    def visit_Union(self, n: c_ast.Union) -> str:
        return self._generate_struct_union_enum(n, "union")

    def visit_NamedInitializer(self, n: c_ast.NamedInitializer) -> str:
        s = ""
        for name in n.name:
            if isinstance(name, c_ast.ID):
                s += "." + name.name
            else:
                s += "[" + self.visit(name) + "]"
        s += " = " + self._visit_expr(n.expr)
        return s

    def visit_FuncDecl(self, n: c_ast.FuncDecl) -> str:
        return self._generate_type(n)

    def visit_ArrayDecl(self, n: c_ast.ArrayDecl) -> str:
        return self._generate_type(n, emit_declname=False)

    def visit_TypeDecl(self, n: c_ast.TypeDecl) -> str:
        return self._generate_type(n, emit_declname=False)

    def visit_PtrDecl(self, n: c_ast.PtrDecl) -> str:
        return self._generate_type(n, emit_declname=False)

    def _generate_struct_union_enum(
        self, n: c_ast.Struct | c_ast.Union | c_ast.Enum, name: str
    ) -> str:
        """Generates code for structs, unions, and enums. name should be
        'struct', 'union', or 'enum'.
        """
        if name in ("struct", "union"):
            assert isinstance(n, (c_ast.Struct, c_ast.Union))
            members = n.decls
            body_function = self._generate_struct_union_body
        else:
            assert name == "enum"
            assert isinstance(n, c_ast.Enum)
            members = None if n.values is None else n.values.enumerators
            body_function = self._generate_enum_body
        s = name + " " + (n.name or "")
        if members is not None:
            # None means no members
            # Empty sequence means an empty list of members
            s += "\n"
            s += self._make_indent()
            self.indent_level += 2
            s += "{\n"
            s += body_function(members)
            self.indent_level -= 2
            s += self._make_indent() + "}"
        return s

    def _generate_struct_union_body(self, members: List[c_ast.Node]) -> str:
        return "".join(self._generate_stmt(decl) for decl in members)

    def _generate_enum_body(self, members: List[c_ast.Enumerator]) -> str:
        # `[:-2] + '\n'` removes the final `,\n` from the enumerator list
        return "".join(self.visit(value) for value in members)[:-2] + "\n"

    def _generate_stmt(self, n: c_ast.Node, add_indent: bool = False) -> str:
        """Generation from a statement node. This method exists as a wrapper
        for individual visit_* methods to handle different treatment of
        some statements in this context.
        """
        if add_indent:
            self.indent_level += 2
        indent = self._make_indent()
        if add_indent:
            self.indent_level -= 2

        match n:
            case (
                c_ast.Decl()
                | c_ast.Assignment()
                | c_ast.Cast()
                | c_ast.UnaryOp()
                | c_ast.BinaryOp()
                | c_ast.TernaryOp()
                | c_ast.FuncCall()
                | c_ast.ArrayRef()
                | c_ast.StructRef()
                | c_ast.Constant()
                | c_ast.ID()
                | c_ast.Typedef()
                | c_ast.ExprList()
            ):
                # These can also appear in an expression context so no semicolon
                # is added to them automatically
                #
                return indent + self.visit(n) + ";\n"
            case c_ast.Compound():
                # No extra indentation required before the opening brace of a
                # compound - because it consists of multiple lines it has to
                # compute its own indentation.
                #
                return self.visit(n)
            case c_ast.If():
                return indent + self.visit(n)
            case _:
                return indent + self.visit(n) + "\n"

    def _generate_decl(self, n: c_ast.Decl) -> str:
        """Generation from a Decl node."""
        s = ""
        if n.funcspec:
            s = " ".join(n.funcspec) + " "
        if n.storage:
            s += " ".join(n.storage) + " "
        if n.align:
            s += self.visit(n.align[0]) + " "
        s += self._generate_type(n.type)
        return s

    def _generate_type(
        self,
        n: c_ast.Node,
        modifiers: List[c_ast.Node] = [],
        emit_declname: bool = True,
    ) -> str:
        """Recursive generation from a type node. n is the type node.
        modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
        encountered on the way down to a TypeDecl, to allow proper
        generation from it.
        """
        # ~ print(n, modifiers)
        match n:
            case c_ast.TypeDecl():
                s = ""
                if n.quals:
                    s += " ".join(n.quals) + " "
                s += self.visit(n.type)

                nstr = n.declname if n.declname and emit_declname else ""
                # Resolve modifiers.
                # Wrap in parens to distinguish pointer to array and pointer to
                # function syntax.
                #
                for i, modifier in enumerate(modifiers):
                    match modifier:
                        case c_ast.ArrayDecl():
                            if i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl):
                                nstr = "(" + nstr + ")"
                            nstr += "["
                            if modifier.dim_quals:
                                nstr += " ".join(modifier.dim_quals) + " "
                            if modifier.dim is not None:
                                nstr += self.visit(modifier.dim)
                            nstr += "]"
                        case c_ast.FuncDecl():
                            if i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl):
                                nstr = "(" + nstr + ")"
                            args = (
                                self.visit(modifier.args)
                                if modifier.args is not None
                                else ""
                            )
                            nstr += "(" + args + ")"
                        case c_ast.PtrDecl():
                            if modifier.quals:
                                quals = " ".join(modifier.quals)
                                suffix = f" {nstr}" if nstr else ""
                                nstr = f"* {quals}{suffix}"
                            else:
                                nstr = "*" + nstr
                if nstr:
                    s += " " + nstr
                return s
            case c_ast.Decl():
                return self._generate_decl(n.type)
            case c_ast.Typename():
                return self._generate_type(n.type, emit_declname=emit_declname)
            case c_ast.IdentifierType():
                return " ".join(n.names) + " "
            case c_ast.ArrayDecl() | c_ast.PtrDecl() | c_ast.FuncDecl():
                return self._generate_type(
                    n.type, modifiers + [n], emit_declname=emit_declname
                )
            case _:
                return self.visit(n)

    def _parenthesize_if(
        self, n: c_ast.Node, condition: Callable[[c_ast.Node], bool]
    ) -> str:
        """Visits 'n' and returns its string representation, parenthesized
        if the condition function applied to the node returns True.
        """
        s = self._visit_expr(n)
        if condition(n):
            return "(" + s + ")"
        else:
            return s

    def _parenthesize_unless_simple(self, n: c_ast.Node) -> str:
        """Common use case for _parenthesize_if"""
        return self._parenthesize_if(n, lambda d: not self._is_simple_node(d))

    def _is_simple_node(self, n: c_ast.Node) -> bool:
        """Returns True for nodes that are "simple" - i.e. nodes that always
        have higher precedence than operators.
        """
        return isinstance(
            n,
            (c_ast.Constant, c_ast.ID, c_ast.ArrayRef, c_ast.StructRef, c_ast.FuncCall),
        )
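A minimal round-trip sketch using the generator above; reduce_parentheses only drops parentheses that the precedence map proves redundant, so the required ones survive:

```python
from pycparser import c_parser, c_generator

ast = c_parser.CParser().parse("int x = (1 + 2) * 3;")
print(c_generator.CGenerator(reduce_parentheses=True).visit(ast))
# -> int x = (1 + 2) * 3;
#    ('+' binds weaker than '*', so the left parentheses are kept)
```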

venv/lib/python3.12/site-packages/pycparser/c_lexer.py (new file, 706 lines)
@@ -0,0 +1,706 @@
|
||||
# ------------------------------------------------------------------------------
|
||||
# pycparser: c_lexer.py
|
||||
#
|
||||
# CLexer class: lexer for the C language
|
||||
#
|
||||
# Eli Bendersky [https://eli.thegreenplace.net/]
|
||||
# License: BSD
|
||||
# ------------------------------------------------------------------------------
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Callable, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class _Token:
|
||||
type: str
|
||||
value: str
|
||||
lineno: int
|
||||
column: int
|
||||
|
||||
|
||||
class CLexer:
|
||||
"""A standalone lexer for C.
|
||||
|
||||
Parameters for construction:
|
||||
error_func:
|
||||
Called with (msg, line, column) on lexing errors.
|
||||
on_lbrace_func:
|
||||
Called when an LBRACE token is produced (used for scope tracking).
|
||||
on_rbrace_func:
|
||||
Called when an RBRACE token is produced (used for scope tracking).
|
||||
type_lookup_func:
|
||||
Called with an identifier name; expected to return True if it is
|
||||
a typedef name and should be tokenized as TYPEID.
|
||||
|
||||
Call input(text) to initialize lexing, and then keep calling token() to
|
||||
get the next token, until it returns None (at end of input).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
error_func: Callable[[str, int, int], None],
|
||||
on_lbrace_func: Callable[[], None],
|
||||
on_rbrace_func: Callable[[], None],
|
||||
type_lookup_func: Callable[[str], bool],
|
||||
) -> None:
|
||||
self.error_func = error_func
|
||||
self.on_lbrace_func = on_lbrace_func
|
||||
self.on_rbrace_func = on_rbrace_func
|
||||
self.type_lookup_func = type_lookup_func
|
||||
self._init_state()
|
||||
|
||||
def input(self, text: str, filename: str = "") -> None:
|
||||
"""Initialize the lexer to the given input text.
|
||||
|
||||
filename is an optional name identifying the file from which the input
|
||||
comes. The lexer can modify it if #line directives are encountered.
|
||||
"""
|
||||
self._init_state()
|
||||
self._lexdata = text
|
||||
self._filename = filename
|
||||
|
||||
def _init_state(self) -> None:
|
||||
self._lexdata = ""
|
||||
self._filename = ""
|
||||
self._pos = 0
|
||||
self._line_start = 0
|
||||
self._pending_tok: Optional[_Token] = None
|
||||
self._lineno = 1
|
||||
|
||||
@property
|
||||
def filename(self) -> str:
|
||||
return self._filename
|
||||
|
||||
def token(self) -> Optional[_Token]:
|
||||
# Lexing strategy overview:
|
||||
#
|
||||
# - We maintain a current position (self._pos), line number, and the
|
||||
# byte offset of the current line start. The lexer is a simple loop
|
||||
# that skips whitespace/newlines and emits one token per call.
|
||||
# - A small amount of logic is handled manually before regex matching:
|
||||
#
|
||||
# * Preprocessor-style directives: if we see '#', we check whether
|
||||
# it's a #line or #pragma directive and consume it inline. #line
|
||||
# updates lineno/filename and produces no tokens. #pragma can yield
|
||||
# both PPPRAGMA and PPPRAGMASTR, but token() returns a single token,
|
||||
# so we stash the PPPRAGMASTR as _pending_tok to return on the next
|
||||
# token() call. Otherwise we return PPHASH.
|
||||
# * Newlines update lineno/line-start tracking so tokens can record
|
||||
# accurate columns.
|
||||
#
|
||||
# - The bulk of tokens are recognized in _match_token:
|
||||
#
|
||||
# * _regex_rules: regex patterns for identifiers, literals, and other
|
||||
# complex tokens (including error-producing patterns). The lexer
|
||||
# uses a combined _regex_master to scan options at the same time.
|
||||
# * _fixed_tokens: exact string matches for operators and punctuation,
|
||||
# resolved by longest match.
|
||||
#
|
||||
# - Error patterns call the error callback and advance minimally, which
|
||||
# keeps lexing resilient while reporting useful diagnostics.
|
||||
text = self._lexdata
|
||||
n = len(text)
|
||||
|
||||
if self._pending_tok is not None:
|
||||
tok = self._pending_tok
|
||||
self._pending_tok = None
|
||||
return tok
|
||||
|
||||
while self._pos < n:
|
||||
match text[self._pos]:
|
||||
case " " | "\t":
|
||||
self._pos += 1
|
||||
case "\n":
|
||||
self._lineno += 1
|
||||
self._pos += 1
|
||||
self._line_start = self._pos
|
||||
case "#":
|
||||
if _line_pattern.match(text, self._pos + 1):
|
||||
self._pos += 1
|
||||
self._handle_ppline()
|
||||
continue
|
||||
if _pragma_pattern.match(text, self._pos + 1):
|
||||
self._pos += 1
|
||||
toks = self._handle_pppragma()
|
||||
if len(toks) > 1:
|
||||
self._pending_tok = toks[1]
|
||||
if len(toks) > 0:
|
||||
return toks[0]
|
||||
continue
|
||||
tok = self._make_token("PPHASH", "#", self._pos)
|
||||
self._pos += 1
|
||||
return tok
|
||||
case _:
|
||||
if tok := self._match_token():
|
||||
return tok
|
||||
else:
|
||||
continue
|
||||
|
||||
def _match_token(self) -> Optional[_Token]:
|
||||
"""Match one token at the current position.
|
||||
|
||||
Returns a Token on success, or None if no token could be matched and
|
||||
an error was reported. This method always advances _pos by the matched
|
||||
length, or by 1 on error/no-match.
|
||||
"""
|
||||
text = self._lexdata
|
||||
pos = self._pos
|
||||
# We pick the longest match between:
|
||||
# - the master regex (identifiers, literals, error patterns, etc.)
|
||||
# - fixed operator/punctuator literals from the bucket for text[pos]
|
||||
#
|
||||
# The longest match is required to ensure we properly lex something
|
||||
# like ".123" (a floating-point constant) as a single entity (with
|
||||
# FLOAT_CONST), rather than a PERIOD followed by a number.
|
||||
#
|
||||
# The fixed-literal buckets are already length-sorted, so within that
|
||||
# bucket we can take the first match. However, we still compare its
|
||||
# length to the regex match because the regex may have matched a longer
|
||||
# token that should take precedence.
|
||||
best = None
|
||||
|
||||
if m := _regex_master.match(text, pos):
|
||||
tok_type = m.lastgroup
|
||||
# All master-regex alternatives are named; lastgroup shouldn't be None.
|
||||
assert tok_type is not None
|
||||
value = m.group(tok_type)
|
||||
length = len(value)
|
||||
action, msg = _regex_actions[tok_type]
|
||||
best = (length, tok_type, value, action, msg)
|
||||
|
||||
if bucket := _fixed_tokens_by_first.get(text[pos]):
|
||||
for entry in bucket:
|
||||
if text.startswith(entry.literal, pos):
|
||||
length = len(entry.literal)
|
||||
if best is None or length > best[0]:
|
||||
best = (
|
||||
length,
|
||||
entry.tok_type,
|
||||
entry.literal,
|
||||
_RegexAction.TOKEN,
|
||||
None,
|
||||
)
|
||||
break
|
||||
|
||||
if best is None:
|
||||
msg = f"Illegal character {repr(text[pos])}"
|
||||
self._error(msg, pos)
|
||||
self._pos += 1
|
||||
return None
|
||||
|
||||
length, tok_type, value, action, msg = best
|
||||
if action == _RegexAction.ERROR:
|
||||
if tok_type == "BAD_CHAR_CONST":
|
||||
msg = f"Invalid char constant {value}"
|
||||
# All other ERROR rules provide a message.
|
||||
assert msg is not None
|
||||
self._error(msg, pos)
|
||||
self._pos += max(1, length)
|
||||
return None
|
||||
|
||||
if action == _RegexAction.ID:
|
||||
tok_type = _keyword_map.get(value, "ID")
|
||||
if tok_type == "ID" and self.type_lookup_func(value):
|
||||
tok_type = "TYPEID"
|
||||
|
||||
tok = self._make_token(tok_type, value, pos)
|
||||
self._pos += length
|
||||
|
||||
if tok.type == "LBRACE":
|
||||
self.on_lbrace_func()
|
||||
elif tok.type == "RBRACE":
|
||||
self.on_rbrace_func()
|
||||
|
||||
return tok
|
||||
|
||||
def _make_token(self, tok_type: str, value: str, pos: int) -> _Token:
|
||||
"""Create a Token at an absolute input position.
|
||||
|
||||
Expects tok_type/value and the absolute byte offset pos in the current
|
||||
input. Does not advance lexer state; callers manage _pos themselves.
|
||||
Returns a Token with lineno/column computed from current line tracking.
|
||||
"""
|
||||
column = pos - self._line_start + 1
|
||||
tok = _Token(tok_type, value, self._lineno, column)
|
||||
return tok
|
||||
|
||||
def _error(self, msg: str, pos: int) -> None:
|
||||
column = pos - self._line_start + 1
|
||||
self.error_func(msg, self._lineno, column)
|
||||
|
||||
    def _handle_ppline(self) -> None:
        # Since #line directives aren't supposed to return tokens but should
        # only affect the lexer's state (update line/filename for coords), this
        # method does a bit of parsing on its own. It doesn't return anything,
        # but its side effect is to update self._pos past the directive, and
        # potentially update self._lineno and self._filename, based on the
        # directive's contents.
        #
        # Accepted #line forms from preprocessors:
        # - "#line 66 \"kwas\\df.h\""
        # - "# 9"
        # - "#line 10 \"include/me.h\" 1 2 3" (extra numeric flags)
        # - "# 1 \"file.h\" 3"
        # Errors we must report:
        # - "#line \"file.h\"" (filename before line number)
        # - "#line df" (garbage instead of number/string)
        #
        # We scan the directive line once (after an optional 'line' keyword),
        # validating the order: NUMBER, optional STRING, then any NUMBERs.
        # The NUMBERs tail is only accepted if a filename STRING was present.
        text = self._lexdata
        n = len(text)
        line_end = text.find("\n", self._pos)
        if line_end == -1:
            line_end = n
        line = text[self._pos : line_end]
        pos = 0
        line_len = len(line)

        def skip_ws() -> None:
            nonlocal pos
            while pos < line_len and line[pos] in " \t":
                pos += 1

        skip_ws()
        if line.startswith("line", pos):
            pos += 4

        def success(pp_line: Optional[str], pp_filename: Optional[str]) -> None:
            if pp_line is None:
                self._error("line number missing in #line", self._pos + line_len)
            else:
                self._lineno = int(pp_line)
                if pp_filename is not None:
                    self._filename = pp_filename
            self._pos = line_end + 1
            self._line_start = self._pos

        def fail(msg: str, offset: int) -> None:
            self._error(msg, self._pos + offset)
            self._pos = line_end + 1
            self._line_start = self._pos

        skip_ws()
        if pos >= line_len:
            success(None, None)
            return
        if line[pos] == '"':
            fail("filename before line number in #line", pos)
            return

        m = re.match(_decimal_constant, line[pos:])
        if not m:
            fail("invalid #line directive", pos)
            return

        pp_line = m.group(0)
        pos += len(pp_line)
        skip_ws()
        if pos >= line_len:
            success(pp_line, None)
            return

        if line[pos] != '"':
            fail("invalid #line directive", pos)
            return

        m = re.match(_string_literal, line[pos:])
        if not m:
            fail("invalid #line directive", pos)
            return

        pp_filename = m.group(0).lstrip('"').rstrip('"')
        pos += len(m.group(0))

        # Consume arbitrary sequence of numeric flags after the directive
        while True:
            skip_ws()
            if pos >= line_len:
                break
            m = re.match(_decimal_constant, line[pos:])
            if not m:
                fail("invalid #line directive", pos)
                return
            pos += len(m.group(0))

        success(pp_line, pp_filename)

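    # Illustrative walk-through (not in the original source): for the
    # directive tail ' 9 "include/me.h" 1 2 3' (assuming the '#' was already
    # consumed), _handle_ppline emits no token; afterwards self._lineno == 9,
    # self._filename == "include/me.h", and self._pos / self._line_start point
    # just past the directive's newline.
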
    def _handle_pppragma(self) -> List[_Token]:
        # Parse a full #pragma line; returns a list with 1 or 2 tokens -
        # PPPRAGMA and an optional PPPRAGMASTR. If an empty list is returned,
        # it means an error occurred, or we're at the end of input.
        #
        # Examples:
        # - "#pragma" -> PPPRAGMA only
        # - "#pragma once" -> PPPRAGMA, PPPRAGMASTR("once")
        # - "# pragma omp parallel private(th_id)"
        #   -> PPPRAGMA, PPPRAGMASTR("omp parallel private(th_id)")
        # - "#\tpragma {pack: 2, smack: 3}"
        #   -> PPPRAGMA, PPPRAGMASTR("{pack: 2, smack: 3}")
        text = self._lexdata
        n = len(text)
        pos = self._pos

        while pos < n and text[pos] in " \t":
            pos += 1
        if pos >= n:
            self._pos = pos
            return []

        if not text.startswith("pragma", pos):
            self._error("invalid #pragma directive", pos)
            self._pos = pos + 1
            return []

        pragma_pos = pos
        pos += len("pragma")
        toks = [self._make_token("PPPRAGMA", "pragma", pragma_pos)]

        while pos < n and text[pos] in " \t":
            pos += 1

        start = pos
        while pos < n and text[pos] != "\n":
            pos += 1
        if pos > start:
            toks.append(self._make_token("PPPRAGMASTR", text[start:pos], start))
        if pos < n and text[pos] == "\n":
            self._lineno += 1
            pos += 1
            self._line_start = pos
        self._pos = pos
        return toks


##
## Reserved keywords
##
_keywords: Tuple[str, ...] = (
    "AUTO",
    "BREAK",
    "CASE",
    "CHAR",
    "CONST",
    "CONTINUE",
    "DEFAULT",
    "DO",
    "DOUBLE",
    "ELSE",
    "ENUM",
    "EXTERN",
    "FLOAT",
    "FOR",
    "GOTO",
    "IF",
    "INLINE",
    "INT",
    "LONG",
    "REGISTER",
    "OFFSETOF",
    "RESTRICT",
    "RETURN",
    "SHORT",
    "SIGNED",
    "SIZEOF",
    "STATIC",
    "STRUCT",
    "SWITCH",
    "TYPEDEF",
    "UNION",
    "UNSIGNED",
    "VOID",
    "VOLATILE",
    "WHILE",
    "__INT128",
    "_BOOL",
    "_COMPLEX",
    "_NORETURN",
    "_THREAD_LOCAL",
    "_STATIC_ASSERT",
    "_ATOMIC",
    "_ALIGNOF",
    "_ALIGNAS",
    "_PRAGMA",
)

_keyword_map: Dict[str, str] = {}

for keyword in _keywords:
    # Keywords from the newer C standards are mixed-case, like _Bool, _Alignas, etc.
    if keyword.startswith("_") and len(keyword) > 1 and keyword[1].isalpha():
        _keyword_map[keyword[:2].upper() + keyword[2:].lower()] = keyword
    else:
        _keyword_map[keyword.lower()] = keyword

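# Illustrative results (not in the original source) of the mapping built above:
#
#   >>> _keyword_map["int"]
#   'INT'
#   >>> _keyword_map["_Bool"]      # mixed-case C11 spelling is preserved
#   '_BOOL'
#   >>> _keyword_map["__int128"]   # second char '_' is not alpha -> lowercased
#   '__INT128'
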
##
## Regexes for use in tokens
##

# valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
_identifier = r"[a-zA-Z_$][0-9a-zA-Z_$]*"

_hex_prefix = "0[xX]"
_hex_digits = "[0-9a-fA-F]+"
_bin_prefix = "0[bB]"
_bin_digits = "[01]+"

# integer constants (K&R2: A.2.5.1)
_integer_suffix_opt = (
    r"(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?"
)
_decimal_constant = (
    "(0" + _integer_suffix_opt + ")|([1-9][0-9]*" + _integer_suffix_opt + ")"
)
_octal_constant = "0[0-7]*" + _integer_suffix_opt
_hex_constant = _hex_prefix + _hex_digits + _integer_suffix_opt
_bin_constant = _bin_prefix + _bin_digits + _integer_suffix_opt

_bad_octal_constant = "0[0-7]*[89]"

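# Illustrative matches (not in the original source) for the constants above;
# the optional suffix comes from _integer_suffix_opt:
#
#   >>> import re
#   >>> re.fullmatch(_decimal_constant, "42ull") is not None
#   True
#   >>> re.fullmatch(_bin_constant, "0b1011u") is not None
#   True
#   >>> re.fullmatch(_bad_octal_constant, "08") is not None
#   True
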
# comments are not supported
_unsupported_c_style_comment = r"\/\*"
_unsupported_cxx_style_comment = r"\/\/"

# character constants (K&R2: A.2.5.2)
# Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
# directives with Windows paths as filenames (..\..\dir\file)
# For the same reason, decimal_escape allows all digit sequences. We want to
# parse all correct code, even if it means to sometimes parse incorrect
# code.
#
# The original regexes were taken verbatim from the C syntax definition,
# and were later modified to avoid worst-case exponential running time.
#
#   simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
#   decimal_escape = r"""(\d+)"""
#   hex_escape = r"""(x[0-9a-fA-F]+)"""
#   bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
#
# The following modifications were made to avoid the ambiguity that allowed
# backtracking (https://github.com/eliben/pycparser/issues/61):
#
# - \x was removed from simple_escape, unless it is not followed by a hex
#   digit, to avoid ambiguity with hex_escape.
# - hex_escape allows one or more hex characters, but requires that the next
#   character (if any) is not hex
# - decimal_escape allows one or more decimal characters, but requires that the
#   next character (if any) is not a decimal
# - bad_escape does not allow any decimals (8-9), to avoid conflicting with the
#   permissive decimal_escape.
#
# Without this change, Python's `re` module would recursively try parsing each
# ambiguous escape sequence in multiple ways, e.g. `\123` could be parsed as
# `\1`+`23`, `\12`+`3`, and `\123`.

_simple_escape = r"""([a-wyzA-Z._~!=&\^\-\\?'"]|x(?![0-9a-fA-F]))"""
_decimal_escape = r"""(\d+)(?!\d)"""
_hex_escape = r"""(x[0-9a-fA-F]+)(?![0-9a-fA-F])"""
_bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])"""

_escape_sequence = (
    r"""(\\(""" + _simple_escape + "|" + _decimal_escape + "|" + _hex_escape + "))"
)

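# Illustrative check (not in the original source): the (?!\d) lookahead in
# _decimal_escape means a decimal escape can only swallow all of its digits,
# so `\123` matches as one escape rather than `\1`+`23` or `\12`+`3`:
#
#   >>> import re
#   >>> re.match(_escape_sequence, r"\123").group(0)
#   '\\123'
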
# This complicated regex with lookahead might be slow for strings; because all
# of the valid escapes (including \x) allow zero or more non-escaped
# characters after the first character, simple_escape+decimal_escape+hex_escape
# is simplified for strings to:

_escape_sequence_start_in_string = r"""(\\[0-9a-zA-Z._~!=&\^\-\\?'"])"""

_cconst_char = r"""([^'\\\n]|""" + _escape_sequence + ")"
_char_const = "'" + _cconst_char + "'"
_wchar_const = "L" + _char_const
_u8char_const = "u8" + _char_const
_u16char_const = "u" + _char_const
_u32char_const = "U" + _char_const
_multicharacter_constant = "'" + _cconst_char + "{2,4}'"
_unmatched_quote = "('" + _cconst_char + "*\\n)|('" + _cconst_char + "*$)"
_bad_char_const = (
    r"""('""" + _cconst_char + """[^'\n]+')|('')|('""" + _bad_escape + r"""[^'\n]*')"""
)

# string literals (K&R2: A.2.6)
_string_char = r"""([^"\\\n]|""" + _escape_sequence_start_in_string + ")"
_string_literal = '"' + _string_char + '*"'
_wstring_literal = "L" + _string_literal
_u8string_literal = "u8" + _string_literal
_u16string_literal = "u" + _string_literal
_u32string_literal = "U" + _string_literal
_bad_string_literal = '"' + _string_char + "*" + _bad_escape + _string_char + '*"'

# floating constants (K&R2: A.2.5.3)
_exponent_part = r"""([eE][-+]?[0-9]+)"""
_fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
_floating_constant = (
    "(((("
    + _fractional_constant
    + ")"
    + _exponent_part
    + "?)|([0-9]+"
    + _exponent_part
    + "))[FfLl]?)"
)
_binary_exponent_part = r"""([pP][+-]?[0-9]+)"""
_hex_fractional_constant = (
    "(((" + _hex_digits + r""")?\.""" + _hex_digits + ")|(" + _hex_digits + r"""\.))"""
)
_hex_floating_constant = (
    "("
    + _hex_prefix
    + "("
    + _hex_digits
    + "|"
    + _hex_fractional_constant
    + ")"
    + _binary_exponent_part
    + "[FfLl]?)"
)


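# Illustrative matches (not in the original source) for the floating-point
# regexes above:
#
#   >>> import re
#   >>> re.fullmatch(_floating_constant, "1.5e-3f") is not None
#   True
#   >>> re.fullmatch(_hex_floating_constant, "0x1.8p3") is not None
#   True
#   >>> re.fullmatch(_floating_constant, "123") is None   # no '.' or exponent
#   True

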
class _RegexAction(Enum):
    TOKEN = 0
    ID = 1
    ERROR = 2


@dataclass(frozen=True)
class _RegexRule:
    # tok_type: name of the token emitted for a match
    # regex_pattern: the raw regex (no anchors) to match at the current position
    # action: TOKEN for normal tokens, ID for identifiers, ERROR to report
    # error_message: message used for ERROR entries
    tok_type: str
    regex_pattern: str
    action: _RegexAction
    error_message: Optional[str]


_regex_rules: List[_RegexRule] = [
    _RegexRule(
        "UNSUPPORTED_C_STYLE_COMMENT",
        _unsupported_c_style_comment,
        _RegexAction.ERROR,
        "Comments are not supported, see https://github.com/eliben/pycparser#3using.",
    ),
    _RegexRule(
        "UNSUPPORTED_CXX_STYLE_COMMENT",
        _unsupported_cxx_style_comment,
        _RegexAction.ERROR,
        "Comments are not supported, see https://github.com/eliben/pycparser#3using.",
    ),
    _RegexRule(
        "BAD_STRING_LITERAL",
        _bad_string_literal,
        _RegexAction.ERROR,
        "String contains invalid escape code",
    ),
    _RegexRule("WSTRING_LITERAL", _wstring_literal, _RegexAction.TOKEN, None),
    _RegexRule("U8STRING_LITERAL", _u8string_literal, _RegexAction.TOKEN, None),
    _RegexRule("U16STRING_LITERAL", _u16string_literal, _RegexAction.TOKEN, None),
    _RegexRule("U32STRING_LITERAL", _u32string_literal, _RegexAction.TOKEN, None),
    _RegexRule("STRING_LITERAL", _string_literal, _RegexAction.TOKEN, None),
    _RegexRule("HEX_FLOAT_CONST", _hex_floating_constant, _RegexAction.TOKEN, None),
    _RegexRule("FLOAT_CONST", _floating_constant, _RegexAction.TOKEN, None),
    _RegexRule("INT_CONST_HEX", _hex_constant, _RegexAction.TOKEN, None),
    _RegexRule("INT_CONST_BIN", _bin_constant, _RegexAction.TOKEN, None),
    _RegexRule(
        "BAD_CONST_OCT",
        _bad_octal_constant,
        _RegexAction.ERROR,
        "Invalid octal constant",
    ),
    _RegexRule("INT_CONST_OCT", _octal_constant, _RegexAction.TOKEN, None),
    _RegexRule("INT_CONST_DEC", _decimal_constant, _RegexAction.TOKEN, None),
    _RegexRule("INT_CONST_CHAR", _multicharacter_constant, _RegexAction.TOKEN, None),
    _RegexRule("CHAR_CONST", _char_const, _RegexAction.TOKEN, None),
    _RegexRule("WCHAR_CONST", _wchar_const, _RegexAction.TOKEN, None),
    _RegexRule("U8CHAR_CONST", _u8char_const, _RegexAction.TOKEN, None),
    _RegexRule("U16CHAR_CONST", _u16char_const, _RegexAction.TOKEN, None),
    _RegexRule("U32CHAR_CONST", _u32char_const, _RegexAction.TOKEN, None),
    _RegexRule("UNMATCHED_QUOTE", _unmatched_quote, _RegexAction.ERROR, "Unmatched '"),
    _RegexRule("BAD_CHAR_CONST", _bad_char_const, _RegexAction.ERROR, None),
    _RegexRule("ID", _identifier, _RegexAction.ID, None),
]

_regex_actions: Dict[str, Tuple[_RegexAction, Optional[str]]] = {}
_regex_pattern_parts: List[str] = []
for _rule in _regex_rules:
    _regex_actions[_rule.tok_type] = (_rule.action, _rule.error_message)
    _regex_pattern_parts.append(f"(?P<{_rule.tok_type}>{_rule.regex_pattern})")
# The master regex is a single alternation of all token patterns, each wrapped
# in a named group. We match once at the current position and then use
# `lastgroup` to recover which token kind fired; this avoids iterating over all
# regexes on every character while keeping the same token-level semantics.
_regex_master: re.Pattern[str] = re.compile("|".join(_regex_pattern_parts))


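# Illustrative use of the master regex (not in the original source): matching
# once at a position and reading `lastgroup` recovers which rule fired, since
# the alternation is tried in _regex_rules order:
#
#   >>> m = _regex_master.match("42ull + x", 0)
#   >>> (m.lastgroup, m.group(0))
#   ('INT_CONST_DEC', '42ull')

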
@dataclass(frozen=True)
class _FixedToken:
    tok_type: str
    literal: str


_fixed_tokens: List[_FixedToken] = [
    _FixedToken("ELLIPSIS", "..."),
    _FixedToken("LSHIFTEQUAL", "<<="),
    _FixedToken("RSHIFTEQUAL", ">>="),
    _FixedToken("PLUSPLUS", "++"),
    _FixedToken("MINUSMINUS", "--"),
    _FixedToken("ARROW", "->"),
    _FixedToken("LAND", "&&"),
    _FixedToken("LOR", "||"),
    _FixedToken("LSHIFT", "<<"),
    _FixedToken("RSHIFT", ">>"),
    _FixedToken("LE", "<="),
    _FixedToken("GE", ">="),
    _FixedToken("EQ", "=="),
    _FixedToken("NE", "!="),
    _FixedToken("TIMESEQUAL", "*="),
    _FixedToken("DIVEQUAL", "/="),
    _FixedToken("MODEQUAL", "%="),
    _FixedToken("PLUSEQUAL", "+="),
    _FixedToken("MINUSEQUAL", "-="),
    _FixedToken("ANDEQUAL", "&="),
    _FixedToken("OREQUAL", "|="),
    _FixedToken("XOREQUAL", "^="),
    _FixedToken("EQUALS", "="),
    _FixedToken("PLUS", "+"),
    _FixedToken("MINUS", "-"),
    _FixedToken("TIMES", "*"),
    _FixedToken("DIVIDE", "/"),
    _FixedToken("MOD", "%"),
    _FixedToken("OR", "|"),
    _FixedToken("AND", "&"),
    _FixedToken("NOT", "~"),
    _FixedToken("XOR", "^"),
    _FixedToken("LNOT", "!"),
    _FixedToken("LT", "<"),
    _FixedToken("GT", ">"),
    _FixedToken("CONDOP", "?"),
    _FixedToken("LPAREN", "("),
    _FixedToken("RPAREN", ")"),
    _FixedToken("LBRACKET", "["),
    _FixedToken("RBRACKET", "]"),
    _FixedToken("LBRACE", "{"),
    _FixedToken("RBRACE", "}"),
    _FixedToken("COMMA", ","),
    _FixedToken("PERIOD", "."),
    _FixedToken("SEMI", ";"),
    _FixedToken("COLON", ":"),
]

# To avoid scanning all fixed tokens on every character, we bucket them by the
# first character. When matching at position i, we only look at the bucket for
# text[i], and we pre-sort that bucket by token length so the first match is
# also the longest. This preserves longest-match semantics (e.g. '>>=' before
# '>>' before '>') while reducing the number of comparisons.
_fixed_tokens_by_first: Dict[str, List[_FixedToken]] = {}
for _entry in _fixed_tokens:
    _fixed_tokens_by_first.setdefault(_entry.literal[0], []).append(_entry)
for _bucket in _fixed_tokens_by_first.values():
    _bucket.sort(key=lambda item: len(item.literal), reverse=True)

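# Illustrative lookup (not in the original source): for input ">>= rest" the
# '>' bucket is pre-sorted longest-first, so the first startswith() hit is the
# longest-match token:
#
#   >>> [t.literal for t in _fixed_tokens_by_first[">"]]
#   ['>>=', '>>', '>=', '>']
#   >>> next(t.tok_type for t in _fixed_tokens_by_first[">"]
#   ...      if ">>= rest".startswith(t.literal))
#   'RSHIFTEQUAL'
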
_line_pattern: re.Pattern[str] = re.compile(r"([ \t]*line\W)|([ \t]*\d+)")
_pragma_pattern: re.Pattern[str] = re.compile(r"[ \t]*pragma\W")
2376
venv/lib/python3.12/site-packages/pycparser/c_parser.py
Normal file
File diff suppressed because it is too large