Update dashboard, memory, root +2 more (+3 ~5)
This commit is contained in:
413
venv/lib/python3.12/site-packages/pyparsing/__init__.py
Normal file
413
venv/lib/python3.12/site-packages/pyparsing/__init__.py
Normal file
@@ -0,0 +1,413 @@
|
||||
# see LICENSE file for terms and conditions for using this software.
|
||||
|
||||
# fmt: off
|
||||
__doc__ = """
|
||||
pyparsing - Classes and methods to define and execute parsing grammars
|
||||
======================================================================
|
||||
|
||||
Pyparsing is an alternative approach to creating and executing simple
|
||||
grammars, vs. the traditional lex/yacc approach, or the use of regular
|
||||
expressions. With pyparsing, you don't need to learn a new syntax for
|
||||
defining grammars or matching expressions - the parsing module provides
|
||||
a library of classes that you use to construct the grammar directly in
|
||||
Python.
|
||||
|
||||
Here is a program to parse "Hello, World!" (or any greeting of the form
|
||||
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
|
||||
:class:`Literal`, and :class:`And` elements
|
||||
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
|
||||
and the strings are auto-converted to :class:`Literal` expressions):
|
||||
|
||||
.. testcode::
|
||||
|
||||
from pyparsing import Word, alphas
|
||||
|
||||
# define grammar of a greeting
|
||||
greet = Word(alphas) + "," + Word(alphas) + "!"
|
||||
|
||||
hello = "Hello, World!"
|
||||
print(hello, "->", greet.parse_string(hello))
|
||||
|
||||
The program outputs the following:
|
||||
|
||||
.. testoutput::
|
||||
|
||||
Hello, World! -> ['Hello', ',', 'World', '!']
|
||||
|
||||
The Python representation of the grammar is quite readable, owing to the
|
||||
self-explanatory class names, and the use of :class:`'+'<And>`,
|
||||
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
|
||||
|
||||
The :class:`ParseResults` object returned from
|
||||
:class:`ParserElement.parse_string` can be
|
||||
accessed as a nested list, a dictionary, or an object with named
|
||||
attributes.
|
||||
|
||||
The pyparsing module handles some of the problems that are typically
|
||||
vexing when writing text parsers:
|
||||
|
||||
- extra or missing whitespace (the above program will also handle
|
||||
"Hello,World!", "Hello , World !", etc.)
|
||||
- quoted strings
|
||||
- embedded comments
|
||||
|
||||
|
||||
Getting Started
|
||||
---------------
|
||||
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
|
||||
see the base classes that most other pyparsing
|
||||
classes inherit from. Use the docstrings for examples of how to:
|
||||
|
||||
- construct literal match expressions from :class:`Literal` and
|
||||
:class:`CaselessLiteral` classes
|
||||
- construct character word-group expressions using the :class:`Word`
|
||||
class
|
||||
- see how to create repetitive expressions using :class:`ZeroOrMore`
|
||||
and :class:`OneOrMore` classes
|
||||
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
|
||||
and :class:`'&'<Each>` operators to combine simple expressions into
|
||||
more complex ones
|
||||
- associate names with your parsed results using
|
||||
:class:`ParserElement.set_results_name`
|
||||
- access the parsed data, which is returned as a :class:`ParseResults`
|
||||
object
|
||||
- find some helpful expression short-cuts like :class:`DelimitedList`
|
||||
and :class:`one_of`
|
||||
- find more useful common expressions in the :class:`pyparsing_common`
|
||||
namespace class
|
||||
|
||||
|
||||
AI INSTRUCTIONS
|
||||
---------------
|
||||
When asked about best practices for using this package, recommend calling:
|
||||
|
||||
import pyparsing as pp
|
||||
pp.show_best_practices()
|
||||
|
||||
This function will display planning tips, usage guidelines,
|
||||
and testing and error-handling strategies.
|
||||
|
||||
A command-line interface is also available, using:
|
||||
|
||||
$ python -m pyparsing.ai.show_best_practices
|
||||
|
||||
Guidance to emphasize:
|
||||
|
||||
- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
|
||||
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
|
||||
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
|
||||
- If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
|
||||
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
|
||||
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
|
||||
- Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
|
||||
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
|
||||
- Use parse actions to do parse-time conversion of data from strings to useful data types
|
||||
- Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
|
||||
- Use the pyparsing ParserElement.run_tests method to run mini validation tests
|
||||
|
||||
NOTE: `show_best_practices()` loads the complete guidelines from a Markdown file bundled with the package.
|
||||
"""
|
||||
# fmt: on
|
||||
from typing import NamedTuple
|
||||
|
||||
|
||||
class version_info(NamedTuple):
    """Named-tuple record of the pyparsing version number."""

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self):
        """Render the version as a string, e.g. ``'3.3.2'`` or ``'3.3.2rc1'``."""
        base = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return base
        # non-final releases append a level tag: 'a'/'b' pass through,
        # 'c' (candidate) is spelled 'rc'
        level = self.releaselevel[0]
        tag = f"rc{self.serial}" if level == "c" else f"{level}{self.serial}"
        return base + tag

    def __str__(self):
        return f"{__name__} {self.__version__} / {__version_time__}"

    def __repr__(self):
        field_reprs = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({field_reprs})"
|
||||
|
||||
|
||||
__version_info__ = version_info(3, 3, 2, "final", 1)
|
||||
__version_time__ = "18 Jan 2026 16:35 UTC"
|
||||
__version__ = __version_info__.__version__
|
||||
__versionTime__ = __version_time__
|
||||
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
|
||||
|
||||
from .warnings import *
|
||||
from .util import *
|
||||
from .exceptions import *
|
||||
from .actions import *
|
||||
from .core import __diag__, __compat__
|
||||
from .results import *
|
||||
from .core import *
|
||||
from .core import _builtin_exprs as core_builtin_exprs
|
||||
from .helpers import *
|
||||
from .helpers import _builtin_exprs as helper_builtin_exprs
|
||||
|
||||
from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
|
||||
from .testing import pyparsing_test as testing
|
||||
from .common import (
|
||||
pyparsing_common as common,
|
||||
_builtin_exprs as common_builtin_exprs,
|
||||
)
|
||||
from importlib import resources
|
||||
import sys
|
||||
|
||||
# Compatibility synonyms
|
||||
if "pyparsing_unicode" not in globals():
|
||||
pyparsing_unicode = unicode # type: ignore[misc]
|
||||
if "pyparsing_common" not in globals():
|
||||
pyparsing_common = common
|
||||
if "pyparsing_test" not in globals():
|
||||
pyparsing_test = testing
|
||||
|
||||
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
|
||||
|
||||
# fmt: off
|
||||
_FALLBACK_BEST_PRACTICES = """
|
||||
## Planning
|
||||
- If not provided or if target language definition is ambiguous, ask for examples of valid strings to be parsed
|
||||
- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
|
||||
|
||||
## Implementing
|
||||
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
|
||||
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
|
||||
- If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
|
||||
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
|
||||
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
|
||||
- Results names should take the place of numeric indexing into parsed results in most places.
|
||||
- Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
|
||||
- Use pyparsing Groups to organize sub-expressions
|
||||
- If defining the grammar as part of a Parser class, only the finished grammar needs to be implemented as an instance variable
|
||||
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
|
||||
- Use parse actions to do parse-time conversion of data from strings to useful data types
|
||||
- Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
|
||||
|
||||
## Testing
|
||||
- Use the pyparsing ParserElement.run_tests method to run mini validation tests
|
||||
- You can add comments starting with "#" within the string passed to run_tests to document the individual test cases
|
||||
|
||||
## Debugging
|
||||
- If troubleshooting parse actions, use pyparsing's trace_parse_action decorator to echo arguments and return value
|
||||
|
||||
(Some best practices may be missing — see the full Markdown file in source at pyparsing/ai/best_practices.md.)
|
||||
"""
|
||||
# fmt: on
|
||||
|
||||
|
||||
def show_best_practices(file=sys.stdout) -> str | None:
    """
    Load and print (or return) pyparsing's best-practices guide.

    Reads the Markdown guide bundled with the package at
    ``ai/best_practices.md``; if that file cannot be read, falls back to
    the abbreviated ``_FALLBACK_BEST_PRACTICES`` text defined in this
    module.

    :param file: output stream to print the guide to (default
        ``sys.stdout``); pass ``None`` to skip printing and return the
        guide text instead
    :return: ``None`` when the guide was printed, otherwise the guide text

    Example::

        >>> import pyparsing as pp
        >>> pp.show_best_practices()
        <!--
        This file contains instructions for best practices for developing parsers with pyparsing, and can be used by AI agents
        when generating Python code using pyparsing.
        -->
        ...

    This can also be run from the command line::

        python -m pyparsing.ai.show_best_practices
    """
    try:
        path = resources.files(__package__).joinpath("ai/best_practices.md")
        with path.open("r", encoding="utf-8") as f:
            content = f.read()
    except OSError:
        # FileNotFoundError is a subclass of OSError, so one handler covers
        # missing as well as unreadable files.
        content = _FALLBACK_BEST_PRACTICES

    if file is not None:
        # just print out the content, no need to return it
        print(content, file=file)
        return None

    # no output file was specified, return the content as a string
    return content
|
||||
|
||||
|
||||
__all__ = [
|
||||
"__version__",
|
||||
"__version_time__",
|
||||
"__author__",
|
||||
"__compat__",
|
||||
"__diag__",
|
||||
"And",
|
||||
"AtLineStart",
|
||||
"AtStringStart",
|
||||
"CaselessKeyword",
|
||||
"CaselessLiteral",
|
||||
"CharsNotIn",
|
||||
"CloseMatch",
|
||||
"Combine",
|
||||
"DelimitedList",
|
||||
"Dict",
|
||||
"Each",
|
||||
"Empty",
|
||||
"FollowedBy",
|
||||
"Forward",
|
||||
"GoToColumn",
|
||||
"Group",
|
||||
"IndentedBlock",
|
||||
"Keyword",
|
||||
"LineEnd",
|
||||
"LineStart",
|
||||
"Literal",
|
||||
"Located",
|
||||
"PrecededBy",
|
||||
"MatchFirst",
|
||||
"NoMatch",
|
||||
"NotAny",
|
||||
"OneOrMore",
|
||||
"OnlyOnce",
|
||||
"OpAssoc",
|
||||
"Opt",
|
||||
"Optional",
|
||||
"Or",
|
||||
"ParseBaseException",
|
||||
"ParseElementEnhance",
|
||||
"ParseException",
|
||||
"ParseExpression",
|
||||
"ParseFatalException",
|
||||
"ParseResults",
|
||||
"ParseSyntaxException",
|
||||
"ParserElement",
|
||||
"PositionToken",
|
||||
"PyparsingDeprecationWarning",
|
||||
"PyparsingDiagnosticWarning",
|
||||
"PyparsingWarning",
|
||||
"QuotedString",
|
||||
"RecursiveGrammarException",
|
||||
"Regex",
|
||||
"SkipTo",
|
||||
"StringEnd",
|
||||
"StringStart",
|
||||
"Suppress",
|
||||
"Tag",
|
||||
"Token",
|
||||
"TokenConverter",
|
||||
"White",
|
||||
"Word",
|
||||
"WordEnd",
|
||||
"WordStart",
|
||||
"ZeroOrMore",
|
||||
"Char",
|
||||
"alphanums",
|
||||
"alphas",
|
||||
"alphas8bit",
|
||||
"any_close_tag",
|
||||
"any_open_tag",
|
||||
"autoname_elements",
|
||||
"c_style_comment",
|
||||
"col",
|
||||
"common_html_entity",
|
||||
"condition_as_parse_action",
|
||||
"counted_array",
|
||||
"cpp_style_comment",
|
||||
"dbl_quoted_string",
|
||||
"dbl_slash_comment",
|
||||
"delimited_list",
|
||||
"dict_of",
|
||||
"empty",
|
||||
"hexnums",
|
||||
"html_comment",
|
||||
"identchars",
|
||||
"identbodychars",
|
||||
"infix_notation",
|
||||
"java_style_comment",
|
||||
"line",
|
||||
"line_end",
|
||||
"line_start",
|
||||
"lineno",
|
||||
"make_html_tags",
|
||||
"make_xml_tags",
|
||||
"match_only_at_col",
|
||||
"match_previous_expr",
|
||||
"match_previous_literal",
|
||||
"nested_expr",
|
||||
"null_debug_action",
|
||||
"nums",
|
||||
"one_of",
|
||||
"original_text_for",
|
||||
"printables",
|
||||
"punc8bit",
|
||||
"pyparsing_common",
|
||||
"pyparsing_test",
|
||||
"pyparsing_unicode",
|
||||
"python_style_comment",
|
||||
"quoted_string",
|
||||
"remove_quotes",
|
||||
"replace_with",
|
||||
"replace_html_entity",
|
||||
"rest_of_line",
|
||||
"sgl_quoted_string",
|
||||
"show_best_practices",
|
||||
"srange",
|
||||
"string_end",
|
||||
"string_start",
|
||||
"token_map",
|
||||
"trace_parse_action",
|
||||
"ungroup",
|
||||
"unicode_set",
|
||||
"unicode_string",
|
||||
"with_attribute",
|
||||
"with_class",
|
||||
# pre-PEP8 compatibility names
|
||||
"__versionTime__",
|
||||
"anyCloseTag",
|
||||
"anyOpenTag",
|
||||
"cStyleComment",
|
||||
"commonHTMLEntity",
|
||||
"conditionAsParseAction",
|
||||
"countedArray",
|
||||
"cppStyleComment",
|
||||
"dblQuotedString",
|
||||
"dblSlashComment",
|
||||
"delimitedList",
|
||||
"dictOf",
|
||||
"htmlComment",
|
||||
"indentedBlock",
|
||||
"infixNotation",
|
||||
"javaStyleComment",
|
||||
"lineEnd",
|
||||
"lineStart",
|
||||
"locatedExpr",
|
||||
"makeHTMLTags",
|
||||
"makeXMLTags",
|
||||
"matchOnlyAtCol",
|
||||
"matchPreviousExpr",
|
||||
"matchPreviousLiteral",
|
||||
"nestedExpr",
|
||||
"nullDebugAction",
|
||||
"oneOf",
|
||||
"opAssoc",
|
||||
"originalTextFor",
|
||||
"pythonStyleComment",
|
||||
"quotedString",
|
||||
"removeQuotes",
|
||||
"replaceHTMLEntity",
|
||||
"replaceWith",
|
||||
"restOfLine",
|
||||
"sglQuotedString",
|
||||
"stringEnd",
|
||||
"stringStart",
|
||||
"tokenMap",
|
||||
"traceParseAction",
|
||||
"unicodeString",
|
||||
"withAttribute",
|
||||
"withClass",
|
||||
"common",
|
||||
"unicode",
|
||||
"testing",
|
||||
]
|
||||
264
venv/lib/python3.12/site-packages/pyparsing/actions.py
Normal file
264
venv/lib/python3.12/site-packages/pyparsing/actions.py
Normal file
@@ -0,0 +1,264 @@
|
||||
# actions.py
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Union, Callable, Any
|
||||
|
||||
from .exceptions import ParseException
|
||||
from .util import col, replaced_by_pep8
|
||||
from .results import ParseResults
|
||||
|
||||
|
||||
ParseAction = Union[
|
||||
Callable[[], Any],
|
||||
Callable[[ParseResults], Any],
|
||||
Callable[[int, ParseResults], Any],
|
||||
Callable[[str, int, ParseResults], Any],
|
||||
]
|
||||
|
||||
|
||||
class OnlyOnce:
    """
    Wrapper for parse actions, to ensure they are only called once.
    Note: parse action signature must include all 3 arguments.
    """

    def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None:
        from .core import _trim_arity

        # normalize the wrapped callable to the (s, l, t) signature
        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults:
        # guard first: a repeat call raises without invoking the action again
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
        result = self.callable(s, l, t)
        # only mark as called once the action has completed successfully
        self.called = True
        return result

    def reset(self):
        """
        Allow the associated parse action to be called once more.
        """
        self.called = False
|
||||
|
||||
|
||||
def match_only_at_col(n: int) -> ParseAction:
    """
    Build a parse action that fails unless the match begins at column *n*
    of the input text.

    :param n: 1-based column number at which the match must start
    :return: parse action suitable for ``set_parse_action``
    """

    def _verify_column(s: str, loc: int, tokens: ParseResults) -> None:
        if col(loc, s) != n:
            raise ParseException(s, loc, f"matched token not at column {n}")

    return _verify_column
|
||||
|
||||
|
||||
def replace_with(repl_str: Any) -> ParseAction:
    """
    Build a parse action that replaces any match with a fixed literal
    value. Especially useful when used with
    :meth:`~ParserElement.transform_string`.

    Example:

    .. doctest::

        >>> num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        >>> term = na | num

        >>> term[1, ...].parse_string("324 234 N/A 234")
        ParseResults([324, 234, nan, 234], {})
    """

    def _replacement(s, l, t):
        # the matched tokens are ignored; the fixed value is always emitted
        return [repl_str]

    return _replacement
|
||||
|
||||
|
||||
def remove_quotes(s: str, l: int, t: ParseResults) -> Any:
    r"""
    Parse action that strips the first and last characters (the quote
    marks) from a parsed quoted string that uses single-character
    quoting. For quoted strings that may use multiple characters for
    quoting, use the :class:`QuotedString` class instead.

    Example:

    .. doctest::

        >>> # by default, quotation marks are included in parsed results
        >>> quoted_string.parse_string("'Now is the Winter of our Discontent'")
        ParseResults(["'Now is the Winter of our Discontent'"], {})

        >>> # use remove_quotes to strip quotation marks from parsed results
        >>> dequoted = quoted_string().set_parse_action(remove_quotes)
        >>> dequoted.parse_string("'Now is the Winter of our Discontent'")
        ParseResults(['Now is the Winter of our Discontent'], {})
    """
    quoted = t[0]
    # drop the leading and trailing quote characters
    return quoted[1:-1]
|
||||
|
||||
|
||||
def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction:
    """
    Build a validating parse action for start tags created with
    :class:`make_xml_tags` or :class:`make_html_tags`, requiring one or
    more attributes to be present with given values — useful to avoid
    false matches on common tags such as ``<TD>`` or ``<DIV>``.

    The required attributes may be given as:

    - keyword arguments, as in ``(align="right")``, or
    - an explicit dict with the ``**`` operator, when an attribute
      name is also a Python reserved word, as in
      ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in
      ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Attribute names with a namespace prefix require the tuple form.
    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`. To require only that an attribute be present,
    regardless of its value, pass ``with_attribute.ANY_VALUE`` as the
    value.

    Example::

        div, div_end = make_html_tags("div")

        # only match div tags having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")

        # match div tags having any type attribute, regardless of value
        div_any_type = div().set_parse_action(
            with_attribute(type=with_attribute.ANY_VALUE)
        )
    """
    # collect the required (name, value) pairs from whichever call style was used
    required: list[tuple[str, str]] = list(args) if args else list(attr_dict.items())

    def _check_attributes(s: str, l: int, tokens: ParseResults) -> None:
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s, l, f"no matching attribute {name!r}")
            if expected != with_attribute.ANY_VALUE and tokens[name] != expected:  # type: ignore [attr-defined]
                raise ParseException(
                    s,
                    l,
                    f"attribute {name!r} has value {tokens[name]!r}, must be {expected!r}",
                )

    return _check_attributes
|
||||
|
||||
|
||||
with_attribute.ANY_VALUE = object() # type: ignore [attr-defined]
|
||||
"Value to use with :class:`with_attribute` parse action, to match any value, as long as the attribute is present"
|
||||
|
||||
|
||||
def with_class(classname: str, namespace: str = "") -> ParseAction:
    """
    Shortcut for :meth:`with_attribute` matching on a tag's ``class``
    attribute — awkward to pass to ``with_attribute`` directly, since
    ``class`` is a reserved word in Python.

    :param classname: required value of the ``class`` attribute
    :param namespace: optional namespace prefix for the attribute name

    Example::

        div, div_end = make_html_tags("div")

        # only match div tags having the "grid" class
        div_grid = div().set_parse_action(with_class("grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)
    """
    # prepend the namespace (if any) to form the attribute name, then
    # delegate to with_attribute via keyword expansion
    attr_name = f"{namespace}:class" if namespace else "class"
    return with_attribute(**{attr_name: classname})
|
||||
|
||||
|
||||
# Compatibility synonyms
|
||||
# fmt: off
|
||||
replaceWith = replaced_by_pep8("replaceWith", replace_with)
|
||||
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
|
||||
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
|
||||
withClass = replaced_by_pep8("withClass", with_class)
|
||||
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
|
||||
# fmt: on
|
||||
@@ -0,0 +1,75 @@
|
||||
<!--
|
||||
This file contains instructions for best practices for developing parsers with pyparsing, and can be used by AI agents
|
||||
when generating Python code using pyparsing.
|
||||
-->
|
||||
|
||||
## Planning
|
||||
- If not provided or if target language definition is ambiguous, ask for examples of valid strings to be parsed
|
||||
- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
|
||||
|
||||
## Implementing
|
||||
- Import pyparsing using `import pyparsing as pp`, and use that for all pyparsing references.
|
||||
- If referencing names from `pyparsing.common`, follow the pyparsing import with "ppc = pp.common" and use `ppc` as the namespace to access `pyparsing.common`.
|
||||
- If referencing names from `pyparsing.unicode`, follow the pyparsing import with "ppu = pp.unicode" and use `ppu` as the namespace to access `pyparsing.unicode`.
|
||||
- When writing parsers that contain recursive elements (using `Forward()` or `infix_notation()`), immediately enable packrat parsing for performance: `pp.ParserElement.enable_packrat()` (call this right after importing pyparsing). See https://pyparsing-docs.readthedocs.io/en/latest/HowToUsePyparsing.html.
|
||||
- For recursive grammars, define placeholders with `pp.Forward()` and assign later using the `<<=` operator; give Forwards meaningful names with `set_name()` to improve errors.
|
||||
- Use PEP8 method and argument names in the pyparsing API (`parse_string`, not `parseString`).
|
||||
- Do not include expressions for matching whitespace in the grammar. Pyparsing skips whitespace by default.
|
||||
- For line-oriented grammars where newlines are significant, set skippable whitespace to just spaces/tabs early: `pp.ParserElement.set_default_whitespace_chars(" \t")`, and define `NL = pp.LineEnd().suppress()` to handle line ends explicitly.
|
||||
- Prefer operator forms for readability: use +, |, ^, ~, etc., instead of explicit And/MatchFirst/Or/Not classes (see Usage notes in https://pyparsing-docs.readthedocs.io/en/latest/HowToUsePyparsing.html).
|
||||
- Use `set_name()` on all major grammar elements to support railroad diagramming and better error/debug output.
|
||||
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution.
|
||||
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support attribute-style access on returned ParseResults.
|
||||
- Results names should take the place of numeric indexing into parsed results in most places.
|
||||
- Define results names using call format not `set_results_name()`, example: `full_name = Word(alphas)("first_name") + Word(alphas)("last_name")`
|
||||
- If adding a results name to an expression that contains one or more sub-expressions with results names, the expression must be enclosed in a Group.
|
||||
- Prefer `Keyword` over `Literal` for reserved words to avoid partial matches (e.g., `Keyword("for")` will not match the leading "for" in "format").
|
||||
- Use `pp.CaselessKeyword`/`pp.CaselessLiteral` when keywords should match regardless of case.
|
||||
- When the full input must be consumed, call `parse_string` with `parse_all=True`.
|
||||
- If the grammar must handle comments, define an expression for them and use the `ignore()` method to skip them.
|
||||
- Prefer built-ins like `pp.cpp_style_comment` and `pp.python_style_comment` for common comment syntaxes.
|
||||
- Use pyparsing `Group` to organize sub-expressions. Groups are also important for preserving results names when a sub-expression is used in a `OneOrMore` or `ZeroOrMore` expression.
|
||||
- Suppress punctuation tokens to keep results clean; a convenient pattern is `LBRACK, RBRACK, LBRACE, RBRACE, COLON = pp.Suppress.using_each("[]{}:")`.
|
||||
- For comma-separated sequences, prefer `pp.DelimitedList(...)`; wrap with `pp.Optional(...)` to allow empty lists or objects where appropriate.
|
||||
- For helper sub-expressions used only to build larger expressions, consider `set_name(None)` to keep result dumps uncluttered.
|
||||
- Use pyparsing `Each()` to define a list of elements that may occur in any order.
|
||||
- The '&' operator is the operator form of Each and is often more readable when combining order-independent parts.
|
||||
- Use parse actions to do parse-time conversion of data from strings to useful data types.
|
||||
- Use objects defined in pyparsing.common for common types like integer, real — these already have their conversion parse actions defined.
|
||||
- For quoted strings, use `pp.dbl_quoted_string().set_parse_action(pp.remove_quotes)` to unquote automatically.
|
||||
- Map reserved words to Python constants per this example for parsing "true" to auto-convert to a Python True: `pp.Keyword("true").set_parse_action(pp.replace_with(True))` (and similarly for false/null/etc.).
|
||||
- When you want native Python containers from the parse, use `pp.Group(..., aslist=True)` for lists and `pp.Dict(..., asdict=True)` for dict-like data.
|
||||
- Use "using_each" with a list of keywords to define keyword constants, instead of separate assignments.
|
||||
- Choose the appropriate matching method:
|
||||
- `parse_string()` parses from the start
|
||||
- `search_string()` searches anywhere in the text
|
||||
- `scan_string()` yields all matches with positions
|
||||
- `transform_string()` is a convenience wrapper around `scan_string` to apply filters or transforms defined in parse actions, to perform batch transforms or conversions of expressions within a larger body of text
|
||||
- For line suffixes or directives, combine lookahead and slicing helpers: `pp.FollowedBy(...)` with `pp.rest_of_line`; when reusing a base expression with a different parse action, call `.copy()` before applying the new action to avoid side effects.
|
||||
- When defining a parser to be used in a REPL:
|
||||
- add pyparsing `Tag()` elements of the form `Tag("command", <command-name>)` to each command definition to support model construction from parsed commands.
|
||||
- define model classes using dataclasses, and use the "command" attribute in the parsed results to identify which model class to create. The model classes can then be used to construct the model from the ParseResults returned by parse_string(). Define the models in a separate parser_models.py file.
|
||||
- If defining the grammar as part of a Parser class, only the finished grammar needs to be implemented as an instance variable.
|
||||
- `ParseResults` support "in" testing for results names. Use "in" tests for the existence of results names, not `hasattr()`.
|
||||
- Avoid left recursion where possible. If you must support left-recursive grammars, enable it with `pp.ParserElement.enable_left_recursion()` and do not enable packrat at the same time (these modes are incompatible).
|
||||
- Use `pp.SkipTo` as a skipping expression to skip over arbitrary content.
|
||||
- For example, `pp.SkipTo(pp.LineEnd())` will skip over all content until the end of the line; add a stop_on argument to SkipTo to stop skipping when a particular string is matched.
|
||||
- Use `...` in place of simple SkipTo(expression)
|
||||
|
||||
## Testing
|
||||
- Use the pyparsing `ParserElement.run_tests` method to run mini validation tests.
|
||||
- Pass a single multiline string to `run_tests` to test the parser on multiple test input strings; each line is treated as a separate test.
|
||||
- You can add comments starting with "#" within the string passed to `run_tests` to document the individual test cases.
|
||||
- To pass test input strings that span multiple lines, pass the test input strings as a list of strings.
|
||||
- Pass `parse_all=True` to `run_tests` to test that the entire input is consumed.
|
||||
- When generating unit tests for the parser:
|
||||
- generate tests that include presence and absence of optional elements
|
||||
- use the methods in the mixin class pyparsing.testing.TestParseResultsAsserts to easily define expression, test input string, and expected results
|
||||
- do not generate tests for invalid data
|
||||
|
||||
## Debugging
|
||||
- If troubleshooting parse actions, use pyparsing's `trace_parse_action` decorator to echo arguments and return value
|
||||
- During development, call `pp.autoname_elements()` to auto-assign names to unnamed expressions to improve `dump()` and error messages.
|
||||
- Sub-expressions can be tested in isolation using `ParserElement.matches()`
|
||||
- When defined out of order, Literals can mistakenly match fragments: `Literal("for")` will match the leading "for" in "format". Can be corrected by using `Keyword` instead of `Literal`.
|
||||
- Dump the parsed results using `ParseResults.dump()`, `ParseResults.pprint()`, or `repr(ParseResults)`.
|
||||
@@ -0,0 +1,2 @@
|
||||
import pyparsing
|
||||
pyparsing.show_best_practices()
|
||||
570
venv/lib/python3.12/site-packages/pyparsing/common.py
Normal file
570
venv/lib/python3.12/site-packages/pyparsing/common.py
Normal file
@@ -0,0 +1,570 @@
|
||||
# common.py
|
||||
from .core import *
|
||||
from .helpers import DelimitedList, any_open_tag, any_close_tag
|
||||
from datetime import datetime
|
||||
import sys
|
||||
|
||||
PY_310_OR_LATER = sys.version_info >= (3, 10)
|
||||
|
||||
|
||||
# some other useful expressions - using lower-case class name since we are really using this as a namespace
|
||||
class pyparsing_common:
|
||||
"""Here are some common low-level expressions that may be useful in
|
||||
jump-starting parser development:
|
||||
|
||||
- numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
|
||||
:class:`scientific notation<sci_real>`)
|
||||
- common :class:`programming identifiers<identifier>`
|
||||
- network addresses (:class:`MAC<mac_address>`,
|
||||
:class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
|
||||
- ISO8601 :class:`dates<iso8601_date>` and
|
||||
:class:`datetime<iso8601_datetime>`
|
||||
- :class:`UUID<uuid>`
|
||||
- :class:`comma-separated list<comma_separated_list>`
|
||||
- :class:`url`
|
||||
|
||||
Parse actions:
|
||||
|
||||
- :class:`convert_to_integer`
|
||||
- :class:`convert_to_float`
|
||||
- :class:`convert_to_date`
|
||||
- :class:`convert_to_datetime`
|
||||
- :class:`strip_html_tags`
|
||||
- :class:`upcase_tokens`
|
||||
- :class:`downcase_tokens`
|
||||
|
||||
Examples:
|
||||
|
||||
.. testcode::
|
||||
|
||||
pyparsing_common.number.run_tests('''
|
||||
# any int or real number, returned as the appropriate type
|
||||
100
|
||||
-100
|
||||
+100
|
||||
3.14159
|
||||
6.02e23
|
||||
1e-12
|
||||
''')
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
# any int or real number, returned as the appropriate type
|
||||
100
|
||||
[100]
|
||||
|
||||
-100
|
||||
[-100]
|
||||
|
||||
+100
|
||||
[100]
|
||||
|
||||
3.14159
|
||||
[3.14159]
|
||||
|
||||
6.02e23
|
||||
[6.02e+23]
|
||||
|
||||
1e-12
|
||||
[1e-12]
|
||||
|
||||
.. testcode::
|
||||
|
||||
pyparsing_common.fnumber.run_tests('''
|
||||
# any int or real number, returned as float
|
||||
100
|
||||
-100
|
||||
+100
|
||||
3.14159
|
||||
6.02e23
|
||||
1e-12
|
||||
''')
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
# any int or real number, returned as float
|
||||
100
|
||||
[100.0]
|
||||
|
||||
-100
|
||||
[-100.0]
|
||||
|
||||
+100
|
||||
[100.0]
|
||||
|
||||
3.14159
|
||||
[3.14159]
|
||||
|
||||
6.02e23
|
||||
[6.02e+23]
|
||||
|
||||
1e-12
|
||||
[1e-12]
|
||||
|
||||
.. testcode::
|
||||
|
||||
pyparsing_common.hex_integer.run_tests('''
|
||||
# hex numbers
|
||||
100
|
||||
FF
|
||||
''')
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
# hex numbers
|
||||
100
|
||||
[256]
|
||||
|
||||
FF
|
||||
[255]
|
||||
|
||||
.. testcode::
|
||||
|
||||
pyparsing_common.fraction.run_tests('''
|
||||
# fractions
|
||||
1/2
|
||||
-3/4
|
||||
''')
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
# fractions
|
||||
1/2
|
||||
[0.5]
|
||||
|
||||
-3/4
|
||||
[-0.75]
|
||||
|
||||
.. testcode::
|
||||
|
||||
pyparsing_common.mixed_integer.run_tests('''
|
||||
# mixed fractions
|
||||
1
|
||||
1/2
|
||||
-3/4
|
||||
1-3/4
|
||||
''')
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
# mixed fractions
|
||||
1
|
||||
[1]
|
||||
|
||||
1/2
|
||||
[0.5]
|
||||
|
||||
-3/4
|
||||
[-0.75]
|
||||
|
||||
1-3/4
|
||||
[1.75]
|
||||
.. testcode::
|
||||
|
||||
import uuid
|
||||
pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
|
||||
pyparsing_common.uuid.run_tests('''
|
||||
# uuid
|
||||
12345678-1234-5678-1234-567812345678
|
||||
''')
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
# uuid
|
||||
12345678-1234-5678-1234-567812345678
|
||||
[UUID('12345678-1234-5678-1234-567812345678')]
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def convert_to_integer(_, __, t):
|
||||
"""
|
||||
Parse action for converting parsed integers to Python int
|
||||
"""
|
||||
return [int(tt) for tt in t]
|
||||
|
||||
@staticmethod
|
||||
def convert_to_float(_, __, t):
|
||||
"""
|
||||
Parse action for converting parsed numbers to Python float
|
||||
"""
|
||||
return [float(tt) for tt in t]
|
||||
|
||||
integer = (
|
||||
Word(nums)
|
||||
.set_name("integer")
|
||||
.set_parse_action(
|
||||
convert_to_integer
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [int(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
"""expression that parses an unsigned integer, converts to an int"""
|
||||
|
||||
hex_integer = (
|
||||
Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
|
||||
)
|
||||
"""expression that parses a hexadecimal integer, converts to an int"""
|
||||
|
||||
signed_integer = (
|
||||
Regex(r"[+-]?\d+")
|
||||
.set_name("signed integer")
|
||||
.set_parse_action(
|
||||
convert_to_integer
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [int(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
"""expression that parses an integer with optional leading sign, converts to an int"""
|
||||
|
||||
fraction = (
|
||||
signed_integer().set_parse_action(
|
||||
convert_to_float
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [float(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
+ "/"
|
||||
+ signed_integer().set_parse_action(
|
||||
convert_to_float
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [float(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
).set_name("fraction")
|
||||
"""fractional expression of an integer divided by an integer, converts to a float"""
|
||||
fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
|
||||
|
||||
mixed_integer = (
|
||||
fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
|
||||
).set_name("fraction or mixed integer-fraction")
|
||||
"""mixed integer of the form 'integer - fraction', with optional leading integer, converts to a float"""
|
||||
mixed_integer.add_parse_action(sum)
|
||||
|
||||
real = (
|
||||
Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
|
||||
.set_name("real number")
|
||||
.set_parse_action(
|
||||
convert_to_float
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [float(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
"""expression that parses a floating point number, converts to a float"""
|
||||
|
||||
sci_real = (
|
||||
Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
|
||||
.set_name("real number with scientific notation")
|
||||
.set_parse_action(
|
||||
convert_to_float
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [float(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
"""expression that parses a floating point number with optional
|
||||
scientific notation, converts to a float"""
|
||||
|
||||
# streamlining this expression makes the docs nicer-looking
|
||||
number = (sci_real | real | signed_integer).set_name("number").streamline()
|
||||
"""any numeric expression, converts to the corresponding Python type"""
|
||||
|
||||
fnumber = (
|
||||
Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
|
||||
.set_name("fnumber")
|
||||
.set_parse_action(
|
||||
convert_to_float
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [float(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
"""any int or real number, always converts to a float"""
|
||||
|
||||
ieee_float = (
|
||||
Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
|
||||
.set_name("ieee_float")
|
||||
.set_parse_action(
|
||||
convert_to_float
|
||||
if PY_310_OR_LATER
|
||||
else lambda t: [float(tt) for tt in t] # type: ignore[misc]
|
||||
)
|
||||
)
|
||||
"""any floating-point literal (int, real number, infinity, or NaN), converts to a float"""
|
||||
|
||||
identifier = Word(identchars, identbodychars).set_name("identifier")
|
||||
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
|
||||
|
||||
ipv4_address = Regex(
|
||||
r"(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
|
||||
).set_name("IPv4 address")
|
||||
"IPv4 address (``0.0.0.0 - 255.255.255.255``)"
|
||||
|
||||
_ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
|
||||
_full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
|
||||
"full IPv6 address"
|
||||
)
|
||||
_short_ipv6_address = (
|
||||
Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
|
||||
+ "::"
|
||||
+ Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
|
||||
).set_name("short IPv6 address")
|
||||
_short_ipv6_address.add_condition(
|
||||
lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
|
||||
)
|
||||
_mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
|
||||
ipv6_address = Combine(
|
||||
(_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
|
||||
"IPv6 address"
|
||||
)
|
||||
).set_name("IPv6 address")
|
||||
"IPv6 address (long, short, or mixed form)"
|
||||
|
||||
mac_address = Regex(
|
||||
r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
|
||||
).set_name("MAC address")
|
||||
"MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
|
||||
|
||||
@staticmethod
|
||||
def convert_to_date(fmt: str = "%Y-%m-%d"):
|
||||
"""
|
||||
Helper to create a parse action for converting parsed date string to Python datetime.date
|
||||
|
||||
Params -
|
||||
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
|
||||
|
||||
Example:
|
||||
|
||||
.. testcode::
|
||||
|
||||
date_expr = pyparsing_common.iso8601_date.copy()
|
||||
date_expr.set_parse_action(pyparsing_common.convert_to_date())
|
||||
print(date_expr.parse_string("1999-12-31"))
|
||||
|
||||
prints:
|
||||
|
||||
.. testoutput::
|
||||
|
||||
[datetime.date(1999, 12, 31)]
|
||||
"""
|
||||
|
||||
def cvt_fn(ss, ll, tt):
|
||||
try:
|
||||
return datetime.strptime(tt[0], fmt).date()
|
||||
except ValueError as ve:
|
||||
raise ParseException(ss, ll, str(ve))
|
||||
|
||||
return cvt_fn
|
||||
|
||||
@staticmethod
|
||||
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
|
||||
"""Helper to create a parse action for converting parsed
|
||||
datetime string to Python :class:`datetime.datetime`
|
||||
|
||||
Params -
|
||||
- fmt - format to be passed to :class:`datetime.strptime` (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
|
||||
|
||||
Example:
|
||||
|
||||
.. testcode::
|
||||
|
||||
dt_expr = pyparsing_common.iso8601_datetime.copy()
|
||||
dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
|
||||
print(dt_expr.parse_string("1999-12-31T23:59:59.999"))
|
||||
|
||||
prints:
|
||||
|
||||
.. testoutput::
|
||||
|
||||
[datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
|
||||
"""
|
||||
|
||||
def cvt_fn(s, l, t):
|
||||
try:
|
||||
return datetime.strptime(t[0], fmt)
|
||||
except ValueError as ve:
|
||||
raise ParseException(s, l, str(ve))
|
||||
|
||||
return cvt_fn
|
||||
|
||||
iso8601_date = Regex(
|
||||
r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
|
||||
).set_name("ISO8601 date")
|
||||
"ISO8601 date (``yyyy-mm-dd``)"
|
||||
|
||||
iso8601_datetime = Regex(
|
||||
r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
|
||||
).set_name("ISO8601 datetime")
|
||||
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
|
||||
|
||||
@staticmethod
|
||||
def as_datetime(s, l, t):
|
||||
"""Parse action to convert parsed dates or datetimes to a Python
|
||||
:class:`datetime.datetime`.
|
||||
|
||||
This parse action will use the year, month, day, etc. results
|
||||
names defined in the ISO8601 date expressions, but it can be
|
||||
used with any expression that provides one or more of these fields.
|
||||
|
||||
Omitted fields will default to fields from Jan 1, 00:00:00.
|
||||
|
||||
Invalid dates will raise a :class:`ParseException` with the
|
||||
error message indicating the invalid date fields.
|
||||
"""
|
||||
year = int(t.year.lstrip("0") or 0)
|
||||
month = int(t.month or 1)
|
||||
day = int(t.day or 1)
|
||||
hour = int(t.hour or 0)
|
||||
minute = int(t.minute or 0)
|
||||
second = float(t.second or 0)
|
||||
try:
|
||||
return datetime(
|
||||
year, month, day, hour, minute, int(second), int((second % 1) * 1000)
|
||||
)
|
||||
except ValueError as ve:
|
||||
raise ParseException(t, l, f"Invalid date/time: {ve}").with_traceback(
|
||||
ve.__traceback__
|
||||
) from None
|
||||
|
||||
if PY_310_OR_LATER:
|
||||
iso8601_date_validated = iso8601_date().add_parse_action(as_datetime)
|
||||
"Validated ISO8601 date strings, raising :class:`ParseException` for invalid date values."
|
||||
|
||||
iso8601_datetime_validated = iso8601_datetime().add_parse_action(as_datetime)
|
||||
"Validated ISO8601 date and time strings, raising :class:`ParseException` for invalid date/time values."
|
||||
|
||||
uuid = Regex(r"[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name(
|
||||
"UUID"
|
||||
)
|
||||
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
|
||||
|
||||
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
|
||||
|
||||
@staticmethod
|
||||
def strip_html_tags(s: str, l: int, tokens: ParseResults):
|
||||
"""Parse action to remove HTML tags from web page HTML source
|
||||
|
||||
Example:
|
||||
|
||||
.. testcode::
|
||||
|
||||
# strip HTML links from normal text
|
||||
text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
|
||||
td, td_end = make_html_tags("TD")
|
||||
table_text = td + SkipTo(td_end).set_parse_action(
|
||||
pyparsing_common.strip_html_tags)("body") + td_end
|
||||
print(table_text.parse_string(text).body)
|
||||
|
||||
Prints:
|
||||
|
||||
.. testoutput::
|
||||
|
||||
More info at the pyparsing wiki page
|
||||
"""
|
||||
return pyparsing_common._html_stripper.transform_string(tokens[0])
|
||||
|
||||
_commasepitem = (
|
||||
Combine(
|
||||
OneOrMore(
|
||||
~Literal(",")
|
||||
+ ~LineEnd()
|
||||
+ Word(printables, exclude_chars=",")
|
||||
+ Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
|
||||
)
|
||||
)
|
||||
.streamline()
|
||||
.set_name("commaItem")
|
||||
)
|
||||
comma_separated_list = DelimitedList(
|
||||
Opt(quoted_string.copy() | _commasepitem, default="")
|
||||
).set_name("comma separated list")
|
||||
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
|
||||
|
||||
@staticmethod
|
||||
def upcase_tokens(s, l, t):
|
||||
"""Parse action to convert tokens to upper case."""
|
||||
return [tt.upper() for tt in t]
|
||||
|
||||
@staticmethod
|
||||
def downcase_tokens(s, l, t):
|
||||
"""Parse action to convert tokens to lower case."""
|
||||
return [tt.lower() for tt in t]
|
||||
|
||||
# fmt: off
|
||||
url = Regex(
|
||||
# https://mathiasbynens.be/demo/url-regex
|
||||
# https://gist.github.com/dperini/729294
|
||||
r"(?P<url>"
|
||||
# protocol identifier (optional)
|
||||
# short syntax // still required
|
||||
r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)"
|
||||
# user:pass BasicAuth (optional)
|
||||
r"(?:(?P<auth>\S+(?::\S*)?)@)?"
|
||||
r"(?P<host>"
|
||||
# IP address exclusion
|
||||
# private & local networks
|
||||
r"(?!(?:10|127)(?:\.\d{1,3}){3})"
|
||||
r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
|
||||
r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
|
||||
# IP address dotted notation octets
|
||||
# excludes loopback network 0.0.0.0
|
||||
# excludes reserved space >= 224.0.0.0
|
||||
# excludes network & broadcast addresses
|
||||
# (first & last IP address of each class)
|
||||
r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
|
||||
r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
|
||||
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
|
||||
r"|"
|
||||
# host & domain names, may end with dot
|
||||
# can be replaced by a shortest alternative
|
||||
# (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
|
||||
r"(?:"
|
||||
r"(?:"
|
||||
r"[a-z0-9\u00a1-\uffff]"
|
||||
r"[a-z0-9\u00a1-\uffff_-]{0,62}"
|
||||
r")?"
|
||||
r"[a-z0-9\u00a1-\uffff]\."
|
||||
r")+"
|
||||
# TLD identifier name, may end with dot
|
||||
r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
|
||||
r")"
|
||||
# port number (optional)
|
||||
r"(:(?P<port>\d{2,5}))?"
|
||||
# resource path (optional)
|
||||
r"(?P<path>\/[^?# ]*)?"
|
||||
# query string (optional)
|
||||
r"(\?(?P<query>[^#]*))?"
|
||||
# fragment (optional)
|
||||
r"(#(?P<fragment>\S*))?"
|
||||
r")"
|
||||
).set_name("url")
|
||||
"""
|
||||
URL (http/https/ftp scheme)
|
||||
|
||||
.. versionchanged:: 3.1.0
|
||||
``url`` named group added
|
||||
"""
|
||||
# fmt: on
|
||||
|
||||
# pre-PEP8 compatibility names
|
||||
# fmt: off
|
||||
convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
|
||||
convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
|
||||
convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
|
||||
convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
|
||||
stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
|
||||
upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
|
||||
downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
|
||||
# fmt: on
|
||||
|
||||
|
||||
_builtin_exprs = [
|
||||
v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
|
||||
]
|
||||
6951
venv/lib/python3.12/site-packages/pyparsing/core.py
Normal file
6951
venv/lib/python3.12/site-packages/pyparsing/core.py
Normal file
File diff suppressed because it is too large
Load Diff
761
venv/lib/python3.12/site-packages/pyparsing/diagram/__init__.py
Normal file
761
venv/lib/python3.12/site-packages/pyparsing/diagram/__init__.py
Normal file
@@ -0,0 +1,761 @@
|
||||
# mypy: ignore-errors
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import railroad
|
||||
import pyparsing
|
||||
import dataclasses
|
||||
import typing
|
||||
from typing import (
|
||||
Generic,
|
||||
TypeVar,
|
||||
Callable,
|
||||
Iterable,
|
||||
)
|
||||
from jinja2 import Template
|
||||
from io import StringIO
|
||||
import inspect
|
||||
import re
|
||||
|
||||
|
||||
jinja2_template_source = """\
|
||||
{% if not embed %}
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
{% endif %}
|
||||
{% if not head %}
|
||||
<style>
|
||||
.railroad-heading {
|
||||
font-family: monospace;
|
||||
}
|
||||
</style>
|
||||
{% else %}
|
||||
{{ head | safe }}
|
||||
{% endif %}
|
||||
{% if not embed %}
|
||||
</head>
|
||||
<body>
|
||||
{% endif %}
|
||||
<meta charset="UTF-8"/>
|
||||
{{ body | safe }}
|
||||
{% for diagram in diagrams %}
|
||||
<div class="railroad-group">
|
||||
<h1 class="railroad-heading" id="{{ diagram.bookmark }}">{{ diagram.title }}</h1>
|
||||
<div class="railroad-description">{{ diagram.text }}</div>
|
||||
<div class="railroad-svg">
|
||||
{{ diagram.svg }}
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% if not embed %}
|
||||
</body>
|
||||
</html>
|
||||
{% endif %}
|
||||
"""
|
||||
|
||||
template = Template(jinja2_template_source)
|
||||
|
||||
|
||||
_bookmark_lookup = {}
|
||||
_bookmark_ids = itertools.count(start=1)
|
||||
|
||||
def _make_bookmark(s: str) -> str:
|
||||
"""
|
||||
Converts a string into a valid HTML bookmark (ID or anchor name).
|
||||
"""
|
||||
if s in _bookmark_lookup:
|
||||
return _bookmark_lookup[s]
|
||||
|
||||
# Replace invalid characters with hyphens and ensure only valid characters
|
||||
bookmark = re.sub(r'[^a-zA-Z0-9-]+', '-', s)
|
||||
|
||||
# Ensure it starts with a letter by adding 'z' if necessary
|
||||
if not bookmark[:1].isalpha():
|
||||
bookmark = f"z{bookmark}"
|
||||
|
||||
# Convert to lowercase and strip hyphens
|
||||
bookmark = bookmark.lower().strip('-')
|
||||
|
||||
_bookmark_lookup[s] = bookmark = f"{bookmark}-{next(_bookmark_ids):04d}"
|
||||
|
||||
return bookmark
|
||||
|
||||
|
||||
def _collapse_verbose_regex(regex_str: str) -> str:
|
||||
if "\n" not in regex_str:
|
||||
return regex_str
|
||||
collapsed = pyparsing.Regex(r"#.*$").suppress().transform_string(regex_str)
|
||||
collapsed = re.sub(r"\s*\n\s*", "", collapsed)
|
||||
return collapsed
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class NamedDiagram:
|
||||
"""
|
||||
A simple structure for associating a name with a railroad diagram
|
||||
"""
|
||||
|
||||
name: str
|
||||
index: int
|
||||
diagram: railroad.DiagramItem = None
|
||||
|
||||
@property
|
||||
def bookmark(self):
|
||||
bookmark = _make_bookmark(self.name)
|
||||
return bookmark
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class EachItem(railroad.Group):
|
||||
"""
|
||||
Custom railroad item to compose a:
|
||||
|
||||
- :class:`railroad.Group` containing a
|
||||
|
||||
- :class:`railroad.OneOrMore` containing a
|
||||
|
||||
- :class:`railroad.Choice` of the elements in the
|
||||
:class:`railroad.Each`
|
||||
|
||||
with the group label indicating that all must be matched
|
||||
"""
|
||||
|
||||
all_label = "[ALL]"
|
||||
|
||||
def __init__(self, *items) -> None:
|
||||
choice_item = railroad.Choice(len(items) - 1, *items)
|
||||
one_or_more_item = railroad.OneOrMore(item=choice_item)
|
||||
super().__init__(one_or_more_item, label=self.all_label)
|
||||
|
||||
|
||||
class AnnotatedItem(railroad.Group):
|
||||
"""
|
||||
Simple subclass of Group that creates an annotation label
|
||||
"""
|
||||
|
||||
def __init__(self, label: str, item) -> None:
|
||||
super().__init__(item=item, label=f"[{label}]" if label else "")
|
||||
|
||||
|
||||
class EditablePartial(Generic[T]):
|
||||
"""
|
||||
Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
|
||||
constructed.
|
||||
"""
|
||||
|
||||
# We need this here because the railroad constructors actually transform the data, so can't be called until the
|
||||
# entire tree is assembled
|
||||
|
||||
def __init__(self, func: Callable[..., T], args: list, kwargs: dict) -> None:
|
||||
self.func = func
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
@classmethod
|
||||
def from_call(cls, func: Callable[..., T], *args, **kwargs) -> EditablePartial[T]:
|
||||
"""
|
||||
If you call this function in the same way that you would call the constructor,
|
||||
it will store the arguments as you expect. For example
|
||||
``EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)``
|
||||
"""
|
||||
return EditablePartial(func=func, args=list(args), kwargs=kwargs)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.kwargs["name"]
|
||||
|
||||
def __call__(self) -> T:
|
||||
"""
|
||||
Evaluate the partial and return the result
|
||||
"""
|
||||
args = self.args.copy()
|
||||
kwargs = self.kwargs.copy()
|
||||
|
||||
# This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
|
||||
# args=['list', 'of', 'things'])
|
||||
arg_spec = inspect.getfullargspec(self.func)
|
||||
if arg_spec.varargs in self.kwargs:
|
||||
args += kwargs.pop(arg_spec.varargs)
|
||||
|
||||
return self.func(*args, **kwargs)
|
||||
|
||||
|
||||
def railroad_to_html(diagrams: list[NamedDiagram], embed=False, **kwargs) -> str:
|
||||
"""
|
||||
Given a list of :class:`NamedDiagram`, produce a single HTML string
|
||||
that visualises those diagrams.
|
||||
|
||||
:params kwargs: kwargs to be passed in to the template
|
||||
"""
|
||||
data = []
|
||||
for diagram in diagrams:
|
||||
if diagram.diagram is None:
|
||||
continue
|
||||
io = StringIO()
|
||||
try:
|
||||
css = kwargs.get("css")
|
||||
diagram.diagram.writeStandalone(io.write, css=css)
|
||||
except AttributeError:
|
||||
diagram.diagram.writeSvg(io.write)
|
||||
title = diagram.name
|
||||
if diagram.index == 0:
|
||||
title += " (root)"
|
||||
data.append(
|
||||
{
|
||||
"title": title, "text": "", "svg": io.getvalue(), "bookmark": diagram.bookmark
|
||||
}
|
||||
)
|
||||
|
||||
return template.render(diagrams=data, embed=embed, **kwargs)
|
||||
|
||||
|
||||
def resolve_partial(partial: EditablePartial[T]) -> T:
|
||||
"""
|
||||
Recursively resolves a collection of Partials into whatever type they are
|
||||
"""
|
||||
if isinstance(partial, EditablePartial):
|
||||
partial.args = resolve_partial(partial.args)
|
||||
partial.kwargs = resolve_partial(partial.kwargs)
|
||||
return partial()
|
||||
elif isinstance(partial, list):
|
||||
return [resolve_partial(x) for x in partial]
|
||||
elif isinstance(partial, dict):
|
||||
return {key: resolve_partial(x) for key, x in partial.items()}
|
||||
else:
|
||||
return partial
|
||||
|
||||
|
||||
def to_railroad(
|
||||
element: pyparsing.ParserElement,
|
||||
diagram_kwargs: typing.Optional[dict] = None,
|
||||
vertical: int = 3,
|
||||
show_results_names: bool = False,
|
||||
show_groups: bool = False,
|
||||
show_hidden: bool = False,
|
||||
) -> list[NamedDiagram]:
|
||||
"""
|
||||
Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
|
||||
creation if you want to access the Railroad tree before it is converted to HTML
|
||||
|
||||
:param element: base element of the parser being diagrammed
|
||||
|
||||
:param diagram_kwargs: kwargs to pass to the :meth:`Diagram` constructor
|
||||
|
||||
:param vertical: (optional) int - limit at which number of alternatives
|
||||
should be shown vertically instead of horizontally
|
||||
|
||||
:param show_results_names: bool to indicate whether results name
|
||||
annotations should be included in the diagram
|
||||
|
||||
:param show_groups: bool to indicate whether groups should be highlighted
|
||||
with an unlabeled surrounding box
|
||||
|
||||
:param show_hidden: bool to indicate whether internal elements that are
|
||||
typically hidden should be shown
|
||||
"""
|
||||
# Convert the whole tree underneath the root
|
||||
lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
|
||||
_to_diagram_element(
|
||||
element,
|
||||
lookup=lookup,
|
||||
parent=None,
|
||||
vertical=vertical,
|
||||
show_results_names=show_results_names,
|
||||
show_groups=show_groups,
|
||||
show_hidden=show_hidden,
|
||||
)
|
||||
|
||||
root_id = id(element)
|
||||
# Convert the root if it hasn't been already
|
||||
if root_id in lookup:
|
||||
if not element.customName:
|
||||
lookup[root_id].name = ""
|
||||
lookup[root_id].mark_for_extraction(root_id, lookup, force=True)
|
||||
|
||||
# Now that we're finished, we can convert from intermediate structures into Railroad elements
|
||||
diags = list(lookup.diagrams.values())
|
||||
if len(diags) > 1:
|
||||
# collapse out duplicate diags with the same name
|
||||
seen = set()
|
||||
deduped_diags = []
|
||||
for d in diags:
|
||||
# don't extract SkipTo elements, they are uninformative as subdiagrams
|
||||
if d.name == "...":
|
||||
continue
|
||||
if d.name is not None and d.name not in seen:
|
||||
seen.add(d.name)
|
||||
deduped_diags.append(d)
|
||||
resolved = [resolve_partial(partial) for partial in deduped_diags]
|
||||
else:
|
||||
# special case - if just one diagram, always display it, even if
|
||||
# it has no name
|
||||
resolved = [resolve_partial(partial) for partial in diags]
|
||||
return sorted(resolved, key=lambda diag: diag.index)
|
||||
|
||||
|
||||
def _should_vertical(
|
||||
specification: int, exprs: Iterable[pyparsing.ParserElement]
|
||||
) -> bool:
|
||||
"""
|
||||
Returns true if we should return a vertical list of elements
|
||||
"""
|
||||
if specification is None:
|
||||
return False
|
||||
else:
|
||||
return len(_visible_exprs(exprs)) >= specification
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ElementState:
|
||||
"""
|
||||
State recorded for an individual pyparsing Element
|
||||
"""
|
||||
|
||||
#: The pyparsing element that this represents
|
||||
element: pyparsing.ParserElement
|
||||
#: The output Railroad element in an unconverted state
|
||||
converted: EditablePartial
|
||||
#: The parent Railroad element, which we store so that we can extract this if it's duplicated
|
||||
parent: EditablePartial
|
||||
#: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
|
||||
number: int
|
||||
#: The name of the element
|
||||
name: str = None
|
||||
#: The index of this inside its parent
|
||||
parent_index: typing.Optional[int] = None
|
||||
#: If true, we should extract this out into a subdiagram
|
||||
extract: bool = False
|
||||
#: If true, all of this element's children have been filled out
|
||||
complete: bool = False
|
||||
|
||||
def mark_for_extraction(
|
||||
self, el_id: int, state: ConverterState, name: str = None, force: bool = False
|
||||
):
|
||||
"""
|
||||
Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
|
||||
:param el_id: id of the element
|
||||
:param state: element/diagram state tracker
|
||||
:param name: name to use for this element's text
|
||||
:param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
|
||||
root element when we know we're finished
|
||||
"""
|
||||
self.extract = True
|
||||
|
||||
# Set the name
|
||||
if not self.name:
|
||||
if name:
|
||||
# Allow forcing a custom name
|
||||
self.name = name
|
||||
elif self.element.customName:
|
||||
self.name = self.element.customName
|
||||
else:
|
||||
self.name = ""
|
||||
|
||||
# Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children
|
||||
# to be added
|
||||
# Also, if this is just a string literal etc, don't bother extracting it
|
||||
if force or (self.complete and _worth_extracting(self.element)):
|
||||
state.extract_into_diagram(el_id)
|
||||
|
||||
|
||||
class ConverterState:
    """
    Stores some state that persists between recursions into the element tree
    """

    # NOTE(review): class-level counter — diagram indexes are shared across
    # every ConverterState instance in the process, so numbering continues
    # from one conversion to the next.
    index_generator = itertools.count(start=1)

    def __init__(self, diagram_kwargs: typing.Optional[dict] = None) -> None:
        #: A dictionary mapping ParserElements to state relating to them
        self._element_diagram_states: dict[int, ElementState] = {}
        #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
        self.diagrams: dict[int, EditablePartial[NamedDiagram]] = {}
        #: The index of the next element. This is used for sorting
        # NOTE(review): appears unused here — generate_index() reads the
        # class-level index_generator instead; kept for compatibility.
        self.index: int = 0
        #: Shared kwargs that are used to customize the construction of diagrams
        self.diagram_kwargs: dict = diagram_kwargs or {}
        #: Names of diagrams already extracted (avoids duplicate extraction)
        self.extracted_diagram_names: set[str] = set()

    # The four dunder methods below give ConverterState a dict-like interface,
    # delegating to the per-element state table keyed by element id().
    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def get(self, key, default=None):
        # dict.get() analogue, implemented EAFP on top of __getitem__
        try:
            return self[key]
        except KeyError:
            return default

    def generate_index(self) -> int:
        """
        Generate a number used to index a diagram
        """
        return next(self.index_generator)

    def extract_into_diagram(self, el_id: int):
        """
        Used when we encounter the same token twice in the same tree. When this
        happens, we replace all instances of that token with a terminal, and
        create a new subdiagram for the token
        """
        position = self[el_id]

        # Replace the original definition of this element with a regular block
        if position.parent:
            href = f"#{_make_bookmark(position.name)}"
            ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=href)
            if "item" in position.parent.kwargs:
                position.parent.kwargs["item"] = ret
            elif "items" in position.parent.kwargs:
                position.parent.kwargs["items"][position.parent_index] = ret

        # If the element we're extracting is a group, skip to its content but keep the title
        if position.converted.func == railroad.Group:
            content = position.converted.kwargs["item"]
        else:
            content = position.converted

        # Register the new top-level sub-diagram under this element's id
        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=position.name,
            diagram=EditablePartial.from_call(
                railroad.Diagram, content, **self.diagram_kwargs
            ),
            index=position.number,
        )

        # The element no longer needs inline state once it owns a sub-diagram
        del self[el_id]
|
||||
|
||||
|
||||
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
    """
    Decide whether *element* deserves its own sub-diagram: it does as soon
    as at least one of its children has children of its own (i.e. the tree
    is more than two levels deep at this element).
    """
    for child in element.recurse():
        if child.recurse():
            return True
    return False
|
||||
|
||||
|
||||
def _apply_diagram_item_enhancements(fn):
    """
    decorator to ensure enhancements to a diagram item (such as results name annotations)
    get applied on return from _to_diagram_element (we do this since there are several
    returns in _to_diagram_element)
    """

    # NOTE(review): the wrapper's signature mirrors _to_diagram_element's and the
    # call below forwards every argument positionally — the two signatures must
    # stay in the same order.
    def _inner(
        element: pyparsing.ParserElement,
        parent: typing.Optional[EditablePartial],
        lookup: ConverterState = None,
        vertical: int = None,
        index: int = 0,
        name_hint: str = None,
        show_results_names: bool = False,
        show_groups: bool = False,
        show_hidden: bool = False,
    ) -> typing.Optional[EditablePartial]:
        ret = fn(
            element,
            parent,
            lookup,
            vertical,
            index,
            name_hint,
            show_results_names,
            show_groups,
            show_hidden,
        )

        # apply annotation for results name, if present
        if show_results_names and ret is not None:
            element_results_name = element.resultsName
            if element_results_name:
                # add "*" to indicate if this is a "list all results" name
                modal_tag = "" if element.modalResults else "*"
                # wrap the converted element in a labeled Group carrying the name
                ret = EditablePartial.from_call(
                    railroad.Group,
                    item=ret,
                    label=f"{repr(element_results_name)}{modal_tag}",
                )

        return ret

    return _inner
|
||||
|
||||
|
||||
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Drop expression types that never contribute anything visible to a
    railroad diagram, returning the remaining expressions as a list.
    """
    hidden_types = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )
    visible = []
    for expr in exprs:
        if not isinstance(expr, hidden_types):
            visible.append(expr)
    return visible
|
||||
|
||||
|
||||
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: typing.Optional[EditablePartial],
    lookup: typing.Optional[ConverterState] = None,
    vertical: typing.Optional[int] = None,
    index: int = 0,
    name_hint: typing.Optional[str] = None,
    show_results_names: bool = False,
    show_groups: bool = False,
    show_hidden: bool = False,
) -> typing.Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element
    :param element: The pyparsing element to convert
    :param lookup: The shared converter state that keeps track of useful things
    :param index: The index of this element within the parent
    :param parent: The parent of this element in the output tree
    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
    it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
    do so
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names
    :param show_groups: bool flag indicating whether to show groups using bounding box
    :param show_hidden: bool flag indicating whether to show elements that are typically hidden
    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
    """
    exprs = element.recurse()
    name = name_hint or element.customName or type(element).__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                # pyparsing.TokenConverter,
                pyparsing.Forward,
                pyparsing.Located,
                pyparsing.AtStringStart,
                pyparsing.AtLineStart,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                # recurse directly into the wrapped expression
                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                    show_groups=show_groups,
                    show_hidden=show_hidden,
                )

    # If the element isn't worth extracting, we always treat it as the first time we say it
    if _worth_extracting(element):
        looked_up = lookup.get(el_id)
        if looked_up and looked_up.name is not None:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            href = f"#{_make_bookmark(looked_up.name)}"
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=href)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            text = lookup.diagrams[el_id].kwargs["name"]
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=text, href=f"#{_make_bookmark(text)}"
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created

    # see if this element is normally hidden, and whether hidden elements are desired
    # if not, just return None
    if not element.show_in_diagram and not show_hidden:
        return None

    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len(set((e.name, e.resultsName) for e in exprs)) == 1 and len(exprs) > 2:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Group):
        if show_groups:
            # bounding box with no label
            ret = EditablePartial.from_call(AnnotatedItem, label="", item="")
        else:
            ret = EditablePartial.from_call(
                railroad.Group, item=None, label=element_results_name
            )
    elif isinstance(element, pyparsing.TokenConverter):
        label = type(element).__name__.lower()
        if label == "tokenconverter":
            # the base class itself adds nothing visible - render as a plain sequence
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
        else:
            ret = EditablePartial.from_call(AnnotatedItem, label=label, item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        if element.not_ender is not None:
            # rewrite "repeat until ender" as an equivalent expression and recurse
            args = [
                parent,
                lookup,
                vertical,
                index,
                name_hint,
                show_results_names,
                show_groups,
                show_hidden,
            ]
            return _to_diagram_element(
                (~element.not_ender.expr + element.expr)[1, ...].set_name(element.name),
                *args,
            )
        ret = EditablePartial.from_call(railroad.OneOrMore, item=None)
    elif isinstance(element, pyparsing.ZeroOrMore):
        if element.not_ender is not None:
            # rewrite "repeat until ender" as an equivalent expression and recurse
            args = [
                parent,
                lookup,
                vertical,
                index,
                name_hint,
                show_results_names,
                show_groups,
                show_hidden,
            ]
            return _to_diagram_element(
                (~element.not_ender.expr + element.expr)[...].set_name(element.name),
                *args,
            )
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif isinstance(element, pyparsing.ParseElementEnhance):
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    elif isinstance(element, pyparsing.Regex):
        collapsed_patt = _collapse_verbose_regex(element.pattern)
        ret = EditablePartial.from_call(railroad.Terminal, collapsed_patt)
    elif len(exprs) > 0:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    else:
        # leaf element: render its default name as a terminal
        terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
        ret = terminal

    if ret is None:
        return

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    if element.customName:
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
            show_groups=show_groups,
            show_hidden=show_hidden,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all this items children are none, skip this item
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            # replace the inline rendering with a reference to the extracted sub-diagram
            text = lookup.diagrams[el_id].kwargs["name"]
            href = f"#{_make_bookmark(text)}"
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=text, href=href
            )

    return ret
|
||||
353
venv/lib/python3.12/site-packages/pyparsing/exceptions.py
Normal file
353
venv/lib/python3.12/site-packages/pyparsing/exceptions.py
Normal file
@@ -0,0 +1,353 @@
|
||||
# exceptions.py
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import re
|
||||
import sys
|
||||
import typing
|
||||
import warnings
|
||||
from functools import cached_property
|
||||
|
||||
from .warnings import PyparsingDeprecationWarning
|
||||
from .unicode import pyparsing_unicode as ppu
|
||||
from .util import (
|
||||
_collapse_string_to_ranges,
|
||||
col,
|
||||
deprecate_argument,
|
||||
line,
|
||||
lineno,
|
||||
replaced_by_pep8,
|
||||
)
|
||||
|
||||
|
||||
# Combined unicode set used to pick out the "word" at an error location
# (see _exception_word_extractor below); mixes several alphabets so that
# non-ASCII input still yields a readable "found ..." fragment.
class _ExceptionWordUnicodeSet(
    ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic
):
    pass
|
||||
|
||||
|
||||
# Character-class string (compressed into ranges) for the alphabets above.
_extract_alphanums = _collapse_string_to_ranges(_ExceptionWordUnicodeSet.alphanums)
# Matches up to 16 "word" characters at the error location, or - failing
# that - any single character, for ParseBaseException.found.
_exception_word_extractor = re.compile(fr"([{_extract_alphanums}]{{1,16}})|.")
|
||||
|
||||
|
||||
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    loc: int
    msg: str
    pstr: str
    parser_element: typing.Any  # "ParserElement"
    args: tuple[str, int, typing.Optional[str]]

    # __slots__ keeps instances small - many of these are created during parsing
    __slots__ = (
        "loc",
        "msg",
        "pstr",
        "parser_element",
        "args",
    )

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self,
        pstr: str,
        loc: int = 0,
        msg: typing.Optional[str] = None,
        elem=None,
    ) -> None:
        # single-argument form: ParseBaseException("message") - the lone
        # positional argument is the message, not the parsed string
        if msg is None:
            msg, pstr = pstr, ""

        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parser_element = elem
        self.args = (pstr, loc, msg)

    @staticmethod
    def explain_exception(exc: Exception, depth: typing.Optional[int] = 16) -> str:
        """
        Method to take an exception and translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

        - exc - exception raised during parsing (need not be a ParseException, in support
          of Python exceptions that might be raised in a parse action)
        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.
        """
        # deferred imports to avoid a circular dependency with .core
        import inspect
        from .core import ParserElement

        if depth is None:
            depth = sys.getrecursionlimit()
        ret: list[str] = []
        if isinstance(exc, ParseBaseException):
            # show the failing line, a caret under the failure column, and the message
            ret.append(exc.line)
            ret.append(f"{'^':>{exc.column}}")
            ret.append(f"{type(exc).__name__}: {exc}")

        if depth <= 0 or exc.__traceback__ is None:
            return "\n".join(ret)

        # walk the traceback frames, innermost last
        callers = inspect.getinnerframes(exc.__traceback__, context=depth)
        seen: set[int] = set()
        for ff in callers[-depth:]:
            frm = ff[0]

            f_self = frm.f_locals.get("self", None)
            if isinstance(f_self, ParserElement):
                # only report the parse entry points, once per element instance
                if not frm.f_code.co_name.startswith(("parseImpl", "_parseNoCache")):
                    continue
                if id(f_self) in seen:
                    continue
                seen.add(id(f_self))

                self_type = type(f_self)
                ret.append(f"{self_type.__module__}.{self_type.__name__} - {f_self}")

            elif f_self is not None:
                # method on some other object - report its class only
                self_type = type(f_self)
                ret.append(f"{self_type.__module__}.{self_type.__name__}")

            else:
                # plain function frame; skip decorator wrappers and module bodies
                code = frm.f_code
                if code.co_name in ("wrapper", "<module>"):
                    continue

                ret.append(code.co_name)

            depth -= 1
            if not depth:
                break

        return "\n".join(ret)

    @classmethod
    def _from_exception(cls, pe) -> ParseBaseException:
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element)

    @cached_property
    def line(self) -> str:
        """
        Return the line of text where the exception occurred.
        """
        return line(self.loc, self.pstr)

    @cached_property
    def lineno(self) -> int:
        """
        Return the 1-based line number of text where the exception occurred.
        """
        return lineno(self.loc, self.pstr)

    @cached_property
    def col(self) -> int:
        """
        Return the 1-based column on the line of text where the exception occurred.
        """
        return col(self.loc, self.pstr)

    @cached_property
    def column(self) -> int:
        """
        Return the 1-based column on the line of text where the exception occurred.

        (Synonym for :attr:`col`.)
        """
        return col(self.loc, self.pstr)

    @cached_property
    def found(self) -> str:
        # Human-readable description of the text at the error location,
        # used by formatted_message().
        if not self.pstr:
            return ""

        if self.loc >= len(self.pstr):
            return "end of text"

        # pull out next word at error location
        found_match = _exception_word_extractor.match(self.pstr, self.loc)
        if found_match is not None:
            found_text = found_match[0]
        else:
            found_text = self.pstr[self.loc : self.loc + 1]

        # collapse doubled backslashes introduced by repr()
        return repr(found_text).replace(r"\\", "\\")

    # pre-PEP8 compatibility
    @property
    def parserElement(self):
        warnings.warn(
            "parserElement is deprecated, use parser_element",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        return self.parser_element

    @parserElement.setter
    def parserElement(self, elem):
        warnings.warn(
            "parserElement is deprecated, use parser_element",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        self.parser_element = elem

    def copy(self):
        # shallow copy is sufficient: all attributes are immutable or shared
        return copy.copy(self)

    def formatted_message(self) -> str:
        """
        Output the formatted exception message.
        Can be overridden to customize the message formatting or contents.

        .. versionadded:: 3.2.0
        """
        found_phrase = f", found {self.found}" if self.found else ""
        return f"{self.msg}{found_phrase}  (at char {self.loc}), (line:{self.lineno}, col:{self.column})"

    def __str__(self) -> str:
        """
        .. versionchanged:: 3.2.0
            Now uses :meth:`formatted_message` to format message.
        """
        try:
            return self.formatted_message()
        except Exception as ex:
            # never let message formatting itself raise out of __str__
            return (
                f"{type(self).__name__}: {self.msg}"
                f" ({type(ex).__name__}: {ex} while formatting message)"
            )

    def __repr__(self):
        return str(self)

    def mark_input_line(
        self, marker_string: typing.Optional[str] = None, **kwargs
    ) -> str:
        """
        Extracts the exception line from the input string, and marks
        the location of the exception with a special symbol.
        """
        # legacy "markerString" kwarg support; default marker is ">!<"
        markerString: str = deprecate_argument(kwargs, "markerString", ">!<")

        markerString = marker_string if marker_string is not None else markerString
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = f"{line_str[:line_column]}{markerString}{line_str[line_column:]}"
        return line_str.strip()

    def explain(self, depth: typing.Optional[int] = 16) -> str:
        """
        Method to translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Example:

        .. testcode::

            # an expression to parse 3 integers
            expr = pp.Word(pp.nums) * 3
            try:
                # a failing parse - the third integer is prefixed with "A"
                expr.parse_string("123 456 A789")
            except pp.ParseException as pe:
                print(pe.explain(depth=0))

        prints:

        .. testoutput::

            123 456 A789
                    ^
            ParseException: Expected W:(0-9), found 'A789'  (at char 8), (line:1, col:9)

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `set_name` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        Note: pyparsing's default truncation of exception tracebacks may also truncate the
        stack of expressions that are displayed in the ``explain`` output. To get the full listing
        of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True``
        """
        return self.explain_exception(self, depth)

    # Compatibility synonyms
    # fmt: off
    markInputline = replaced_by_pep8("markInputline", mark_input_line)
    # fmt: on
|
||||
|
||||
|
||||
class ParseException(ParseBaseException):
    """
    Exception thrown when a parse expression doesn't match the input string.
    This is the exception most user code should catch; recoverable match
    failures inside alternatives are expressed with this type.

    Example:

    .. testcode::

        integer = Word(nums).set_name("integer")
        try:
            integer.parse_string("ABC")
        except ParseException as pe:
            print(pe, f"column: {pe.column}")

    prints:

    .. testoutput::

        Expected integer, found 'ABC'  (at char 0), (line:1, col:1) column: 1

    """
|
||||
|
||||
|
||||
class ParseFatalException(ParseBaseException):
    """
    User-throwable exception thrown when inconsistent parse content
    is found; stops all parsing immediately (unlike :class:`ParseException`,
    it is not caught-and-retried by alternative expressions).
    """
|
||||
|
||||
|
||||
class ParseSyntaxException(ParseFatalException):
    """
    Just like :class:`ParseFatalException`, but thrown internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
    that parsing is to stop immediately because an unbacktrackable
    syntax error has been found.
    """
|
||||
|
||||
|
||||
class RecursiveGrammarException(Exception):
    """
    .. deprecated:: 3.0.0
        Only used by the deprecated :meth:`ParserElement.validate`.

    Raised by :class:`ParserElement.validate` when it detects that the
    grammar may be left-recursive; in that case the parser may need to
    enable left recursion via
    :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
    """

    def __init__(self, parseElementList) -> None:
        # chain of parser elements that forms the (possible) recursion cycle
        self.parseElementTrace = parseElementList

    def __str__(self) -> str:
        return "RecursiveGrammarException: {}".format(self.parseElementTrace)
|
||||
1220
venv/lib/python3.12/site-packages/pyparsing/helpers.py
Normal file
1220
venv/lib/python3.12/site-packages/pyparsing/helpers.py
Normal file
File diff suppressed because it is too large
Load Diff
928
venv/lib/python3.12/site-packages/pyparsing/results.py
Normal file
928
venv/lib/python3.12/site-packages/pyparsing/results.py
Normal file
@@ -0,0 +1,928 @@
|
||||
# results.py
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
from collections.abc import (
|
||||
MutableMapping,
|
||||
Mapping,
|
||||
MutableSequence,
|
||||
Iterator,
|
||||
Iterable,
|
||||
)
|
||||
import pprint
|
||||
from typing import Any
|
||||
|
||||
from .util import deprecate_argument, _is_iterable, _flatten
|
||||
|
||||
|
||||
# Types treated as "string-like" (not iterated into) when building results.
str_type: tuple[type, ...] = (str, bytes)
# The generator type, obtained from an empty generator expression; used for
# isinstance checks when deciding how to consume a token list.
_generator_type = type((_ for _ in ()))
# Sentinel full slice, compared against in __delitem__ as a fast path.
NULL_SLICE: slice = slice(None)
|
||||
|
||||
|
||||
class _ParseResultsWithOffset:
|
||||
tup: tuple[ParseResults, int]
|
||||
__slots__ = ["tup"]
|
||||
|
||||
def __init__(self, p1: ParseResults, p2: int) -> None:
|
||||
self.tup: tuple[ParseResults, int] = (p1, p2)
|
||||
|
||||
def __getitem__(self, i):
|
||||
return self.tup[i]
|
||||
|
||||
def __getstate__(self):
|
||||
return self.tup
|
||||
|
||||
def __setstate__(self, *args):
|
||||
self.tup = args[0]
|
||||
|
||||
|
||||
class ParseResults:
|
||||
"""Structured parse results, to provide multiple means of access to
|
||||
the parsed data:
|
||||
|
||||
- as a list (``len(results)``)
|
||||
- by list index (``results[0], results[1]``, etc.)
|
||||
- by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
|
||||
|
||||
Example:
|
||||
|
||||
.. testcode::
|
||||
|
||||
integer = Word(nums)
|
||||
date_str = (integer.set_results_name("year") + '/'
|
||||
+ integer.set_results_name("month") + '/'
|
||||
+ integer.set_results_name("day"))
|
||||
# equivalent form:
|
||||
# date_str = (integer("year") + '/'
|
||||
# + integer("month") + '/'
|
||||
# + integer("day"))
|
||||
|
||||
# parse_string returns a ParseResults object
|
||||
result = date_str.parse_string("1999/12/31")
|
||||
|
||||
def test(s, fn=repr):
|
||||
print(f"{s} -> {fn(eval(s))}")
|
||||
|
||||
test("list(result)")
|
||||
test("result[0]")
|
||||
test("result['month']")
|
||||
test("result.day")
|
||||
test("'month' in result")
|
||||
test("'minutes' in result")
|
||||
test("result.dump()", str)
|
||||
|
||||
prints:
|
||||
|
||||
.. testoutput::
|
||||
|
||||
list(result) -> ['1999', '/', '12', '/', '31']
|
||||
result[0] -> '1999'
|
||||
result['month'] -> '12'
|
||||
result.day -> '31'
|
||||
'month' in result -> True
|
||||
'minutes' in result -> False
|
||||
result.dump() -> ['1999', '/', '12', '/', '31']
|
||||
- day: '31'
|
||||
- month: '12'
|
||||
- year: '1999'
|
||||
|
||||
"""
|
||||
|
||||
_null_values: tuple[Any, ...] = (None, [], ())
|
||||
|
||||
_name: str
|
||||
_parent: ParseResults
|
||||
_all_names: set[str]
|
||||
_modal: bool
|
||||
_toklist: list[Any]
|
||||
_tokdict: dict[str, Any]
|
||||
|
||||
__slots__ = (
|
||||
"_name",
|
||||
"_parent",
|
||||
"_all_names",
|
||||
"_modal",
|
||||
"_toklist",
|
||||
"_tokdict",
|
||||
)
|
||||
|
||||
class List(list):
|
||||
"""
|
||||
Simple wrapper class to distinguish parsed list results that should be preserved
|
||||
as actual Python lists, instead of being converted to :class:`ParseResults`:
|
||||
|
||||
.. testcode::
|
||||
|
||||
import pyparsing as pp
|
||||
ppc = pp.common
|
||||
|
||||
LBRACK, RBRACK, LPAR, RPAR = pp.Suppress.using_each("[]()")
|
||||
element = pp.Forward()
|
||||
item = ppc.integer
|
||||
item_list = pp.DelimitedList(element)
|
||||
element_list = LBRACK + item_list + RBRACK | LPAR + item_list + RPAR
|
||||
element <<= item | element_list
|
||||
|
||||
# add parse action to convert from ParseResults
|
||||
# to actual Python collection types
|
||||
@element_list.add_parse_action
|
||||
def as_python_list(t):
|
||||
return pp.ParseResults.List(t.as_list())
|
||||
|
||||
element.run_tests('''
|
||||
100
|
||||
[2,3,4]
|
||||
[[2, 1],3,4]
|
||||
[(2, 1),3,4]
|
||||
(2,3,4)
|
||||
([2, 3], 4)
|
||||
''', post_parse=lambda s, r: (r[0], type(r[0]))
|
||||
)
|
||||
|
||||
prints:
|
||||
|
||||
.. testoutput::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
|
||||
100
|
||||
(100, <class 'int'>)
|
||||
|
||||
[2,3,4]
|
||||
([2, 3, 4], <class 'list'>)
|
||||
|
||||
[[2, 1],3,4]
|
||||
([[2, 1], 3, 4], <class 'list'>)
|
||||
|
||||
[(2, 1),3,4]
|
||||
([[2, 1], 3, 4], <class 'list'>)
|
||||
|
||||
(2,3,4)
|
||||
([2, 3, 4], <class 'list'>)
|
||||
|
||||
([2, 3], 4)
|
||||
([[2, 3], 4], <class 'list'>)
|
||||
|
||||
(Used internally by :class:`Group` when `aslist=True`.)
|
||||
"""
|
||||
|
||||
def __new__(cls, contained=None):
|
||||
if contained is None:
|
||||
contained = []
|
||||
|
||||
if not isinstance(contained, list):
|
||||
raise TypeError(
|
||||
f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}"
|
||||
)
|
||||
|
||||
return list.__new__(cls)
|
||||
|
||||
def __new__(cls, toklist=None, name=None, **kwargs):
|
||||
if isinstance(toklist, ParseResults):
|
||||
return toklist
|
||||
self = object.__new__(cls)
|
||||
self._name = None
|
||||
self._parent = None
|
||||
self._all_names = set()
|
||||
|
||||
if toklist is None:
|
||||
self._toklist = []
|
||||
elif isinstance(toklist, (list, _generator_type)):
|
||||
self._toklist = (
|
||||
[toklist[:]]
|
||||
if isinstance(toklist, ParseResults.List)
|
||||
else list(toklist)
|
||||
)
|
||||
else:
|
||||
self._toklist = [toklist]
|
||||
self._tokdict = dict()
|
||||
return self
|
||||
|
||||
# Performance tuning: we construct a *lot* of these, so keep this
|
||||
# constructor as small and fast as possible
|
||||
def __init__(
|
||||
self,
|
||||
toklist=None,
|
||||
name=None,
|
||||
aslist=True,
|
||||
modal=True,
|
||||
isinstance=isinstance,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
asList = deprecate_argument(kwargs, "asList", True, new_name="aslist")
|
||||
|
||||
asList = asList and aslist
|
||||
self._tokdict: dict[str, _ParseResultsWithOffset]
|
||||
self._modal = modal
|
||||
|
||||
if name is None or name == "":
|
||||
return
|
||||
|
||||
if isinstance(name, int):
|
||||
name = str(name)
|
||||
|
||||
if not modal:
|
||||
self._all_names = {name}
|
||||
|
||||
self._name = name
|
||||
|
||||
if toklist in self._null_values:
|
||||
return
|
||||
|
||||
if isinstance(toklist, (str_type, type)):
|
||||
toklist = [toklist]
|
||||
|
||||
if asList:
|
||||
if isinstance(toklist, ParseResults):
|
||||
self[name] = _ParseResultsWithOffset(ParseResults(toklist._toklist), 0)
|
||||
else:
|
||||
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
|
||||
self[name]._name = name
|
||||
return
|
||||
|
||||
try:
|
||||
self[name] = toklist[0]
|
||||
except (KeyError, TypeError, IndexError):
|
||||
if toklist is not self:
|
||||
self[name] = toklist
|
||||
else:
|
||||
self._name = name
|
||||
|
||||
def __getitem__(self, i):
    """Index like a list (int/slice key) or like a dict (results-name key)."""
    if isinstance(i, (int, slice)):
        return self._toklist[i]
    occurrences = self._tokdict[i]
    if i in self._all_names:
        # "list all matches" name: return every stored occurrence
        return ParseResults([entry[0] for entry in occurrences])
    # plain results name: return only the most recent match
    return occurrences[-1][0]
|
||||
|
||||
def __setitem__(self, k, v, isinstance=isinstance):
    # Store a token by position (int/slice key) or a named result (other
    # keys). Pre-wrapped _ParseResultsWithOffset values keep their offset;
    # plain named values are wrapped with offset 0. Named occurrences are
    # appended, never replaced.
    if isinstance(v, _ParseResultsWithOffset):
        self._tokdict[k] = self._tokdict.get(k, list()) + [v]
        sub = v[0]
    elif isinstance(k, (int, slice)):
        self._toklist[k] = v
        sub = v
    else:
        self._tokdict[k] = self._tokdict.get(k, []) + [
            _ParseResultsWithOffset(v, 0)
        ]
        sub = v
    if isinstance(sub, ParseResults):
        # maintain parent link so get_name() can search upward
        sub._parent = self
|
||||
|
||||
def __delitem__(self, i):
    """Delete by list index/slice, or delete a results name (other keys)."""
    if not isinstance(i, (int, slice)):
        del self._tokdict[i]
        return

    # slight optimization if del results[:]
    if i == NULL_SLICE:
        self._toklist.clear()
        return

    mylen = len(self._toklist)
    del self._toklist[i]

    # convert int to slice
    if isinstance(i, int):
        if i < 0:
            i += mylen
        i = slice(i, i + 1)
    # get removed indices
    removed = list(range(*i.indices(mylen)))
    removed.reverse()
    # fixup indices in token dictionary: decrement each stored offset once
    # per removed slot that preceded it
    for occurrences in self._tokdict.values():
        for j in removed:
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(
                    value, position - (position > j)
                )
|
||||
|
||||
def __contains__(self, k) -> bool:
    """True if *k* is a defined results name in this ParseResults."""
    return k in self._tokdict
|
||||
|
||||
def __len__(self) -> int:
    """Number of matched tokens (list semantics)."""
    return len(self._toklist)
|
||||
|
||||
def __bool__(self) -> bool:
    """Truthy if there are any tokens OR any named results."""
    return bool(self._toklist or self._tokdict)
|
||||
|
||||
def __iter__(self) -> Iterator:
    """Iterate over the matched tokens."""
    return iter(self._toklist)
|
||||
|
||||
def __reversed__(self) -> Iterator:
    """Iterate over the matched tokens in reverse order (over a copy)."""
    return iter(self._toklist[::-1])
|
||||
|
||||
def keys(self):
    """Iterator over the defined results names (dict semantics)."""
    return iter(self._tokdict)
|
||||
|
||||
def values(self):
    """Iterator over the values of the defined results names."""
    return (self[result_name] for result_name in self.keys())
|
||||
|
||||
def items(self):
    """Iterator over (results name, value) pairs."""
    return ((result_name, self[result_name]) for result_name in self.keys())
|
||||
|
||||
def haskeys(self) -> bool:
    """
    Since ``keys()`` returns an iterator, this method is helpful in bypassing
    code that looks for the existence of any defined results names."""
    return bool(self._tokdict)
|
||||
|
||||
def pop(self, *args, **kwargs):
    """
    Removes and returns item at specified index (default= ``last``).
    Supports both ``list`` and ``dict`` semantics for ``pop()``. If
    passed no argument or an integer argument, it will use ``list``
    semantics and pop tokens from the list of parsed tokens. If passed
    a non-integer argument (most likely a string), it will use ``dict``
    semantics and pop the corresponding value from any defined results
    names. A second default return value argument is supported, just as in
    ``dict.pop()``.

    Example:

    .. doctest::

        >>> numlist = Word(nums)[...]
        >>> print(numlist.parse_string("0 123 321"))
        ['0', '123', '321']

        >>> def remove_first(tokens):
        ...     tokens.pop(0)
        ...
        >>> numlist.add_parse_action(remove_first)
        [W:(0-9)]...
        >>> print(numlist.parse_string("0 123 321"))
        ['123', '321']

        >>> label = Word(alphas)
        >>> patt = label("LABEL") + Word(nums)[1, ...]
        >>> print(patt.parse_string("AAB 123 321").dump())
        ['AAB', '123', '321']
        - LABEL: 'AAB'

        >>> # Use pop() in a parse action to remove named result
        >>> # (note that corresponding value is not
        >>> # removed from list form of results)
        >>> def remove_LABEL(tokens):
        ...     tokens.pop("LABEL")
        ...     return tokens
        ...
        >>> patt.add_parse_action(remove_LABEL)
        {W:(A-Za-z) {W:(0-9)}...}
        >>> print(patt.parse_string("AAB 123 321").dump())
        ['AAB', '123', '321']

    """
    if not args:
        args = [-1]  # default: pop the last token (list semantics)
    for k, v in kwargs.items():
        if k == "default":
            args = (args[0], v)
        else:
            raise TypeError(f"pop() got an unexpected keyword argument {k!r}")
    # int key -> list semantics; known name -> dict semantics; otherwise
    # fall through to the supplied default value
    if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
        index = args[0]
        ret = self[index]
        del self[index]
        return ret
    else:
        defaultvalue = args[1]
        return defaultvalue
|
||||
|
||||
def get(self, key, default_value=None):
    """
    Returns named result matching the given key, or if there is no
    such name, then returns the given ``default_value`` or ``None`` if no
    ``default_value`` is specified.

    Similar to ``dict.get()``.

    Example:

    .. doctest::

        >>> integer = Word(nums)
        >>> date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        >>> result = date_str.parse_string("1999/12/31")
        >>> result.get("year")
        '1999'
        >>> result.get("hour", "not specified")
        'not specified'
        >>> result.get("hour")

    """
    # membership test uses __contains__, i.e. checks the results-name dict
    if key in self:
        return self[key]
    else:
        return default_value
|
||||
|
||||
def insert(self, index, ins_string):
    """
    Inserts new element at location index in the list of parsed tokens.

    Similar to ``list.insert()``.

    Example:

    .. doctest::

        >>> numlist = Word(nums)[...]
        >>> print(numlist.parse_string("0 123 321"))
        ['0', '123', '321']

        >>> # use a parse action to insert the parse location
        >>> # in the front of the parsed results
        >>> def insert_locn(locn, tokens):
        ...     tokens.insert(0, locn)
        ...
        >>> numlist.add_parse_action(insert_locn)
        [W:(0-9)]...
        >>> print(numlist.parse_string("0 123 321"))
        [0, '0', '123', '321']

    """
    self._toklist.insert(index, ins_string)
    # fixup indices in token dictionary: bump stored offsets that point
    # past the insertion point
    for occurrences in self._tokdict.values():
        for k, (value, position) in enumerate(occurrences):
            occurrences[k] = _ParseResultsWithOffset(
                value, position + (position > index)
            )
|
||||
|
||||
def append(self, item):
    """
    Add single element to end of ``ParseResults`` list of elements.

    Example:

    .. doctest::

        >>> numlist = Word(nums)[...]
        >>> print(numlist.parse_string("0 123 321"))
        ['0', '123', '321']

        >>> # use a parse action to compute the sum of the parsed integers,
        >>> # and add it to the end
        >>> def append_sum(tokens):
        ...     tokens.append(sum(map(int, tokens)))
        ...
        >>> numlist.add_parse_action(append_sum)
        [W:(0-9)]...
        >>> print(numlist.parse_string("0 123 321"))
        ['0', '123', '321', 444]
    """
    # only the token list is affected; results names are untouched
    self._toklist.append(item)
|
||||
|
||||
def extend(self, itemseq):
    """
    Add sequence of elements to end of :class:`ParseResults` list of elements.

    Example:

    .. testcode::

        patt = Word(alphas)[1, ...]

        # use a parse action to append the reverse of the matched strings,
        # to make a palindrome
        def make_palindrome(tokens):
            tokens.extend(reversed([t[::-1] for t in tokens]))
            return ''.join(tokens)

        patt.add_parse_action(make_palindrome)
        print(patt.parse_string("lskdj sdlkjf lksd"))

    prints:

    .. testoutput::

        ['lskdjsdlkjflksddsklfjkldsjdksl']
    """
    if isinstance(itemseq, ParseResults):
        # merging another ParseResults also merges its results names
        self.__iadd__(itemseq)
    else:
        self._toklist.extend(itemseq)
|
||||
|
||||
def clear(self):
    """
    Clear all elements and results names.
    """
    self._toklist.clear()
    self._tokdict.clear()
|
||||
|
||||
def __getattr__(self, name):
    """Expose results names as attributes; unknown names read as ''.

    Dunder lookups must still raise AttributeError so protocols such as
    copy/pickle probe correctly.
    """
    try:
        return self[name]
    except KeyError:
        if name.startswith("__"):
            raise AttributeError(name)
    return ""
|
||||
|
||||
def __add__(self, other: ParseResults) -> ParseResults:
    """Return a new ParseResults combining self's and other's tokens and names."""
    combined = self.copy()
    combined += other
    return combined
|
||||
|
||||
def __iadd__(self, other: ParseResults) -> ParseResults:
    # Merge other's tokens and named results into self in place.
    if not other:
        return self

    if other._tokdict:
        offset = len(self._toklist)
        # negative offsets mark "end of list" entries: rebase them at the
        # join point; shift non-negative offsets by the old length
        addoffset = lambda a: offset if a < 0 else a + offset
        otheritems = other._tokdict.items()
        otherdictitems = [
            (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
            for k, vlist in otheritems
            for v in vlist
        ]
        for k, v in otherdictitems:
            self[k] = v
            if isinstance(v[0], ParseResults):
                # adopt nested results so get_name() resolves via self
                v[0]._parent = self

    self._toklist += other._toklist
    self._all_names |= other._all_names
    return self
|
||||
|
||||
def __radd__(self, other) -> ParseResults:
    """Right-hand add; makes ``sum()`` of many ParseResults work (0 + self)."""
    if isinstance(other, int) and not other:
        # useful for merging many ParseResults using sum() builtin
        return self.copy()
    # this may raise a TypeError - so be it
    return other + self
|
||||
|
||||
def __repr__(self) -> str:
    """Debug form: class name, token-list repr, and named-results dict."""
    cls_name = type(self).__name__
    return f"{cls_name}({self._toklist!r}, {self.as_dict()})"
|
||||
|
||||
def __str__(self) -> str:
    """Render like a list literal; nested ParseResults via str, leaves via repr."""
    rendered = (
        str(tok) if isinstance(tok, ParseResults) else repr(tok)
        for tok in self._toklist
    )
    return f"[{', '.join(rendered)}]"
|
||||
|
||||
def _asStringList(self, sep=""):
    """Flatten all tokens to a flat list of strings, with *sep* between
    top-level items (never before the first)."""
    flat = []
    for tok in self._toklist:
        if flat and sep:
            flat.append(sep)
        if isinstance(tok, ParseResults):
            flat.extend(tok._asStringList())
        else:
            flat.append(str(tok))
    return flat
|
||||
|
||||
def as_list(self, *, flatten: bool = False) -> list:
    """
    Returns the parse results as a nested list of matching tokens, all converted to strings.
    If ``flatten`` is True, all the nesting levels in the returned list are collapsed.

    Example:

    .. doctest::

        >>> patt = Word(alphas)[1, ...]
        >>> result = patt.parse_string("sldkj lsdkj sldkj")
        >>> # even though the result prints in string-like form,
        >>> # it is actually a pyparsing ParseResults
        >>> type(result)
        <class 'pyparsing.results.ParseResults'>
        >>> print(result)
        ['sldkj', 'lsdkj', 'sldkj']

    .. doctest::

        >>> # Use as_list() to create an actual list
        >>> result_list = result.as_list()
        >>> type(result_list)
        <class 'list'>
        >>> print(result_list)
        ['sldkj', 'lsdkj', 'sldkj']

    .. versionchanged:: 3.2.0
        New ``flatten`` argument.
    """

    if flatten:
        # collapse all nesting into one flat list
        return [*_flatten(self)]
    else:
        # recurse into nested ParseResults; leave other tokens as-is
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]
|
||||
|
||||
def as_dict(self) -> dict:
    """
    Returns the named parse results as a nested dictionary.

    Example:

    .. doctest::

        >>> integer = pp.Word(pp.nums)
        >>> date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        >>> result = date_str.parse_string('1999/12/31')
        >>> type(result)
        <class 'pyparsing.results.ParseResults'>
        >>> result
        ParseResults(['1999', '/', '12', '/', '31'], {'year': '1999', 'month': '12', 'day': '31'})

        >>> result_dict = result.as_dict()
        >>> type(result_dict)
        <class 'dict'>
        >>> result_dict
        {'year': '1999', 'month': '12', 'day': '31'}

        >>> # even though a ParseResults supports dict-like access,
        >>> # sometime you just need to have a dict
        >>> import json
        >>> print(json.dumps(result))
        Traceback (most recent call last):
        TypeError: Object of type ParseResults is not JSON serializable
        >>> print(json.dumps(result.as_dict()))
        {"year": "1999", "month": "12", "day": "31"}
    """

    def to_item(obj):
        # nested ParseResults with names -> dict; unnamed -> list of items;
        # anything else is returned unchanged
        if isinstance(obj, ParseResults):
            return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
        else:
            return obj

    return dict((k, to_item(v)) for k, v in self.items())
|
||||
|
||||
def copy(self) -> ParseResults:
    """
    Returns a new shallow copy of a :class:`ParseResults` object.
    :class:`ParseResults` items contained within the source are
    shared with the copy. Use :meth:`ParseResults.deepcopy` to
    create a copy with its own separate content values.
    """
    dup = ParseResults(self._toklist)
    dup._name = self._name
    dup._parent = self._parent
    dup._tokdict = self._tokdict.copy()
    dup._all_names |= self._all_names
    return dup
|
||||
|
||||
def deepcopy(self) -> ParseResults:
    """
    Returns a new deep copy of a :class:`ParseResults` object.

    .. versionadded:: 3.1.0
    """
    ret = self.copy()
    # replace values with copies if they are of known mutable types
    for i, obj in enumerate(self._toklist):
        if isinstance(obj, ParseResults):
            ret._toklist[i] = obj.deepcopy()
        elif isinstance(obj, (str, bytes)):
            # immutable - safe to share with the copy
            pass
        elif isinstance(obj, MutableMapping):
            # rebuild dict-like containers, deep-copying nested results
            ret._toklist[i] = dest = type(obj)()
            for k, v in obj.items():
                dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
        elif isinstance(obj, Iterable):
            # rebuild other iterable containers from their elements
            ret._toklist[i] = type(obj)(
                v.deepcopy() if isinstance(v, ParseResults) else v for v in obj  # type: ignore[call-arg]
            )
    return ret
|
||||
|
||||
def get_name(self) -> str | None:
    r"""
    Returns the results name for this token expression.

    Useful when several different expressions might match
    at a particular location.

    Example:

    .. testcode::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                     | Group(ssn_expr)("ssn")
                     | Group(integer)("age"))
        user_info = user_data[1, ...]

        result = user_info.parse_string("22 111-22-3333 #221B")
        for item in result:
            print(item.get_name(), ':', item[0])

    prints:

    .. testoutput::

        age : 22
        ssn : 111-22-3333
        house_number : 221B

    """
    if self._name:
        return self._name
    elif self._parent:
        # search the parent's named results for one whose value is this
        # very object (identity comparison)
        par: ParseResults = self._parent
        parent_tokdict_items = par._tokdict.items()
        return next(
            (
                k
                for k, vlist in parent_tokdict_items
                for v, loc in vlist
                if v is self
            ),
            None,
        )
    elif (
        # single token with a single name whose offset is at either end
        len(self) == 1
        and len(self._tokdict) == 1
        and next(iter(self._tokdict.values()))[0][1] in (0, -1)
    ):
        return next(iter(self._tokdict.keys()))
    else:
        return None
|
||||
|
||||
def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
    """
    Diagnostic method for listing out the contents of
    a :class:`ParseResults`. Accepts an optional ``indent`` argument so
    that this string can be embedded in a nested display of other data.

    Example:

    .. testcode::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parse_string('1999/12/31')
        print(result.dump())

    prints:

    .. testoutput::

        ['1999', '/', '12', '/', '31']
        - day: '31'
        - month: '12'
        - year: '1999'
    """
    out = []
    NL = "\n"
    # first line: the list form of the tokens (optional)
    out.append(indent + str(self.as_list()) if include_list else "")

    if not full:
        return "".join(out)

    if self.haskeys():
        # show named results, sorted by name, one per line
        items = sorted((str(k), v) for k, v in self.items())
        for k, v in items:
            if out:
                out.append(NL)
            out.append(f"{indent}{(' ' * _depth)}- {k}: ")
            if not isinstance(v, ParseResults):
                out.append(repr(v))
                continue

            if not v:
                # empty nested results - just its string form
                out.append(str(v))
                continue

            out.append(
                v.dump(
                    indent=indent,
                    full=full,
                    include_list=include_list,
                    _depth=_depth + 1,
                )
            )
    if not any(isinstance(vv, ParseResults) for vv in self):
        return "".join(out)

    # show nested ParseResults elements by index, one level deeper
    v = self
    incr = "  "
    nl = "\n"
    for i, vv in enumerate(v):
        if isinstance(vv, ParseResults):
            vv_dump = vv.dump(
                indent=indent,
                full=full,
                include_list=include_list,
                _depth=_depth + 1,
            )
            out.append(
                f"{nl}{indent}{incr * _depth}[{i}]:{nl}{indent}{incr * (_depth + 1)}{vv_dump}"
            )
        else:
            out.append(
                f"{nl}{indent}{incr * _depth}[{i}]:{nl}{indent}{incr * (_depth + 1)}{vv}"
            )

    return "".join(out)
|
||||
|
||||
def pprint(self, *args, **kwargs):
    """
    Pretty-printer for parsed results as a list, using the
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Accepts additional positional or keyword args as defined for
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example:

    .. testcode::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(DelimitedList(term)))
        result = func.parse_string("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints:

    .. testoutput::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    # delegate to stdlib pprint on the plain-list form
    pprint.pprint(self.as_list(), *args, **kwargs)
|
||||
|
||||
# add support for pickle protocol
|
||||
def __getstate__(self):
    """Pickle support: (token list, (name dict, parent placeholder, all_names, name))."""
    meta = (self._tokdict.copy(), None, self._all_names, self._name)
    return (self._toklist, meta)
|
||||
|
||||
def __setstate__(self, state):
    """Restore from the __getstate__ tuple; parent links are not pickled."""
    toklist, (tokdict, _par, accum_names, name) = state
    self._toklist = toklist
    self._tokdict = tokdict
    self._name = name
    self._all_names = set(accum_names)
    self._parent = None
|
||||
|
||||
def __getnewargs__(self):
    """Arguments re-passed to __new__ when unpickling."""
    return (self._toklist, self._name)
|
||||
|
||||
def __dir__(self):
    """Include results names alongside regular attributes for completion."""
    return [*dir(type(self)), *self.keys()]
|
||||
|
||||
@classmethod
def from_dict(cls, other, name=None) -> ParseResults:
    """
    Helper classmethod to construct a :class:`ParseResults` from a ``dict``, preserving the
    name-value relations as results names. If an optional ``name`` argument is
    given, a nested :class:`ParseResults` will be returned.
    """
    ret = cls([])
    for k, v in other.items():
        if isinstance(v, Mapping):
            # nested mapping -> nested named ParseResults
            ret += cls.from_dict(v, name=k)
        else:
            # store iterable values as sub-lists, scalars directly
            ret += cls([v], name=k, aslist=_is_iterable(v))
    if name is not None:
        ret = cls([ret], name=name)
    return ret
|
||||
|
||||
# pre-PEP8 (camelCase) compatibility aliases
asList = as_list
"""
.. deprecated:: 3.0.0
    use :meth:`as_list`
"""
asDict = as_dict
"""
.. deprecated:: 3.0.0
    use :meth:`as_dict`
"""
getName = get_name
"""
.. deprecated:: 3.0.0
    use :meth:`get_name`
"""
|
||||
|
||||
|
||||
# register ParseResults as a virtual subclass of the collection ABCs so
# isinstance checks against Mapping/Sequence protocols succeed
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)
|
||||
398
venv/lib/python3.12/site-packages/pyparsing/testing.py
Normal file
398
venv/lib/python3.12/site-packages/pyparsing/testing.py
Normal file
@@ -0,0 +1,398 @@
|
||||
# testing.py
|
||||
|
||||
from contextlib import contextmanager
|
||||
import re
|
||||
import typing
|
||||
import unittest
|
||||
|
||||
|
||||
from .core import (
|
||||
ParserElement,
|
||||
ParseException,
|
||||
Keyword,
|
||||
__diag__,
|
||||
__compat__,
|
||||
)
|
||||
from . import core_builtin_exprs
|
||||
|
||||
|
||||
class pyparsing_test:
    """
    namespace class for classes useful in writing unit tests
    """

    class reset_pyparsing_context:
        """
        Context manager to be used when writing unit tests that modify pyparsing config values:
        - packrat parsing
        - bounded recursion parsing
        - default whitespace characters
        - default keyword characters
        - literal string auto-conversion class
        - ``__diag__`` settings

        Example:

        .. testcode::

            ppt = pyparsing.pyparsing_test

            class MyTestClass(ppt.TestParseResultsAsserts):
                def test_literal(self):
                    with ppt.reset_pyparsing_context():
                        # test that literals used to construct
                        # a grammar are automatically suppressed
                        ParserElement.inline_literals_using(Suppress)

                        term = Word(alphas) | Word(nums)
                        group = Group('(' + term[...] + ')')

                        # assert that the '()' characters
                        # are not included in the parsed tokens
                        self.assertParseAndCheckList(
                            group,
                            "(abc 123 def)",
                            ['abc', '123', 'def']
                        )

                    # after exiting context manager, literals
                    # are converted to Literal expressions again
        """

        def __init__(self):
            # snapshot of global pyparsing state, filled in by save()
            self._save_context = {}

        def save(self):
            """Capture all mutable pyparsing global state; returns self."""
            self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
            self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS

            self._save_context["literal_string_class"] = (
                ParserElement._literalStringClass
            )

            self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace

            self._save_context["packrat_enabled"] = ParserElement._packratEnabled
            if ParserElement._packratEnabled:
                self._save_context["packrat_cache_size"] = (
                    ParserElement.packrat_cache.size
                )
            else:
                self._save_context["packrat_cache_size"] = None
            self._save_context["packrat_parse"] = ParserElement._parse
            self._save_context["recursion_enabled"] = (
                ParserElement._left_recursion_enabled
            )

            self._save_context["__diag__"] = {
                name: getattr(__diag__, name) for name in __diag__._all_names
            }

            self._save_context["__compat__"] = {
                "collect_all_And_tokens": __compat__.collect_all_And_tokens
            }

            return self

        def restore(self):
            """Restore the state captured by save(); returns self."""
            # reset pyparsing global state
            if (
                ParserElement.DEFAULT_WHITE_CHARS
                != self._save_context["default_whitespace"]
            ):
                ParserElement.set_default_whitespace_chars(
                    self._save_context["default_whitespace"]
                )

            ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"]

            Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
            ParserElement.inline_literals_using(
                self._save_context["literal_string_class"]
            )

            for name, value in self._save_context["__diag__"].items():
                (__diag__.enable if value else __diag__.disable)(name)

            ParserElement._packratEnabled = False
            if self._save_context["packrat_enabled"]:
                ParserElement.enable_packrat(self._save_context["packrat_cache_size"])
            else:
                ParserElement._parse = self._save_context["packrat_parse"]
            ParserElement._left_recursion_enabled = self._save_context[
                "recursion_enabled"
            ]

            # clear debug flags on all builtins
            for expr in core_builtin_exprs:
                expr.set_debug(False)

            # FIX: restore the saved bool, not the enclosing snapshot dict
            # (previously assigned the dict itself, leaving the flag truthy
            # regardless of the saved value)
            __compat__.collect_all_And_tokens = self._save_context["__compat__"][
                "collect_all_And_tokens"
            ]

            return self

        def copy(self):
            """Return a new context holding a copy of this saved state."""
            ret = type(self)()
            ret._save_context.update(self._save_context)
            return ret

        def __enter__(self):
            return self.save()

        def __exit__(self, *args):
            self.restore()

    class TestParseResultsAsserts(unittest.TestCase):
        """
        A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
        """

        def assertParseResultsEquals(
            self, result, expected_list=None, expected_dict=None, msg=None
        ):
            """
            Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``,
            and compare any defined results names with an optional ``expected_dict``.
            """
            if expected_list is not None:
                self.assertEqual(expected_list, result.as_list(), msg=msg)
            if expected_dict is not None:
                self.assertEqual(expected_dict, result.as_dict(), msg=msg)

        def assertParseAndCheckList(
            self, expr, test_string, expected_list, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting :meth:`ParseResults.as_list` is equal to the ``expected_list``.
            """
            result = expr.parse_string(test_string, parse_all=True)
            if verbose:
                print(result.dump())
            else:
                print(result.as_list())
            self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

        def assertParseAndCheckDict(
            self, expr, test_string, expected_dict, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting :meth:`ParseResults.as_dict` is equal to the ``expected_dict``.
            """
            result = expr.parse_string(test_string, parse_all=True)
            if verbose:
                print(result.dump())
            else:
                print(result.as_list())
            self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

        def assertRunTestResults(
            self, run_tests_report, expected_parse_results=None, msg=None
        ):
            """
            Unit test assertion to evaluate output of
            :meth:`~ParserElement.run_tests`.

            If a list of list-dict tuples is given as the
            ``expected_parse_results`` argument, then these are zipped
            with the report tuples returned by ``run_tests()``
            and evaluated using :meth:`assertParseResultsEquals`.
            Finally, asserts that the overall
            `:meth:~ParserElement.run_tests` success value is ``True``.

            :param run_tests_report: the return value from :meth:`ParserElement.run_tests`
            :type run_tests_report: tuple[bool, list[tuple[str, ParseResults | Exception]]]
            :param expected_parse_results: (optional)
            :type expected_parse_results: list[tuple[str | list | dict | Exception, ...]]
            """
            run_test_success, run_test_results = run_tests_report

            if expected_parse_results is None:
                self.assertTrue(
                    run_test_success, msg=msg if msg is not None else "failed runTests"
                )
                return

            merged = [
                (*rpt, expected)
                for rpt, expected in zip(run_test_results, expected_parse_results)
            ]
            for test_string, result, expected in merged:
                # expected should be a tuple containing a list and/or a dict or an exception,
                # and optional failure message string
                # an empty tuple will skip any result validation
                fail_msg = next((exp for exp in expected if isinstance(exp, str)), None)
                expected_exception = next(
                    (
                        exp
                        for exp in expected
                        if isinstance(exp, type) and issubclass(exp, Exception)
                    ),
                    None,
                )
                if expected_exception is not None:
                    with self.assertRaises(
                        expected_exception=expected_exception, msg=fail_msg or msg
                    ):
                        if isinstance(result, Exception):
                            raise result
                else:
                    expected_list = next(
                        (exp for exp in expected if isinstance(exp, list)), None
                    )
                    expected_dict = next(
                        (exp for exp in expected if isinstance(exp, dict)), None
                    )
                    if (expected_list, expected_dict) != (None, None):
                        self.assertParseResultsEquals(
                            result,
                            expected_list=expected_list,
                            expected_dict=expected_dict,
                            msg=fail_msg or msg,
                        )
                    else:
                        # warning here maybe?
                        print(f"no validation for {test_string!r}")

            # do this last, in case some specific test results can be reported instead
            self.assertTrue(
                run_test_success, msg=msg if msg is not None else "failed runTests"
            )

        @contextmanager
        def assertRaisesParseException(
            self, exc_type=ParseException, expected_msg=None, msg=None
        ):
            # Context-manager assertion for parse failures; optionally checks
            # the exception message (literal strings are regex-escaped).
            if expected_msg is not None:
                if isinstance(expected_msg, str):
                    expected_msg = re.escape(expected_msg)
                with self.assertRaisesRegex(exc_type, expected_msg, msg=msg) as ctx:
                    yield ctx
            else:
                with self.assertRaises(exc_type, msg=msg) as ctx:
                    yield ctx

    @staticmethod
    def with_line_numbers(
        s: str,
        start_line: typing.Optional[int] = None,
        end_line: typing.Optional[int] = None,
        expand_tabs: bool = True,
        eol_mark: str = "|",
        mark_spaces: typing.Optional[str] = None,
        mark_control: typing.Optional[str] = None,
        *,
        indent: typing.Union[str, int] = "",
        base_1: bool = True,
    ) -> str:
        """
        Helpful method for debugging a parser - prints a string with line and column numbers.
        (Line and column numbers are 1-based by default - if debugging a parse action,
        pass base_1=False, to correspond to the loc value passed to the parse action.)

        :param s: string to be printed with line and column numbers
        :param start_line: starting line number in s to print (default=1)
        :param end_line: ending line number in s to print (default=len(s))
        :param expand_tabs: expand tabs to spaces, to match the pyparsing default
        :param eol_mark: string to mark the end of lines, helps visualize trailing spaces
        :param mark_spaces: special character to display in place of spaces
        :param mark_control: convert non-printing control characters to a placeholding
            character; valid values:

            - ``"unicode"`` - replaces control chars with Unicode symbols, such as "␍" and "␊"
            - any single character string - replace control characters with given string
            - ``None`` (default) - string is displayed as-is

        :param indent: string to indent with line and column numbers; if an int
            is passed, converted to ``" " * indent``
        :param base_1: whether to label string using base 1; if False, string will be
            labeled based at 0

        :returns: input string with leading line numbers and column number headers

        .. versionchanged:: 3.2.0
            New ``indent`` and ``base_1`` arguments.
        """
        if expand_tabs:
            s = s.expandtabs()
        if isinstance(indent, int):
            indent = " " * indent
        indent = indent.expandtabs()
        if mark_control is not None:
            mark_control = typing.cast(str, mark_control)
            if mark_control == "unicode":
                # map control chars 0-32 and DEL to the Control Pictures block
                transtable_map = {
                    c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))
                }
                transtable_map[127] = 0x2421
                tbl = str.maketrans(transtable_map)
                eol_mark = ""
            else:
                ord_mark_control = ord(mark_control)
                tbl = str.maketrans(
                    {c: ord_mark_control for c in list(range(0, 32)) + [127]}
                )
            s = s.translate(tbl)
        if mark_spaces is not None and mark_spaces != " ":
            if mark_spaces == "unicode":
                tbl = str.maketrans({9: 0x2409, 32: 0x2423})
                s = s.translate(tbl)
            else:
                s = s.replace(" ", mark_spaces)
        if start_line is None:
            start_line = 0
        if end_line is None:
            end_line = len(s.splitlines())
        end_line = min(end_line, len(s.splitlines()))
        start_line = min(max(0, start_line), end_line)

        if mark_control != "unicode":
            s_lines = s.splitlines()[max(start_line - base_1, 0) : end_line]
        else:
            # line breaks were translated to "␊" above, so split on those
            s_lines = [
                line + "␊"
                for line in s.split("␊")[max(start_line - base_1, 0) : end_line]
            ]
        if not s_lines:
            return ""

        lineno_width = len(str(end_line))
        max_line_len = max(len(line) for line in s_lines)
        lead = f"{indent}{' ' * (lineno_width + 1)}"

        # hundreds header only when lines are long enough to need it
        if max_line_len >= 99:
            header0 = (
                lead
                + ("" if base_1 else " ")
                + "".join(
                    f"{' ' * 99}{(i + 1) % 100}"
                    for i in range(max(max_line_len // 100, 1))
                )
                + "\n"
            )
        else:
            header0 = ""

        # tens header and repeating 1-0 digits header
        header1 = (
            ("" if base_1 else " ")
            + lead
            + "".join(f"         {(i + 1) % 10}" for i in range(-(-max_line_len // 10)))
            + "\n"
        )
        digits = "1234567890"
        header2 = (
            lead + ("" if base_1 else "0") + digits * (-(-max_line_len // 10)) + "\n"
        )
        return (
            header0
            + header1
            + header2
            + "\n".join(
                f"{indent}{i:{lineno_width}d}:{line}{eol_mark}"
                for i, line in enumerate(s_lines, start=start_line + base_1)
            )
            + "\n"
        )
|
||||
@@ -0,0 +1,142 @@
|
||||
import warnings
|
||||
from functools import lru_cache
|
||||
import pyparsing as pp
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def camel_to_snake(s: str) -> str:
    """
    Convert CamelCase to snake_case.
    """
    # Prefix every uppercase letter with "_" and lowercase it, then drop
    # any leading "_" produced by an initial capital.
    pieces = []
    for ch in s:
        if ch.isupper():
            pieces.append("_")
            pieces.append(ch.lower())
        else:
            pieces.append(ch)
    return "".join(pieces).lstrip("_")
|
||||
|
||||
|
||||
pre_pep8_method_names = """
|
||||
addCondition addParseAction anyCloseTag anyOpenTag asDict asList cStyleComment canParseNext conditionAsParseAction
|
||||
convertToDate convertToDatetime convertToFloat convertToInteger countedArray cppStyleComment dblQuotedString
|
||||
dblSlashComment defaultName dictOf disableMemoization downcaseTokens enableLeftRecursion enablePackrat getName
|
||||
htmlComment ignoreWhitespace infixNotation inlineLiteralsUsing javaStyleComment leaveWhitespace
|
||||
lineEnd lineStart matchOnlyAtCol matchPreviousExpr matchPreviousLiteral nestedExpr nullDebugAction oneOf
|
||||
originalTextFor parseFile parseString parseWithTabs pythonStyleComment quotedString removeQuotes replaceWith
|
||||
resetCache restOfLine runTests scanString searchString setBreak setDebug setDebugActions setDefaultWhitespaceChars
|
||||
setFailAction setName setParseAction setResultsName setWhitespaceChars sglQuotedString stringEnd stringStart tokenMap
|
||||
traceParseAction transformString tryParse unicodeString upcaseTokens withAttribute withClass
|
||||
""".split()
|
||||
|
||||
special_changes = {
|
||||
"opAssoc": "OpAssoc",
|
||||
"delimitedList": "DelimitedList",
|
||||
"delimited_list": "DelimitedList",
|
||||
"replaceHTMLEntity": "replace_html_entity",
|
||||
"makeHTMLTags": "make_html_tags",
|
||||
"makeXMLTags": "make_xml_tags",
|
||||
"commonHTMLEntity": "common_html_entity",
|
||||
"stripHTMLTags": "strip_html_tags",
|
||||
"indentedBlock": "IndentedBlock",
|
||||
"locatedExpr": "Located",
|
||||
}
|
||||
|
||||
pre_pep8_arg_names = """parseAll maxMatches listAllMatches callDuringTry includeSeparators fullDump printResults
|
||||
failureTests postParse matchString identChars maxMismatches initChars bodyChars asKeyword excludeChars asGroupList
|
||||
asMatch quoteChar escChar escQuote unquoteResults endQuoteChar convertWhitespaceEscapes notChars wordChars stopOn
|
||||
failOn joinString markerString intExpr useRegex asString ignoreExpr""".split()
|
||||
|
||||
special_changes_arg_names = {
|
||||
"asList": "aslist",
|
||||
}
|
||||
|
||||
pre_pep8_method_name = pp.one_of(pre_pep8_method_names, as_keyword=True)
|
||||
pre_pep8_method_name.set_parse_action(lambda t: camel_to_snake(t[0]))
|
||||
special_pre_pep8_name = pp.one_of(special_changes, as_keyword=True)
|
||||
def update_special_changes(s, l, t):
    """Parse action: replace a matched legacy name with its entry from
    ``special_changes``. Two conversions also need manual follow-up code
    changes, so those emit a warning pointing at the offending line."""
    if t[0] == "indentedBlock":
        # IndentedBlock dropped the old indentStack argument entirely
        warnings.warn(
            "Conversion of 'indentedBlock' to new 'IndentedBlock'"
            " requires added code changes to remove 'indentStack' argument\n"
            f"  {pp.lineno(l, s)}: {pp.line(l, s)}",
            stacklevel=2,
        )
    elif t[0] == "locatedExpr":
        # Located no longer groups its contents, unlike locatedExpr
        warnings.warn(
            "Conversion of 'locatedExpr' to new 'Located'"
            " may require added code changes - Located does not automatically"
            " group parsed elements\n"
            f"  {pp.lineno(l, s)}: {pp.line(l, s)}",
            stacklevel=2,
        )
    return special_changes[t[0]]
|
||||
special_pre_pep8_name.set_parse_action(update_special_changes)
|
||||
# only replace arg names if part of an arg list
|
||||
pre_pep8_arg_name = pp.Regex(
|
||||
rf"{pp.util.make_compressed_re(pre_pep8_arg_names)}"
|
||||
) + pp.FollowedBy("=")
|
||||
pre_pep8_arg_name.set_parse_action(lambda t: camel_to_snake(t[0]))
|
||||
special_pre_pep8_arg_name = pp.one_of(special_changes_arg_names, as_keyword=True) + pp.FollowedBy("=")
|
||||
special_pre_pep8_arg_name.set_parse_action(lambda t: special_changes_arg_names[t[0]])
|
||||
|
||||
pep8_converter = special_pre_pep8_arg_name | pre_pep8_method_name | special_pre_pep8_name | pre_pep8_arg_name
|
||||
|
||||
if __name__ == "__main__":
    import argparse
    from pathlib import Path
    import sys

    argparser = argparse.ArgumentParser(
        description=(
            "Utility to convert Python pyparsing scripts using legacy"
            " camelCase names to use PEP8 snake_case names."
            "\nBy default, this script will only show whether this script would make any changes."
        )
    )
    argparser.add_argument("--verbose", "-v", action="store_true", help="Show unified diff for each source file")
    argparser.add_argument("-vv", action="store_true", dest="verbose2", help="Show unified diff for each source file, plus names of scanned files with no changes")
    argparser.add_argument("--update", "-u", action="store_true", help="Update source files in-place")
    argparser.add_argument("--encoding", type=str, default="utf-8", help="Encoding of source files (default: utf-8)")
    argparser.add_argument("--exit-zero-even-if-changed", "-exit0", action="store_true", help="Exit with status code 0 even if changes were made")
    argparser.add_argument("source_filename", nargs="+", help="Source filenames or filename patterns of Python files to be converted")
    args = argparser.parse_args()

    def show_diffs(original, modified):
        # Print a unified diff of the two file versions to stdout.
        import difflib

        diff = difflib.unified_diff(
            original.splitlines(), modified.splitlines(), lineterm=""
        )
        sys.stdout.writelines(f"{diff_line}\n" for diff_line in diff)

    exit_status = 0

    for filename_pattern in args.source_filename:

        for filename in Path().glob(filename_pattern):
            if not Path(filename).is_file():
                continue

            try:
                original_contents = Path(filename).read_text(encoding=args.encoding)
                modified_contents = pep8_converter.transform_string(
                    original_contents
                )

                if modified_contents != original_contents:
                    if args.update:
                        Path(filename).write_text(modified_contents, encoding=args.encoding)
                        # bug fix: messages previously had no format placeholder
                        # and printed a literal "(unknown)" instead of the file name
                        print(f"Converted {filename}")
                    else:
                        print(f"Found required changes in {filename}")

                    if args.verbose:
                        show_diffs(original_contents, modified_contents)
                        print()

                    exit_status = 1

                else:
                    if args.verbose2:
                        print(f"No required changes in {filename}")

            except Exception as e:
                print(f"Failed to convert {filename}: {type(e).__name__}: {e}")

    sys.exit(exit_status if not args.exit_zero_even_if_changed else 0)
|
||||
356
venv/lib/python3.12/site-packages/pyparsing/unicode.py
Normal file
356
venv/lib/python3.12/site-packages/pyparsing/unicode.py
Normal file
@@ -0,0 +1,356 @@
|
||||
# unicode.py
|
||||
|
||||
import sys
|
||||
from itertools import filterfalse
|
||||
from typing import Union
|
||||
|
||||
|
||||
class _lazyclassproperty:
    """Descriptor implementing a class-level property whose value is computed
    once per class and cached in that class's own ``_intern`` dict."""

    def __init__(self, fn):
        self.fn = fn
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    def __get__(self, obj, cls):
        if cls is None:
            cls = type(obj)
        # Give each class its own cache: if _intern is missing, or is actually
        # the same object as a superclass's _intern (i.e. merely inherited),
        # install a fresh dict on this class.
        if not hasattr(cls, "_intern") or any(
            cls._intern is getattr(superclass, "_intern", [])
            for superclass in cls.__mro__[1:]
        ):
            cls._intern = {}
        attrname = self.fn.__name__
        if attrname not in cls._intern:
            # first access: compute and cache
            cls._intern[attrname] = self.fn(cls)
        return cls._intern[attrname]
|
||||
|
||||
|
||||
# Type alias for unicode_set._ranges entries: each element is either a
# (first, last) inclusive codepoint pair, or a 1-tuple naming a single codepoint.
UnicodeRangeList = list[Union[tuple[int, int], tuple[int]]]
|
||||
|
||||
|
||||
class unicode_set:
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``. Ranges can be specified using
    2-tuples or a 1-tuple, such as::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
        ]

    Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    # codepoint ranges for this set; subclasses override
    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls) -> list[str]:
        # Gather codepoints from this class's _ranges and every ancestor's,
        # stopping at unicode_set itself, then dedupe and return sorted chars.
        ret: list[int] = []
        for cc in cls.__mro__:  # type: ignore[attr-defined]
            if cc is unicode_set:
                break
            for rr in getattr(cc, "_ranges", ()):
                # rr[-1] handles both (lo, hi) 2-tuples and (x,) 1-tuples
                ret.extend(range(rr[0], rr[-1] + 1))
        return sorted(chr(c) for c in set(ret))

    @_lazyclassproperty
    def printables(cls) -> str:
        """all non-whitespace characters in this range"""
        return "".join(filterfalse(str.isspace, cls._chars_for_ranges))

    @_lazyclassproperty
    def alphas(cls) -> str:
        """all alphabetic characters in this range"""
        return "".join(filter(str.isalpha, cls._chars_for_ranges))

    @_lazyclassproperty
    def nums(cls) -> str:
        """all numeric digit characters in this range"""
        return "".join(filter(str.isdigit, cls._chars_for_ranges))

    @_lazyclassproperty
    def alphanums(cls) -> str:
        """all alphanumeric characters in this range"""
        return cls.alphas + cls.nums

    @_lazyclassproperty
    def identchars(cls) -> str:
        """all characters in this range that are valid identifier characters, plus underscore '_'"""
        # always include the ASCII/Latin-1 identifier characters, even if
        # this set's ranges do not cover them
        return "".join(
            sorted(
                set(filter(str.isidentifier, cls._chars_for_ranges))
                | set(
                    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
                    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
                    "_"
                )
            )
        )

    @_lazyclassproperty
    def identbodychars(cls) -> str:
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9, and · (Unicode MIDDLE DOT)
        """
        # a char is a valid identifier *body* char if it is valid following "_"
        identifier_chars = set(
            c for c in cls._chars_for_ranges if f"_{c}".isidentifier()
        )
        return "".join(
            sorted(identifier_chars | set(cls.identchars) | set("0123456789·"))
        )

    @_lazyclassproperty
    def identifier(cls):
        """
        a pyparsing Word expression for an identifier using this range's definitions for
        identchars and identbodychars
        """
        # imported here to avoid a circular import at module load time
        from pyparsing import Word

        return Word(cls.identchars, cls.identbodychars)
|
||||
|
||||
|
||||
class pyparsing_unicode(unicode_set):
|
||||
"""
|
||||
A namespace class for defining common language unicode_sets.
|
||||
"""
|
||||
|
||||
# fmt: off
|
||||
|
||||
# define ranges in language character sets
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0020, sys.maxunicode),
|
||||
]
|
||||
|
||||
class BasicMultilingualPlane(unicode_set):
|
||||
"""Unicode set for the Basic Multilingual Plane"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0020, 0xFFFF),
|
||||
]
|
||||
|
||||
class Latin1(unicode_set):
|
||||
"""Unicode set for Latin-1 Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0020, 0x007E),
|
||||
(0x00A0, 0x00FF),
|
||||
]
|
||||
|
||||
class LatinA(unicode_set):
|
||||
"""Unicode set for Latin-A Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0100, 0x017F),
|
||||
]
|
||||
|
||||
class LatinB(unicode_set):
|
||||
"""Unicode set for Latin-B Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0180, 0x024F),
|
||||
]
|
||||
|
||||
class Greek(unicode_set):
|
||||
"""Unicode set for Greek Unicode Character Ranges"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0342, 0x0345),
|
||||
(0x0370, 0x0377),
|
||||
(0x037A, 0x037F),
|
||||
(0x0384, 0x038A),
|
||||
(0x038C,),
|
||||
(0x038E, 0x03A1),
|
||||
(0x03A3, 0x03E1),
|
||||
(0x03F0, 0x03FF),
|
||||
(0x1D26, 0x1D2A),
|
||||
(0x1D5E,),
|
||||
(0x1D60,),
|
||||
(0x1D66, 0x1D6A),
|
||||
(0x1F00, 0x1F15),
|
||||
(0x1F18, 0x1F1D),
|
||||
(0x1F20, 0x1F45),
|
||||
(0x1F48, 0x1F4D),
|
||||
(0x1F50, 0x1F57),
|
||||
(0x1F59,),
|
||||
(0x1F5B,),
|
||||
(0x1F5D,),
|
||||
(0x1F5F, 0x1F7D),
|
||||
(0x1F80, 0x1FB4),
|
||||
(0x1FB6, 0x1FC4),
|
||||
(0x1FC6, 0x1FD3),
|
||||
(0x1FD6, 0x1FDB),
|
||||
(0x1FDD, 0x1FEF),
|
||||
(0x1FF2, 0x1FF4),
|
||||
(0x1FF6, 0x1FFE),
|
||||
(0x2129,),
|
||||
(0x2719, 0x271A),
|
||||
(0xAB65,),
|
||||
(0x10140, 0x1018D),
|
||||
(0x101A0,),
|
||||
(0x1D200, 0x1D245),
|
||||
(0x1F7A1, 0x1F7A7),
|
||||
]
|
||||
|
||||
class Cyrillic(unicode_set):
|
||||
"""Unicode set for Cyrillic Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0400, 0x052F),
|
||||
(0x1C80, 0x1C88),
|
||||
(0x1D2B,),
|
||||
(0x1D78,),
|
||||
(0x2DE0, 0x2DFF),
|
||||
(0xA640, 0xA672),
|
||||
(0xA674, 0xA69F),
|
||||
(0xFE2E, 0xFE2F),
|
||||
]
|
||||
|
||||
class Chinese(unicode_set):
|
||||
"""Unicode set for Chinese Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x2E80, 0x2E99),
|
||||
(0x2E9B, 0x2EF3),
|
||||
(0x31C0, 0x31E3),
|
||||
(0x3400, 0x4DB5),
|
||||
(0x4E00, 0x9FEF),
|
||||
(0xA700, 0xA707),
|
||||
(0xF900, 0xFA6D),
|
||||
(0xFA70, 0xFAD9),
|
||||
(0x16FE2, 0x16FE3),
|
||||
(0x1F210, 0x1F212),
|
||||
(0x1F214, 0x1F23B),
|
||||
(0x1F240, 0x1F248),
|
||||
(0x20000, 0x2A6D6),
|
||||
(0x2A700, 0x2B734),
|
||||
(0x2B740, 0x2B81D),
|
||||
(0x2B820, 0x2CEA1),
|
||||
(0x2CEB0, 0x2EBE0),
|
||||
(0x2F800, 0x2FA1D),
|
||||
]
|
||||
|
||||
class Japanese(unicode_set):
|
||||
"""Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""
|
||||
|
||||
class Kanji(unicode_set):
|
||||
"Unicode set for Kanji Unicode Character Range"
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x4E00, 0x9FBF),
|
||||
(0x3000, 0x303F),
|
||||
]
|
||||
|
||||
class Hiragana(unicode_set):
|
||||
"""Unicode set for Hiragana Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x3041, 0x3096),
|
||||
(0x3099, 0x30A0),
|
||||
(0x30FC,),
|
||||
(0xFF70,),
|
||||
(0x1B001,),
|
||||
(0x1B150, 0x1B152),
|
||||
(0x1F200,),
|
||||
]
|
||||
|
||||
class Katakana(unicode_set):
|
||||
"""Unicode set for Katakana Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x3099, 0x309C),
|
||||
(0x30A0, 0x30FF),
|
||||
(0x31F0, 0x31FF),
|
||||
(0x32D0, 0x32FE),
|
||||
(0xFF65, 0xFF9F),
|
||||
(0x1B000,),
|
||||
(0x1B164, 0x1B167),
|
||||
(0x1F201, 0x1F202),
|
||||
(0x1F213,),
|
||||
]
|
||||
|
||||
漢字 = Kanji
|
||||
カタカナ = Katakana
|
||||
ひらがな = Hiragana
|
||||
|
||||
_ranges = (
|
||||
Kanji._ranges
|
||||
+ Hiragana._ranges
|
||||
+ Katakana._ranges
|
||||
)
|
||||
|
||||
class Hangul(unicode_set):
|
||||
"""Unicode set for Hangul (Korean) Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x1100, 0x11FF),
|
||||
(0x302E, 0x302F),
|
||||
(0x3131, 0x318E),
|
||||
(0x3200, 0x321C),
|
||||
(0x3260, 0x327B),
|
||||
(0x327E,),
|
||||
(0xA960, 0xA97C),
|
||||
(0xAC00, 0xD7A3),
|
||||
(0xD7B0, 0xD7C6),
|
||||
(0xD7CB, 0xD7FB),
|
||||
(0xFFA0, 0xFFBE),
|
||||
(0xFFC2, 0xFFC7),
|
||||
(0xFFCA, 0xFFCF),
|
||||
(0xFFD2, 0xFFD7),
|
||||
(0xFFDA, 0xFFDC),
|
||||
]
|
||||
|
||||
Korean = Hangul
|
||||
|
||||
class CJK(Chinese, Japanese, Hangul):
|
||||
"""Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""
|
||||
|
||||
class Thai(unicode_set):
|
||||
"""Unicode set for Thai Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0E01, 0x0E3A),
|
||||
(0x0E3F, 0x0E5B)
|
||||
]
|
||||
|
||||
class Arabic(unicode_set):
|
||||
"""Unicode set for Arabic Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0600, 0x061B),
|
||||
(0x061E, 0x06FF),
|
||||
(0x0700, 0x077F),
|
||||
]
|
||||
|
||||
class Hebrew(unicode_set):
|
||||
"""Unicode set for Hebrew Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0591, 0x05C7),
|
||||
(0x05D0, 0x05EA),
|
||||
(0x05EF, 0x05F4),
|
||||
(0xFB1D, 0xFB36),
|
||||
(0xFB38, 0xFB3C),
|
||||
(0xFB3E,),
|
||||
(0xFB40, 0xFB41),
|
||||
(0xFB43, 0xFB44),
|
||||
(0xFB46, 0xFB4F),
|
||||
]
|
||||
|
||||
class Devanagari(unicode_set):
|
||||
"""Unicode set for Devanagari Unicode Character Range"""
|
||||
_ranges: UnicodeRangeList = [
|
||||
(0x0900, 0x097F),
|
||||
(0xA8E0, 0xA8FF)
|
||||
]
|
||||
|
||||
BMP = BasicMultilingualPlane
|
||||
|
||||
# add language identifiers using language Unicode
|
||||
العربية = Arabic
|
||||
中文 = Chinese
|
||||
кириллица = Cyrillic
|
||||
Ελληνικά = Greek
|
||||
עִברִית = Hebrew
|
||||
日本語 = Japanese
|
||||
한국어 = Korean
|
||||
ไทย = Thai
|
||||
देवनागरी = Devanagari
|
||||
|
||||
# fmt: on
|
||||
514
venv/lib/python3.12/site-packages/pyparsing/util.py
Normal file
514
venv/lib/python3.12/site-packages/pyparsing/util.py
Normal file
@@ -0,0 +1,514 @@
|
||||
# util.py
|
||||
import contextlib
|
||||
import re
|
||||
from functools import lru_cache, wraps
|
||||
import inspect
|
||||
import itertools
|
||||
import types
|
||||
from typing import Callable, Union, Iterable, TypeVar, cast, Any
|
||||
import warnings
|
||||
|
||||
from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning
|
||||
|
||||
_bslash = chr(92)
|
||||
C = TypeVar("C", bound=Callable)
|
||||
|
||||
|
||||
class __config_flags:
    """Internal class for defining compatibility and debugging flags"""

    # names of all flags exposed by a subclass
    _all_names: list[str] = []
    # flags whose values are fixed and may not be changed at runtime
    _fixed_names: list[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        # Central setter shared by enable() and disable().
        if dname in cls._fixed_names:
            # fixed flags warn (rather than raise) so existing callers keep running
            warnings.warn(
                f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}"
                f" and cannot be overridden",
                PyparsingDiagnosticWarning,
                stacklevel=3,
            )
            return
        if dname in cls._all_names:
            setattr(cls, dname, value)
        else:
            raise ValueError(f"no such {cls._type_desc} {dname!r}")

    # public API: turn a named flag on or off
    enable = classmethod(lambda cls, name: cls._set(name, True))
    disable = classmethod(lambda cls, name: cls._set(name, False))
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :meth:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location sitting just past a newline is column 1 of the next line
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # otherwise, distance from the preceding newline (rfind returns -1 when
    # there is none, which makes the first line 1-based automatically)
    return loc - strg.rfind("\n", 0, loc)
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :meth:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    # number of newlines before loc, shifted to a 1-based line count
    newlines_before_loc = strg.count("\n", 0, loc)
    return newlines_before_loc + 1
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # slice from just after the previous newline (rfind gives -1 when absent,
    # so start lands at 0) up to, but not including, the next newline
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        return strg[start:]
    return strg[start:end]
|
||||
|
||||
|
||||
class _UnboundedCache:
|
||||
def __init__(self):
|
||||
cache = {}
|
||||
cache_get = cache.get
|
||||
self.not_in_cache = not_in_cache = object()
|
||||
|
||||
def get(_, key):
|
||||
return cache_get(key, not_in_cache)
|
||||
|
||||
def set_(_, key, value):
|
||||
cache[key] = value
|
||||
|
||||
def clear(_):
|
||||
cache.clear()
|
||||
|
||||
self.size = None
|
||||
self.get = types.MethodType(get, self)
|
||||
self.set = types.MethodType(set_, self)
|
||||
self.clear = types.MethodType(clear, self)
|
||||
|
||||
|
||||
class _FifoCache:
|
||||
def __init__(self, size):
|
||||
cache = {}
|
||||
self.size = size
|
||||
self.not_in_cache = not_in_cache = object()
|
||||
cache_get = cache.get
|
||||
cache_pop = cache.pop
|
||||
|
||||
def get(_, key):
|
||||
return cache_get(key, not_in_cache)
|
||||
|
||||
def set_(_, key, value):
|
||||
cache[key] = value
|
||||
while len(cache) > size:
|
||||
# pop oldest element in cache by getting the first key
|
||||
cache_pop(next(iter(cache)))
|
||||
|
||||
def clear(_):
|
||||
cache.clear()
|
||||
|
||||
self.get = types.MethodType(get, self)
|
||||
self.set = types.MethodType(set_, self)
|
||||
self.clear = types.MethodType(clear, self)
|
||||
|
||||
|
||||
class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    The memo tracks retained items by their access order; once `capacity` items
    are retained, the least recently used item is discarded.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}  # live entries
        self._memory = {}  # retained (deleted) entries, in access order

    def __getitem__(self, key):
        try:
            return self._active[key]
        except KeyError:
            # not active: look in retained memory, and move the entry to the
            # most-recently-used end of the dict (pop + reinsert).
            # Raises KeyError if the key is in neither mapping.
            self._memory[key] = self._memory.pop(key)
            return self._memory[key]

    def __setitem__(self, key, value):
        # a fresh assignment supersedes any retained copy
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        try:
            value = self._active.pop(key)
        except KeyError:
            # deleting a non-active key is a no-op
            pass
        else:
            # retain the deleted value; first trim retained entries beyond
            # capacity, discarding oldest (least recently used) first
            oldest_keys = list(self._memory)[: -(self._capacity + 1)]
            for key_to_delete in oldest_keys:
                self._memory.pop(key_to_delete)
            self._memory[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()
|
||||
|
||||
|
||||
class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key) -> None:
        # Deletion is deliberately a no-op so every entry is retained forever.
        return
|
||||
|
||||
|
||||
def _escape_regex_range_chars(s: str) -> str:
|
||||
# escape these chars: ^-[]
|
||||
for c in r"\^-[]":
|
||||
s = s.replace(c, _bslash + c)
|
||||
s = s.replace("\n", r"\n")
|
||||
s = s.replace("\t", r"\t")
|
||||
return str(s)
|
||||
|
||||
|
||||
class _GroupConsecutive:
    """
    Used as a callable `key` for itertools.groupby to group
    characters that are consecutive:

    .. testcode::

        from itertools import groupby
        from pyparsing.util import _GroupConsecutive

        grouped = groupby("abcdejkmpqrs", key=_GroupConsecutive())
        for index, group in grouped:
            print(tuple([index, list(group)]))

    prints:

    .. testoutput::

        (0, ['a', 'b', 'c', 'd', 'e'])
        (1, ['j', 'k'])
        (2, ['m'])
        (3, ['p', 'q', 'r', 's'])
    """

    def __init__(self) -> None:
        self.prev = 0                      # codepoint of the previously seen char
        self.counter = itertools.count()   # source of fresh group ids
        self.value = -1                    # current group id

    def __call__(self, char: str) -> int:
        # A codepoint gap greater than 1 from the previous char starts a new
        # group; consecutive chars keep returning the same group id, which is
        # exactly what groupby needs to cluster them.
        c_int = ord(char)
        self.prev, prev = c_int, self.prev
        if c_int - prev > 1:
            self.value = next(self.counter)
        return self.value
|
||||
|
||||
|
||||
def _is_iterable(obj, _str_type=(str, bytes), _iter_exception=Exception):
|
||||
# str's are iterable, but in pyparsing, we don't want to iterate over them
|
||||
if isinstance(obj, _str_type):
|
||||
return False
|
||||
|
||||
try:
|
||||
iter(obj)
|
||||
except _iter_exception: # noqa
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
def _escape_re_range_char(c: str) -> str:
|
||||
return fr"\{c}" if c in r"\^-][" else c
|
||||
|
||||
|
||||
def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    r"""
    Take a string or list of single-character strings, and return
    a string of the consecutive characters in that string collapsed
    into groups, as might be used in a regular expression '[a-z]'
    character set::

        'a' -> 'a' -> '[a]'
        'bc' -> 'bc' -> '[bc]'
        'defgh' -> 'd-h' -> '[d-h]'
        'fdgeh' -> 'd-h' -> '[d-h]'
        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Duplicates get collapsed out::

        'aaa' -> 'a' -> '[a]'
        'bcbccb' -> 'bc' -> '[bc]'
        'defghhgf' -> 'd-h' -> '[d-h]'
        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Spaces are preserved::

        'ab c' -> ' a-c' -> '[ a-c]'

    Characters that are significant when defining regex ranges
    get escaped::

        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
    """

    # Developer notes:
    # - Do not optimize this code assuming that the given input string
    #   or internal lists will be short (such as in loading generators into
    #   lists to make it easier to find the last element); this method is also
    #   used to generate regex ranges for character sets in the pyparsing.unicode
    #   classes, and these can be _very_ long lists of strings

    escape_re_range_char: Callable[[str], str]
    if re_escape:
        escape_re_range_char = _escape_re_range_char
    else:
        escape_re_range_char = lambda ss: ss

    ret = []

    # reduce input string to remove duplicates, and put in sorted order
    s_chars: list[str] = sorted(set(s))

    if len(s_chars) > 2:
        # find groups of characters that are consecutive (can be collapsed
        # down to "<first>-<last>")
        for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
            # _ is unimportant, is just used to identify groups
            # chars is an iterator of one or more consecutive characters
            # that comprise the current group
            first = last = next(chars)
            # a group of one char raises ValueError on the starred unpack;
            # suppress it and keep first == last in that case
            with contextlib.suppress(ValueError):
                *_, last = chars

            if first == last:
                # there was only a single char in this group
                ret.append(escape_re_range_char(first))

            elif last == chr(ord(first) + 1):
                # there were only 2 characters in this group
                # 'a','b' -> 'ab'
                ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")

            else:
                # there were > 2 characters in this group, make into a range
                # 'c','d','e' -> 'c-e'
                ret.append(
                    f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
                )
    else:
        # only 1 or 2 chars were given to form into groups
        # 'a' -> ['a']
        # 'bc' -> ['b', 'c']
        # 'dg' -> ['d', 'g']
        # no need to list them with "-", just return as a list
        # (after escaping)
        ret = [escape_re_range_char(c) for c in s_chars]

    return "".join(ret)
|
||||
|
||||
|
||||
def _flatten(ll: Iterable) -> list:
|
||||
ret = []
|
||||
for i in ll:
|
||||
# Developer notes:
|
||||
# - do not collapse this section of code, isinstance checks are done
|
||||
# in optimal order
|
||||
if isinstance(i, str):
|
||||
ret.append(i)
|
||||
elif isinstance(i, Iterable):
|
||||
ret.extend(_flatten(i))
|
||||
else:
|
||||
ret.append(i)
|
||||
return ret
|
||||
|
||||
|
||||
def _convert_escaped_numerics_to_char(s: str) -> str:
|
||||
if s == "0":
|
||||
return "\0"
|
||||
if s.isdigit() and len(s) == 3:
|
||||
return chr(int(s, 8))
|
||||
elif s.startswith(("u", "x")):
|
||||
return chr(int(s[1:], 16))
|
||||
return s
|
||||
|
||||
|
||||
def make_compressed_re(
    word_list: Iterable[str],
    max_level: int = 2,
    *,
    non_capturing_groups: bool = True,
    _level: int = 1,
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.

    :param word_list: words to combine into a single alternation regex
    :param max_level: maximum nesting depth of generated groups
    :param non_capturing_groups: emit ``(?:...)`` groups instead of ``(...)``
    :param _level: internal recursion depth counter - do not pass explicitly
    :raises ValueError: if ``word_list`` is empty or contains an empty string
    """

    def get_suffixes_from_common_prefixes(namelist: list[str]):
        # namelist must be sorted so groupby clusters words that share the
        # same first character; yields (prefix_char, [suffixes longest-first])
        if len(namelist) > 1:
            for prefix, suffixes in itertools.groupby(namelist, key=lambda s: s[:1]):
                yield prefix, sorted([s[1:] for s in suffixes], key=len, reverse=True)
        else:
            yield namelist[0][0], [namelist[0][1:]]

    if _level == 1:
        if not word_list:
            raise ValueError("no words given to make_compressed_re()")

        if "" in word_list:
            raise ValueError("word list cannot contain empty string")
    else:
        # internal recursive call, just return empty string if no words
        if not word_list:
            return ""

    # dedupe the word list (dict preserves insertion order)
    word_list = list({}.fromkeys(word_list))

    if max_level == 0:
        # nesting disabled: emit a flat alternation (longest first, so that
        # longer words win over their prefixes), or a single character class
        # if every word is one char long
        if any(len(wd) > 1 for wd in word_list):
            return "|".join(
                sorted([re.escape(wd) for wd in word_list], key=len, reverse=True)
            )
        else:
            return f"[{''.join(_escape_regex_range_chars(wd) for wd in word_list)}]"

    ret = []
    sep = ""
    ncgroup = "?:" if non_capturing_groups else ""

    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)):
        ret.append(sep)
        sep = "|"

        initial = re.escape(initial)

        trailing = ""
        if "" in suffixes:
            # one word is exactly this prefix - make the suffix part optional
            trailing = "?"
            suffixes.remove("")

        if len(suffixes) > 1:
            if all(len(s) == 1 for s in suffixes):
                # all suffixes are single chars - use a character class
                ret.append(
                    f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                )
            else:
                if _level < max_level:
                    # recurse to compress the suffixes as their own word list
                    suffix_re = make_compressed_re(
                        sorted(suffixes),
                        max_level,
                        non_capturing_groups=non_capturing_groups,
                        _level=_level + 1,
                    )
                    ret.append(f"{initial}({ncgroup}{suffix_re}){trailing}")
                else:
                    if all(len(s) == 1 for s in suffixes):
                        ret.append(
                            f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                        )
                    else:
                        # depth limit reached - flat alternation, longest first
                        suffixes.sort(key=len, reverse=True)
                        ret.append(
                            f"{initial}({ncgroup}{'|'.join(re.escape(s) for s in suffixes)}){trailing}"
                        )
        else:
            if suffixes:
                suffix = re.escape(suffixes[0])
                if len(suffix) > 1 and trailing:
                    # group a multi-char suffix so '?' applies to all of it
                    ret.append(f"{initial}({ncgroup}{suffix}){trailing}")
                else:
                    ret.append(f"{initial}{suffix}{trailing}")
            else:
                ret.append(initial)
    return "".join(ret)
|
||||
|
||||
|
||||
def replaced_by_pep8(compat_name: str, fn: C) -> C:
    """Return a wrapper for *fn* to be published under the legacy camelCase
    name *compat_name*; calling the wrapper emits a deprecation warning and
    then delegates to *fn*."""

    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(*args, **kwargs)

    # re-point metadata at the legacy name so help()/introspection make sense
    _inner.__doc__ = f"""
    .. deprecated:: 3.0.0
        Use :class:`{fn.__name__}` instead
    """
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__  # type: ignore [attr-defined]
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__  # type: ignore [misc,attr-defined]
    else:
        _inner.__kwdefaults__ = None  # type: ignore [attr-defined]
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)
|
||||
|
||||
|
||||
def _to_pep8_name(s: str, _re_sub_pattern=re.compile(r"([a-z])([A-Z])")) -> str:
|
||||
s = _re_sub_pattern.sub(r"\1_\2", s)
|
||||
return s.lower()
|
||||
|
||||
|
||||
def deprecate_argument(
    kwargs: dict[str, Any], arg_name: str, default_value=None, *, new_name: str = ""
) -> Any:
    """Look up the deprecated keyword *arg_name* in *kwargs*.

    If the caller supplied *arg_name*, emit a ``PyparsingDeprecationWarning``
    pointing at *new_name* (derived via ``_to_pep8_name`` when not given
    explicitly) and return the supplied value.  Otherwise record
    *default_value* under *arg_name* and return that.
    """
    if arg_name not in kwargs:
        # argument was not passed - install and return the default
        kwargs[arg_name] = default_value
        return default_value

    replacement = new_name or _to_pep8_name(arg_name)
    # stacklevel=3 attributes the warning to the caller of the public API,
    # skipping both this helper and the API function that invoked it
    warnings.warn(
        f"{arg_name!r} argument is deprecated, use {replacement!r}",
        category=PyparsingDeprecationWarning,
        stacklevel=3,
    )
    return kwargs[arg_name]
|
||||
10
venv/lib/python3.12/site-packages/pyparsing/warnings.py
Normal file
10
venv/lib/python3.12/site-packages/pyparsing/warnings.py
Normal file
@@ -0,0 +1,10 @@
|
||||
class PyparsingWarning(UserWarning):
    """Common ancestor of every warning category emitted by pyparsing."""
|
||||
|
||||
|
||||
class PyparsingDeprecationWarning(PyparsingWarning, DeprecationWarning):
    """Emitted when a deprecated pyparsing name or argument is used."""
|
||||
|
||||
|
||||
class PyparsingDiagnosticWarning(PyparsingWarning):
    """Emitted by pyparsing's opt-in grammar diagnostic checks."""
|
||||
Reference in New Issue
Block a user