|
| 1 | +# module pyparsing.py |
| 2 | +# |
| 3 | +# Copyright (c) 2003-2022 Paul T. McGuire |
| 4 | +# |
| 5 | +# Permission is hereby granted, free of charge, to any person obtaining |
| 6 | +# a copy of this software and associated documentation files (the |
| 7 | +# "Software"), to deal in the Software without restriction, including |
| 8 | +# without limitation the rights to use, copy, modify, merge, publish, |
| 9 | +# distribute, sublicense, and/or sell copies of the Software, and to |
| 10 | +# permit persons to whom the Software is furnished to do so, subject to |
| 11 | +# the following conditions: |
| 12 | +# |
| 13 | +# The above copyright notice and this permission notice shall be |
| 14 | +# included in all copies or substantial portions of the Software. |
| 15 | +# |
| 16 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 17 | +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 18 | +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 19 | +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 20 | +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 21 | +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 22 | +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 23 | +# |
| 24 | + |
| 25 | +__doc__ = """ |
| 26 | +pyparsing module - Classes and methods to define and execute parsing grammars |
| 27 | +============================================================================= |
| 28 | +
|
| 29 | +The pyparsing module is an alternative approach to creating and |
| 30 | +executing simple grammars, vs. the traditional lex/yacc approach, or the |
| 31 | +use of regular expressions. With pyparsing, you don't need to learn |
| 32 | +a new syntax for defining grammars or matching expressions - the parsing |
| 33 | +module provides a library of classes that you use to construct the |
| 34 | +grammar directly in Python. |
| 35 | +
|
| 36 | +Here is a program to parse "Hello, World!" (or any greeting of the form |
| 37 | +``"<salutation>, <addressee>!"``), built up using :class:`Word`, |
| 38 | +:class:`Literal`, and :class:`And` elements |
| 39 | +(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions, |
| 40 | +and the strings are auto-converted to :class:`Literal` expressions):: |
| 41 | +
|
| 42 | + from pyparsing import Word, alphas |
| 43 | +
|
| 44 | + # define grammar of a greeting |
| 45 | + greet = Word(alphas) + "," + Word(alphas) + "!" |
| 46 | +
|
| 47 | + hello = "Hello, World!" |
| 48 | + print(hello, "->", greet.parse_string(hello)) |
| 49 | +
|
| 50 | +The program outputs the following:: |
| 51 | +
|
| 52 | + Hello, World! -> ['Hello', ',', 'World', '!'] |
| 53 | +
|
| 54 | +The Python representation of the grammar is quite readable, owing to the |
| 55 | +self-explanatory class names, and the use of :class:`'+'<And>`, |
| 56 | +:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators. |
| 57 | +
|
| 58 | +The :class:`ParseResults` object returned from |
| 59 | +:class:`ParserElement.parse_string` can be |
| 60 | +accessed as a nested list, a dictionary, or an object with named |
| 61 | +attributes. |
| 62 | +
|
| 63 | +The pyparsing module handles some of the problems that are typically |
| 64 | +vexing when writing text parsers: |
| 65 | +
|
| 66 | + - extra or missing whitespace (the above program will also handle |
| 67 | + "Hello,World!", "Hello , World !", etc.) |
| 68 | + - quoted strings |
| 69 | + - embedded comments |
| 70 | +
|
| 71 | +
|
| 72 | +Getting Started - |
| 73 | +----------------- |
| 74 | +Visit the classes :class:`ParserElement` and :class:`ParseResults` to |
| 75 | +see the base classes that most other pyparsing |
| 76 | +classes inherit from. Use the docstrings for examples of how to: |
| 77 | +
|
| 78 | + - construct literal match expressions from :class:`Literal` and |
| 79 | + :class:`CaselessLiteral` classes |
| 80 | + - construct character word-group expressions using the :class:`Word` |
| 81 | + class |
| 82 | + - see how to create repetitive expressions using :class:`ZeroOrMore` |
| 83 | + and :class:`OneOrMore` classes |
| 84 | + - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`, |
| 85 | + and :class:`'&'<Each>` operators to combine simple expressions into |
| 86 | + more complex ones |
| 87 | + - associate names with your parsed results using |
| 88 | + :class:`ParserElement.set_results_name` |
| 89 | + - access the parsed data, which is returned as a :class:`ParseResults` |
| 90 | + object |
| 91 | + - find some helpful expression short-cuts like :class:`DelimitedList` |
| 92 | + and :class:`one_of` |
| 93 | + - find more useful common expressions in the :class:`pyparsing_common` |
| 94 | + namespace class |
| 95 | +""" |
| 96 | +from typing import NamedTuple |
| 97 | + |
| 98 | + |
class version_info(NamedTuple):
    """Structured version number for this package.

    Uses the same five field names as ``sys.version_info``: ``major``,
    ``minor``, ``micro``, ``releaselevel`` and ``serial``.
    """

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self) -> str:
        """Return the version as a string, e.g. ``"3.1.1"`` or ``"3.0.0rc2"``.

        A ``"final"`` release is rendered as just ``major.minor.micro``.
        Any other release level appends the first letter of ``releaselevel``
        followed by ``serial``; a level whose first letter is ``"c"`` is
        additionally prefixed with ``"r"`` so that it reads ``rc<serial>``.

        Raises:
            IndexError: if ``releaselevel`` is an empty string.
        """
        base = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return base

        level_code = self.releaselevel[0]
        # only a level starting with "c" gets the extra "r" ("c" -> "rc")
        prefix = "r" if level_code == "c" else ""
        return f"{base}{prefix}{level_code}{self.serial}"

    def __str__(self) -> str:
        """Return ``"<module name> <version> / <version time>"``."""
        # __version_time__ is a module-level global assigned after this class
        # is defined; it is only looked up when __str__ is actually called.
        return f"{__name__} {self.__version__} / {__version_time__}"

    def __repr__(self) -> str:
        """Return ``<module name>.version_info(field=value, ...)``."""
        field_text = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({field_text})"
| 121 | + |
| 122 | + |
# Package version metadata.
# Structured version: (major, minor, micro, releaselevel, serial).
__version_info__ = version_info(3, 1, 1, "final", 1)
# Timestamp string that version_info.__str__ appends after the version.
__version_time__ = "29 Jul 2023 22:27 UTC"
# String form of the version, derived from the structured tuple above.
__version__ = __version_info__.__version__
# camelCase alias of __version_time__ (exported in __all__ under the
# "pre-PEP8 compatibility names" section).
__versionTime__ = __version_time__
# NOTE(review): the e-mail address below reads "[email protected]", which looks
# like redaction by the page this text was extracted from -- confirm against
# the upstream source before relying on it.
__author__ = "Paul McGuire <[email protected]>"
| 128 | + |
| 129 | +from .util import * |
| 130 | +from .exceptions import * |
| 131 | +from .actions import * |
| 132 | +from .core import __diag__, __compat__ |
| 133 | +from .results import * |
| 134 | +from .core import * # type: ignore[misc, assignment] |
| 135 | +from .core import _builtin_exprs as core_builtin_exprs |
| 136 | +from .helpers import * # type: ignore[misc, assignment] |
| 137 | +from .helpers import _builtin_exprs as helper_builtin_exprs |
| 138 | + |
| 139 | +from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode |
| 140 | +from .testing import pyparsing_test as testing |
| 141 | +from .common import ( |
| 142 | + pyparsing_common as common, |
| 143 | + _builtin_exprs as common_builtin_exprs, |
| 144 | +) |
| 145 | + |
# define backward compat synonyms
# The imports above bind the namespace classes under the short names
# `unicode`, `common` and `testing`.  Each long `pyparsing_*` name is bound
# here only if it is not already a module global at this point (presumably
# one of the star-imports above may already have provided it -- verify).
if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode  # type: ignore[misc]
if "pyparsing_common" not in globals():
    pyparsing_common = common  # type: ignore[misc]
if "pyparsing_test" not in globals():
    pyparsing_test = testing  # type: ignore[misc]

# Add the builtin expressions collected by .common and .helpers to the
# collection imported from .core.
# NOTE(review): assumes these are lists, in which case `+=` extends the
# object owned by .core in place rather than creating a new one -- TODO confirm.
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
| 155 | + |
| 156 | + |
# Public API of the package: the names bound by a star-import of this module.
# The first section holds the current names; the legacy camelCase spellings
# follow under "pre-PEP8 compatibility names".
__all__ = [
    "__version__",
    "__version_time__",
    "__author__",
    "__compat__",
    "__diag__",
    "And",
    "AtLineStart",
    "AtStringStart",
    "CaselessKeyword",
    "CaselessLiteral",
    "CharsNotIn",
    "CloseMatch",
    "Combine",
    "DelimitedList",
    "Dict",
    "Each",
    "Empty",
    "FollowedBy",
    "Forward",
    "GoToColumn",
    "Group",
    "IndentedBlock",
    "Keyword",
    "LineEnd",
    "LineStart",
    "Literal",
    "Located",
    "PrecededBy",
    "MatchFirst",
    "NoMatch",
    "NotAny",
    "OneOrMore",
    "OnlyOnce",
    "OpAssoc",
    "Opt",
    "Optional",
    "Or",
    "ParseBaseException",
    "ParseElementEnhance",
    "ParseException",
    "ParseExpression",
    "ParseFatalException",
    "ParseResults",
    "ParseSyntaxException",
    "ParserElement",
    "PositionToken",
    "QuotedString",
    "RecursiveGrammarException",
    "Regex",
    "SkipTo",
    "StringEnd",
    "StringStart",
    "Suppress",
    "Token",
    "TokenConverter",
    "White",
    "Word",
    "WordEnd",
    "WordStart",
    "ZeroOrMore",
    "Char",
    "alphanums",
    "alphas",
    "alphas8bit",
    "any_close_tag",
    "any_open_tag",
    "autoname_elements",
    "c_style_comment",
    "col",
    "common_html_entity",
    "condition_as_parse_action",
    "counted_array",
    "cpp_style_comment",
    "dbl_quoted_string",
    "dbl_slash_comment",
    "delimited_list",
    "dict_of",
    "empty",
    "hexnums",
    "html_comment",
    "identchars",
    "identbodychars",
    "infix_notation",
    "java_style_comment",
    "line",
    "line_end",
    "line_start",
    "lineno",
    "make_html_tags",
    "make_xml_tags",
    "match_only_at_col",
    "match_previous_expr",
    "match_previous_literal",
    "nested_expr",
    "null_debug_action",
    "nums",
    "one_of",
    "original_text_for",
    "printables",
    "punc8bit",
    "pyparsing_common",
    "pyparsing_test",
    "pyparsing_unicode",
    "python_style_comment",
    "quoted_string",
    "remove_quotes",
    "replace_with",
    "replace_html_entity",
    "rest_of_line",
    "sgl_quoted_string",
    "srange",
    "string_end",
    "string_start",
    "token_map",
    "trace_parse_action",
    "ungroup",
    "unicode_set",
    "unicode_string",
    "with_attribute",
    "with_class",
    # pre-PEP8 compatibility names
    "__versionTime__",
    "anyCloseTag",
    "anyOpenTag",
    "cStyleComment",
    "commonHTMLEntity",
    "conditionAsParseAction",
    "countedArray",
    "cppStyleComment",
    "dblQuotedString",
    "dblSlashComment",
    "delimitedList",
    "dictOf",
    "htmlComment",
    "indentedBlock",
    "infixNotation",
    "javaStyleComment",
    "lineEnd",
    "lineStart",
    "locatedExpr",
    "makeHTMLTags",
    "makeXMLTags",
    "matchOnlyAtCol",
    "matchPreviousExpr",
    "matchPreviousLiteral",
    "nestedExpr",
    "nullDebugAction",
    "oneOf",
    "opAssoc",
    "originalTextFor",
    "pythonStyleComment",
    "quotedString",
    "removeQuotes",
    "replaceHTMLEntity",
    "replaceWith",
    "restOfLine",
    "sglQuotedString",
    "stringEnd",
    "stringStart",
    "tokenMap",
    "traceParseAction",
    "unicodeString",
    "withAttribute",
    "withClass",
    # short aliases bound by the "... as common / unicode / testing" imports
    "common",
    "unicode",
    "testing",
]
0 commit comments