
Commit 41c7eb2 (parent: 46320ea)

Add srcheaders module and refactor tokenization

Introduce a new module for handling Python source headers and refactor the tokenization process to use an iterator for improved efficiency. Remove the obsolete pyheader module.

6 files changed (+121, -82 lines)

omdev/.manifests.json

Lines changed: 12 additions & 0 deletions

```diff
@@ -203,6 +203,18 @@
       }
     }
   },
+  {
+    "module": ".py.srcheaders",
+    "attr": "_CLI_MODULE",
+    "file": "omdev/py/srcheaders.py",
+    "line": 64,
+    "value": {
+      "$.cli.types.CliModule": {
+        "cmd_name": "py/srcheaders",
+        "mod_name": "omdev.py.srcheaders"
+      }
+    }
+  },
   {
     "module": ".py.tools.importscan",
     "attr": "_CLI_MODULE",
```

omdev/py/attrdocs.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -172,6 +172,8 @@ def _main() -> None:
         with open(args.file) as f:
             src = f.read()
 
+        #
+
         attr_docs = extract_attr_docs(src)
 
         #
```

omdev/py/srcheaders.py

Lines changed: 97 additions & 0 deletions (new file)

```python
import ast
import dataclasses as dc
import typing as ta

from omdev.tokens import all as tks
from omlish.lite.check import check


##


@dc.dataclass(frozen=True)
class PyHeaderLine:
    kind: ta.Literal['comment', 'string']
    src: str
    content: str
    line: int
    col: int


def get_py_header_lines(src: str) -> list[PyHeaderLine]:
    ret: list[PyHeaderLine] = []

    src_toks = tks.iter_src_to_tokens(src)
    hdr_toks = (
        tok
        for tok in src_toks
        if tok.name == 'COMMENT' or
        (tok.name not in tks.WS_NAMES and tok.name != 'NL')
    )

    for tok in hdr_toks:
        if tok.name == 'COMMENT':
            cs = tok.src.lstrip()
            check.state(cs.startswith('#'))
            cs = cs[1:].lstrip()
            ret.append(PyHeaderLine(
                kind='comment',
                src=tok.src,
                content=cs,
                line=check.isinstance(tok.line, int),
                col=check.isinstance(tok.utf8_byte_offset, int),
            ))

        elif tok.name == 'STRING':
            ss = ast.literal_eval(tok.src)
            ret.append(PyHeaderLine(
                kind='string',
                src=tok.src,
                content=ss,
                line=check.isinstance(tok.line, int),
                col=check.isinstance(tok.utf8_byte_offset, int),
            ))

        else:
            break

    return ret


##


# @omlish-manifest
_CLI_MODULE = {'$omdev.cli.types.CliModule': {
    'cmd_name': 'py/srcheaders',
    'mod_name': __name__,
}}


if __name__ == '__main__':
    def _main() -> None:
        import argparse

        parser = argparse.ArgumentParser()
        parser.add_argument('file')
        args = parser.parse_args()

        #

        with open(args.file) as f:
            src = f.read()

        #

        hls = get_py_header_lines(src)

        #

        import json

        print(json.dumps(
            [dc.asdict(hl) for hl in hls],
            indent=2,
        ))

    _main()
```
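For a sense of what the new module returns, here is a usage sketch; the sample source and the printed lines are illustrative, inferred from the `PyHeaderLine` dataclass above rather than taken from the commit:

```python
# Illustrative only: sample input and approximate output, not part of the commit.
from omdev.py.srcheaders import get_py_header_lines

src = '''\
#!/usr/bin/env python3
# @omlish-lite
"""Module docstring."""
import os
'''

for hl in get_py_header_lines(src):
    print(hl.kind, hl.line, repr(hl.content))

# Expected to print something like:
#   comment 1 '!/usr/bin/env python3'
#   comment 2 '@omlish-lite'
#   string 3 'Module docstring.'
```

Note that scanning stops at the first token that is neither a comment, a string, nor whitespace, so `import os` and everything after it is never visited.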

omdev/tokens/all.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -30,6 +30,7 @@
 NON_CODING_TOKENS = TokenNames.NON_CODING_TOKENS  # noqa
 
 curly_escape = Tokenization.curly_escape  # noqa
+iter_src_to_tokens = Tokenization.iter_src_to_tokens  # noqa
 src_to_tokens = Tokenization.src_to_tokens  # noqa
 parse_string_literal = Tokenization.parse_string_literal  # noqa
 tokens_to_src = Tokenization.tokens_to_src  # noqa
```

omdev/tokens/tokenizert.py

Lines changed: 9 additions & 8 deletions

```diff
@@ -82,13 +82,12 @@ def _re_partition(cls, regex: ta.Pattern[str], s: str) -> ta.Tuple[str, str, str]:
             return (s, '', '')
 
     @classmethod
-    def src_to_tokens(cls, src: str) -> ta.List[Token]:
+    def iter_src_to_tokens(cls, src: str) -> ta.Iterator[Token]:
         tokenize_target = io.StringIO(src)
         lines = ('', *tokenize_target)
 
         tokenize_target.seek(0)
 
-        tokens = []
         last_line = 1
         last_col = 0
         end_offset = 0
@@ -106,20 +105,20 @@ def src_to_tokens(cls, src: str) -> ta.List[Token]:
                 while cls._ESCAPED_NL_RE.search(newtok):
                     ws, nl, newtok = cls._re_partition(cls._ESCAPED_NL_RE, newtok)
                     if ws:
-                        tokens.append(Token(TokenNames.UNIMPORTANT_WS, ws, last_line, end_offset))
+                        yield Token(TokenNames.UNIMPORTANT_WS, ws, last_line, end_offset)
                         end_offset += len(ws.encode())
-                    tokens.append(Token(TokenNames.ESCAPED_NL, nl, last_line, end_offset))
+                    yield Token(TokenNames.ESCAPED_NL, nl, last_line, end_offset)
                     end_offset = 0
                     last_line += 1
                 if newtok:
-                    tokens.append(Token(TokenNames.UNIMPORTANT_WS, newtok, sline, 0))
+                    yield Token(TokenNames.UNIMPORTANT_WS, newtok, sline, 0)
                     end_offset = len(newtok.encode())
                 else:
                     end_offset = 0
 
             elif scol > last_col:
                 newtok = line[last_col:scol]
-                tokens.append(Token(TokenNames.UNIMPORTANT_WS, newtok, sline, end_offset))
+                yield Token(TokenNames.UNIMPORTANT_WS, newtok, sline, end_offset)
                 end_offset += len(newtok.encode())
 
             tok_name = tokenize.tok_name[tok_type]
@@ -130,14 +129,16 @@ def src_to_tokens(cls, src: str) -> ta.List[Token]:
                     ecol += len(new_tok_text) - len(tok_text)
                     tok_text = new_tok_text
 
-            tokens.append(Token(tok_name, tok_text, sline, end_offset))
+            yield Token(tok_name, tok_text, sline, end_offset)
             last_line, last_col = eline, ecol
             if sline != eline:
                 end_offset = len(lines[last_line][:last_col].encode())
            else:
                 end_offset += len(tok_text.encode())
 
-        return tokens
+    @classmethod
+    def src_to_tokens(cls, src: str) -> ta.List[Token]:
+        return list(cls.iter_src_to_tokens(src))
 
     @classmethod
     def parse_string_literal(cls, src: str) -> ta.Tuple[str, str]:
```
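The point of the refactor shows up in callers like `get_py_header_lines`, which only ever needs the first few tokens of a file: a generator lets them stop tokenizing as soon as the header ends, while the old `src_to_tokens` is preserved as a thin `list(...)` wrapper. A minimal sketch of the difference (the input path is hypothetical):

```python
import itertools

from omdev.tokens import all as tks

with open('some_module.py') as f:  # hypothetical input file
    src = f.read()

# Before: the whole file is tokenized into a list up front.
head = tks.src_to_tokens(src)[:10]

# After: tokens are produced lazily, so consuming only the first ten
# stops the tokenizer early instead of walking the entire source.
head_lazy = list(itertools.islice(tks.iter_src_to_tokens(src), 10))
```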

x/pyheader.py

Lines changed: 0 additions & 74 deletions
This file was deleted.
