Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a46a102
postprocess: combine options into `CommentTransferOptions`
kkysen Jan 16, 2026
d68223c
postprocess: make `CommentTransferPrompt` a `@dataclass`
kkysen Jan 16, 2026
f95692c
postprocess: use an `f""` for `def CommentTransferPrompt.__str__`
kkysen Jan 16, 2026
d1ec1aa
tests/integration: delete LLM cache for excluded fns
kkysen Jan 19, 2026
45446ae
postprocess: log which function we're transferring comments to
kkysen Jan 19, 2026
5ff9872
postprocess: add missing space in log for skipping fns due to `--excl…
kkysen Jan 19, 2026
edc407f
postprocess: add test for `def to_multiline_toml`
kkysen Jan 19, 2026
01ee0bd
postprocess: re-arrange things so we run all of the prompts at once
kkysen Jan 19, 2026
ad85495
postprocess: print which prompt number out of the total we're current…
kkysen Jan 19, 2026
45c136d
postprocess: use `logging.info` for printing the highlighted rust
kkysen Jan 19, 2026
df6c547
postprocess: add `--fail-fast`
kkysen Jan 19, 2026
f97c861
postprocess: run uncached prompts at the end with `--no-fail-fast` so…
kkysen Jan 19, 2026
aff0bbe
postprocess: print an exclude file for failures
kkysen Jan 19, 2026
79857eb
postprocess: print a unified diff of the C and Rust comments
kkysen Jan 19, 2026
6fc147a
postprocess: use `rich` to pretty-print the diff
kkysen Jan 19, 2026
fdad2ac
postprocess: add `--gc-cache` to garbage collect any cache entries no…
kkysen Jan 19, 2026
232e986
postprocess: remove `gc_cache` from `CommentTransferOptions` since it…
kkysen Jan 23, 2026
ae20c1e
postprocess: switch `--gc-cache` to use mtimes instead
kkysen Jan 23, 2026
aa0e905
postprocess: move `--exclude-file` checking to after comment transfer…
kkysen Jan 20, 2026
c7fcde3
postprocess: just say "fn" instead of "Rust fn"
kkysen Jan 20, 2026
1868b14
postprocess: warn if fn is excluded, but would've succeeded
kkysen Jan 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@ polonius_cache/

# Outputs of c2rust-transpile snapshot tests
c2rust-transpile/tests/snapshots/**/*.rs

**/llm-cache/.gc
49 changes: 47 additions & 2 deletions c2rust-postprocess/postprocess/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
SYSTEM_INSTRUCTION,
AbstractGenerativeModel,
CommentTransfer,
CommentTransferFailure,
CommentTransferOptions,
)
from postprocess.utils import existing_file

Expand Down Expand Up @@ -93,6 +95,23 @@ def build_arg_parser() -> argparse.ArgumentParser:
help="Update the Rust in-place",
)

parser.add_argument(
"--fail-fast",
required=False,
default=True,
action=BooleanOptionalAction,
help="Fail on the first error (vs. collecting errors until the end)",
)

parser.add_argument(
    "--gc-cache",
    required=False,
    default=False,
    action=BooleanOptionalAction,
    # Adjacent string literals are concatenated verbatim,
    # so the separating space has to be part of one of them.
    help="Garbage collect cache entries that "
    "have been unused since the last --gc-cache",
)

# TODO: add option to select model
# TODO: add option to configure cache
# TODO: add option to select what transforms to apply
Expand Down Expand Up @@ -135,13 +154,39 @@ def main(argv: Sequence[str] | None = None):

# TODO: instantiate transform(s) based on command line args
xform = CommentTransfer(cache, model)
xform.transfer_comments_dir(
root_rust_source_file=args.root_rust_source_file,

options = CommentTransferOptions(
exclude_list=IdentifierExcludeList(src_path=args.exclude_file),
ident_filter=args.ident_filter,
update_rust=args.update_rust,
fail_fast=args.fail_fast,
)

failures: list[CommentTransferFailure] = []
for failure in xform.transfer_comments_dir(
root_rust_source_file=args.root_rust_source_file,
options=options,
):
failures.append(failure)
if options.fail_fast:
break

if args.gc_cache:
cache.gc_sweep()

for failure in failures:
failure.print()
print()

if failures:
print(f"""\
failures exclude file:

```yaml
{CommentTransferFailure.to_exclude_file(failures)}
```""")
return 1

return 0
except KeyboardInterrupt:
logging.warning("Interrupted by user, terminating...")
Expand Down
98 changes: 94 additions & 4 deletions c2rust-postprocess/postprocess/cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
from abc import ABC, abstractmethod
from errno import ENOTEMPTY
from hashlib import sha256
from pathlib import Path
from tempfile import gettempdir
Expand Down Expand Up @@ -75,6 +76,13 @@ def flush(self) -> None: # noqa: B027
"""
pass

def gc_sweep(self) -> None:
    """
    Drop every cache entry that neither `lookup` nor `update`
    has used since the previous `gc_sweep`.

    This base implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError()


TomlValue = Union[None, str, int, float, bool, "TomlList", "TomlDict"]
TomlList = list[TomlValue]
Expand Down Expand Up @@ -161,6 +169,22 @@ def cache_dir(
message_digest = self.get_message_digest(messages)
return self._path / transform / identifier / message_digest

def gc_mark_file(self) -> Path:
    """
    Path of the `.gc` marker file, located directly inside the cache root.
    """

    marker_name = ".gc"
    return self._path.joinpath(marker_name)

def gc_mark(self, paths: list[Path]):
    """
    Mark paths to not be swept/deleted by updating their mtime.

    Also makes sure the `.gc` marker file exists, creating it if it is missing.
    An already existing `.gc` is left untouched.

    :param paths: Files to mark as recently used. Missing files are created
        empty, as `Path.touch` does.
    """

    # `gc_sweep` deletes files whose mtime is older than `.gc`'s ctime.
    # On POSIX, touching an existing file bumps its ctime,
    # so re-touching `.gc` on every mark would move the cutoff forward
    # and make everything marked earlier in the same run look stale.
    # Only create `.gc` when it is missing, and do so *before* touching
    # `paths`, so that the marked paths are never older than the marker.
    try:
        self.gc_mark_file().touch(exist_ok=False)
    except FileExistsError:
        pass

    for path in paths:
        path.touch()

def lookup(
self,
*,
Expand All @@ -172,9 +196,10 @@ def lookup(
cache_dir = self.cache_dir(
transform=transform, identifier=identifier, messages=messages
)
cache_file = cache_dir / "metadata.toml"
metadata_path = cache_dir / "metadata.toml"
response_path = cache_dir / "response.txt"
try:
toml = cache_file.read_text()
toml = metadata_path.read_text()
except FileNotFoundError:
data = {
"transform": transform,
Expand All @@ -183,9 +208,11 @@ def lookup(
"messages": messages,
}
toml = to_multiline_toml(data)
logging.debug(f"Cache miss: {cache_file}:\n{toml}")
logging.debug(f"Cache miss: {metadata_path}:\n{toml}")
return None
logging.debug(f"Cache hit: {cache_file}:\n{toml}")

logging.debug(f"Cache hit: {metadata_path}:\n{toml}")
self.gc_mark([metadata_path, response_path])
data = tomli.loads(toml)

return data["response"]
Expand Down Expand Up @@ -216,11 +243,74 @@ def update(
response_path = cache_dir / "response.txt"
metadata_path.write_text(toml)
response_path.write_text(response)

logging.debug(f"Cache updated: {cache_dir}:\n{toml}")
# The `.write_text`s above updated the mtimes already,
# so no need to call `self.gc_mark`.
self.gc_mark([])

def clear(self) -> None:
    """
    Remove the whole cache.

    The cache root is a directory tree, and `Path.unlink` raises on
    directories, so the tree is removed recursively instead.
    A root that is not a directory is unlinked; a missing root is ignored.
    """

    # Local import: this block cannot see or extend the file's import section.
    import shutil

    root = self._path
    if root.is_dir() and not root.is_symlink():
        shutil.rmtree(root)
    else:
        root.unlink(missing_ok=True)

def gc_sweep(self) -> None:
    """
    Sweep/delete everything in the cache with an mtime older than `.gc`'s ctime.

    Directories that the sweep leaves empty are removed as well.
    If there is no `.gc` file, nothing is swept.
    The `.gc` file itself is removed once the sweep has finished.
    """

    gc_mark_file = self.gc_mark_file()
    try:
        oldest_allowed = gc_mark_file.stat().st_ctime_ns
    except FileNotFoundError:
        return  # No `.gc` file; nothing to sweep.

    def walk(directory: Path) -> bool:
        """
        Walk `directory`, removing any files older than `oldest_allowed`.
        If `directory` wasn't empty before and is now empty, remove it, too.
        Return if `directory` was removed or not.
        """

        # First remove all files in the dir and recurse into subdirs.
        # The listing is materialized up front,
        # since entries are deleted while we go through it.
        removed_any = False
        for path in list(directory.iterdir()):
            if path == gc_mark_file:
                # Never sweep the marker itself; it is removed at the very end.
                continue
            if path.is_dir():
                # Recurse into the subdirectory `path`, not `directory` again
                # (which would recurse forever).
                if walk(path):
                    removed_any = True
            else:
                if path.stat().st_mtime_ns < oldest_allowed:
                    try:
                        path.unlink()
                        removed_any = True
                    except OSError as e:
                        logging.warning(f"gc_sweep: failed to unlink {path}: {e}")

        # If we haven't removed anything in the dir,
        # then there's no reason to remove it, even if it's empty.
        if not removed_any:
            return False

        # If we have removed something, try to delete the dir.
        # This only succeeds if the dir is empty, which is what we want.
        try:
            directory.rmdir()
            return True
        except OSError as e:
            # A non-empty dir is expected and not worth a warning.
            if e.errno != ENOTEMPTY:
                logging.warning(f"gc_sweep: failed to rmdir {directory}: {e}")
            return False

    walk(self._path)

    # `walk` skips `.gc`, so it still exists here,
    # and therefore the cache dir containing it wasn't removed either.
    # Delete this at the end, so that if the sweep is interrupted,
    # we still have the ctime to try again.
    gc_mark_file.unlink()


class FrozenCache(AbstractCache):
"""
Expand Down
Loading