diff --git a/.gitignore b/.gitignore index 5f104d12..89b95e2c 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,6 @@ **/target /.idea -.vscode # python .env diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..82a3391d --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,33 @@ +{ + "configurations": [ + { + "name": "Debug Rust/Python", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/python/tools/attach_debugger.py", + "args": [ + "${file}" + ], + "console": "internalConsole", + "serverReadyAction": { + "pattern": "pID = ([0-9]+)", + "action": "startDebugging", + "name": "Rust LLDB" + } + }, + { + "name": "Rust LLDB", + "pid": "0", + "type": "lldb", + "request": "attach", + "program": "${command:python.interpreterPath}", + "stopOnEntry": false, + "sourceLanguages": [ + "rust" + ], + "presentation": { + "hidden": true + } + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..77710a36 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,12 @@ +{ + "rust-analyzer.check.extraEnv": { + "CARGO_TARGET_DIR": "target/analyzer" + }, + "rust-analyzer.check.features": "all", + "rust-analyzer.cargo.features": "all", + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5dae4f27..b4a972c8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,7 +24,7 @@ platform. This guide will walk you through the process of making your first cont ## File an issue -Testing and reporting bugs are also valueable contributions. Please follow +Testing and reporting bugs are also valuable contributions. Please follow the [issue template](https://github.com/apache/hudi-rs/issues/new?template=bug_report.yml) to file bug reports. ## Prepare for development @@ -41,6 +41,7 @@ and Rust modules. 
You don't need to `cd` to the root directory and run `cargo` c To setup python virtual env, run ```shell +cd python make setup-venv ``` @@ -88,6 +89,61 @@ pytest -s pytest tests/test_table_read.py -s -k "test_read_table_has_correct_schema" ``` +## Debugging on VSCode + +Debugging is a crucial part of developing/maintaining the project. This tutorial will guide you through setting up Visual Studio Code for debugging hudi-rs using the CodeLLDB extension. Assuming you have Visual Studio Code installed: + +1. Download the CodeLLDB VSCode extension. + +2. Open the **hudi-rs** project in VSCode. + +3. Add a `launch.json` file (shown below) to your `.vscode` directory (if that directory does not appear in your root directory, consult [here](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations)): + +
launch.json + + ```json + { + "configurations": [ + { + "name": "Debug Rust/Python", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/python/tools/attach_debugger.py", + "args": [ + "${file}" + ], + "console": "internalConsole", + "serverReadyAction": { + "pattern": "pID = ([0-9]+)", + "action": "startDebugging", + "name": "Rust LLDB" + } + }, + { + "name": "Rust LLDB", + "pid": "0", + "type": "lldb", + "request": "attach", + "program": "${command:python.interpreterPath}", + "sourceLanguages": [ + "rust" + ] + } + ] + } + ``` + +
+ +### Using the Debugger + +1. Create a Python file in your python environment which imports code from the hudi module. + +2. On the left of VSCode, there should be '**Run and Debug**' option. At the top-left of your screen, +you can select '**Debug Rust/Python**' in the dropdown options. + +Breakpoints can be added in the code to pinpoint debugging instances + ## Before creating a pull request Run test commands to make sure the code is working as expected. diff --git a/python/.gitignore b/python/.gitignore index db2736bb..d3ee040c 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -83,7 +83,7 @@ docs/_build/ .idea/ # VSCode -.vscode/ +.vscode # Pyenv .python-version diff --git a/python/tools/attach_debugger.py b/python/tools/attach_debugger.py new file mode 100644 index 00000000..7e3045fd --- /dev/null +++ b/python/tools/attach_debugger.py @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +This file was copied from the Polars project (https://github.com/pola-rs/polars/blob/main/py-polars/debug/launch.py) +under the license provided by Ritchie Vink and NVIDIA Corporation & Affiliates. 
import os
import re
import sys
import time
from pathlib import Path

# The following parameter determines the sleep time of the Python process after a
# signal is sent that attaches the Rust LLDB debugger. If the Rust LLDB debugger
# attaches to the current session too late, it might miss any set breakpoints. If
# this happens consistently, it is recommended to increase this value.
LLDB_DEBUG_WAIT_TIME_SECONDS = 1

# Matches the "Rust LLDB" configuration name with the "pid" entry defined
# immediately after it; group 1 is everything up to the pid digits, group 2 is the
# closing quote.
_RUST_LLDB_PID_PATTERN = re.compile(r'("Rust LLDB",\s*"pid":\s*")\d+(")')


def _replace_rust_lldb_pid(launch_info: str, pid: int) -> str:
    """Return *launch_info* with the Rust LLDB ``pid`` value replaced by *pid*.

    Raises:
        RuntimeError: if no ``"Rust LLDB"`` name followed by a ``"pid"`` entry
            is present in *launch_info*.
    """
    updated, replacements = _RUST_LLDB_PID_PATTERN.subn(
        lambda match: f"{match.group(1)}{pid}{match.group(2)}", launch_info
    )
    if not replacements:
        msg = (
            "Cannot locate pid definition in launch.json for Rust LLDB configuration. "
            "Please follow the instructions in the 'Debugging on VSCode' section of "
            "CONTRIBUTING.md for creating the launch configuration."
        )
        raise RuntimeError(msg)
    return updated


def launch_debugging() -> None:
    """
    Debug Rust files via Python.

    Determine the pID for the current debugging session, write it into the
    "Rust LLDB" configuration of ``.vscode/launch.json``, print the signal that
    makes VSCode start that configuration, and execute the originally-requested
    script (``sys.argv[1]``) as ``__main__``.

    Raises:
        RuntimeError: if no script was passed on the command line, if
            ``launch.json`` cannot be found, or if it has no Rust LLDB pid entry.
    """
    if len(sys.argv) == 1:
        msg = (
            "attach_debugger.py is not meant to be executed directly; please use the "
            "`Debug Rust/Python` debugging configuration to run a python script that "
            "uses the hudi library."
        )
        raise RuntimeError(msg)

    # Get the current process ID.
    pid = os.getpid()

    # This file lives in <repo>/python/tools/, so the repository root is two levels
    # above its directory. Resolving first keeps this working when __file__ is a
    # relative path.
    launch_file = Path(__file__).resolve().parents[2] / ".vscode" / "launch.json"
    try:
        launch_info = launch_file.read_text(encoding="utf-8")
    except FileNotFoundError as err:
        msg = f"Cannot locate {launch_file}"
        raise RuntimeError(msg) from err

    # Overwrite the pid found in launch.json with the pid for the current process.
    launch_file.write_text(_replace_rust_lldb_pid(launch_info, pid), encoding="utf-8")

    # Print pID to the debug console. This auto-triggers the Rust LLDB configuration
    # (launch.json watches for the pattern "pID = ([0-9]+)"). Flush so the signal is
    # not left sitting in a buffer while this process sleeps.
    print(f"pID = {pid}", flush=True)

    # Give the LLDB time to connect. Depending on how long it takes for your LLDB
    # debugging session to initialize, you may have to adjust this setting.
    time.sleep(LLDB_DEBUG_WAIT_TIME_SECONDS)

    # Update sys.argv so that when exec() is called, the first argument is the script
    # name itself, and the remaining are the input arguments.
    del sys.argv[0]
    file_to_execute = Path(sys.argv[0])

    # Run the originally requested file by reading in the script, compiling, and
    # executing the code. Reading bytes lets compile() honor the script's own
    # source-encoding declaration instead of the platform default.
    # NOTE: this executes arbitrary code by design; only point it at scripts you
    # trust.
    script_contents = file_to_execute.read_bytes()
    script_globals = {"__name__": "__main__", "__file__": str(file_to_execute)}
    exec(compile(script_contents, str(file_to_execute), mode="exec"), script_globals)


if __name__ == "__main__":
    launch_debugging()