From 9931e4beb902e03e9f4e0f3d5e33c0895bf8ff2d Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Mon, 4 Nov 2024 21:59:01 +0200 Subject: [PATCH] feat: initial commit --- .editorconfig | 19 ++ .github/dependabot.yml | 14 ++ .github/workflows/ci.yml | 69 +++++++ .github/workflows/docs.yml | 40 +++++ .github/workflows/update.yml | 45 +++++ .gitignore | 24 +++ LICENSE | 19 ++ README.md | 14 ++ build.zig | 79 ++++++++ build.zig.zon | 25 +++ src/alloc.zig | 20 +++ src/language.zig | 121 +++++++++++++ src/lookahead_iterator.zig | 66 +++++++ src/node.zig | 333 ++++++++++++++++++++++++++++++++++ src/parser.zig | 191 ++++++++++++++++++++ src/query.zig | 215 ++++++++++++++++++++++ src/query_cursor.zig | 147 +++++++++++++++ src/root.zig | 30 ++++ src/test.zig | 340 +++++++++++++++++++++++++++++++++++ src/tree.zig | 91 ++++++++++ src/tree_cursor.zig | 157 ++++++++++++++++ src/types.zig | 88 +++++++++ 22 files changed, 2147 insertions(+) create mode 100644 .editorconfig create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/update.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 build.zig create mode 100644 build.zig.zon create mode 100644 src/alloc.zig create mode 100644 src/language.zig create mode 100644 src/lookahead_iterator.zig create mode 100644 src/node.zig create mode 100644 src/parser.zig create mode 100644 src/query.zig create mode 100644 src/query_cursor.zig create mode 100644 src/root.zig create mode 100644 src/test.zig create mode 100644 src/tree.zig create mode 100644 src/tree_cursor.zig create mode 100644 src/types.zig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..eb4f82a --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true 
+trim_trailing_whitespace = true +max_line_length = 120 + +[LICENSE] +max_line_length = off + +[README.md] +max_line_length = off + +[*.yml] +indent_size = 2 diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..a7da5a8 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,14 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + day: sunday + commit-message: + prefix: ci + labels: [dependencies] + open-pull-requests-limit: 1 + groups: + actions: + patterns: ["*"] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..074fd99 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,69 @@ +name: CI + +on: + push: + branches: [master] + paths: + - src/* + - .github/workflows/* + pull_request: + paths: + - src/* + - .github/workflows/* + +concurrency: + cancel-in-progress: true + group: ${{github.workflow}}-${{github.ref_name}} + +jobs: + test: + runs-on: ${{matrix.os}} + name: Test target ${{matrix.target}} + strategy: + fail-fast: false + matrix: + target: + - x86_64-linux-gnu + - x86_64-linux-musl + - aarch64-linux-gnu + - x86_64-windows-gnu + - x86_64-windows-msvc + - aarch64-macos-none + include: + - os: ubuntu-latest + target: x86_64-linux-gnu + - os: ubuntu-latest + target: x86_64-linux-musl + - os: ubuntu-latest + target: aarch64-linux-gnu + flags: -fqemu -Ddynamic-linker=/usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 + - os: windows-latest + target: x86_64-windows-gnu + - os: windows-latest + target: x86_64-windows-msvc + - os: macos-latest + target: aarch64-macos-none + - os: macos-latest + target: x86_64-macos-none + flags: -frosetta + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Zig + uses: mlugg/setup-zig@v1 + - name: Set up MSVC + uses: ilammy/msvc-dev-cmd@v1 + if: matrix.target == 'x86_64-windows-msvc' + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + if: 
matrix.target == 'aarch64-linux-gnu' + with: + platforms: arm64 + - name: Install packages + if: matrix.target == 'aarch64-linux-gnu' + run: |- + sudo apt-get update + sudo apt-get install -y libc6-dev-arm64-cross + printf 'LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib\n' >> "$GITHUB_ENV" + - name: Run unit tests + run: zig build test -Dtarget=${{matrix.target}} ${{matrix.flags}} --verbose diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..09e7556 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,40 @@ +name: Docs + +run-name: Update API docs + +on: + workflow_run: + workflows: [CI] + types: [completed] + branches: [master] + +concurrency: + cancel-in-progress: true + group: ${{github.workflow}}-${{github.ref_name}} + +permissions: + pages: write + id-token: write + +jobs: + docs: + runs-on: ubuntu-latest + name: Publish docs on GitHub pages + if: github.event.workflow_run.conclusion == 'success' + environment: + name: github-pages + url: ${{steps.deployment.outputs.page_url}} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Zig + uses: mlugg/setup-zig@v1 + - name: Generate documentation + run: zig build docs --verbose + - name: Upload pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: zig-out/docs + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml new file mode 100644 index 0000000..38a9380 --- /dev/null +++ b/.github/workflows/update.yml @@ -0,0 +1,45 @@ +name: Update + +on: + schedule: + # every Saturday at 13:00 UTC + - cron: "0 13 * * 6" + +permissions: + contents: write + pull-requests: write + +jobs: + update: + runs-on: ubuntu-latest + name: Update dependencies + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Zig + uses: mlugg/setup-zig@v1 + - name: Create app token + uses: actions/create-github-app-token@v1 
+ id: app-token + with: + app-id: ${{vars.BACKPORT_APP}} + private-key: ${{secrets.BACKPORT_KEY}} + - name: Update tree-sitter + run: |- + tag="$(gh release view --json tagName -q .tagName)" + url="https://github.com/$GH_REPO/archive/refs/tags/$tag.tar.gz" + zig fetch --save=tree-sitter "$url" + env: + GH_REPO: tree-sitter/tree-sitter + GH_TOKEN: ${{github.token}} + - name: Create pull request + uses: peter-evans/create-pull-request@v7 + with: + add-paths: build.zig.zon + labels: dependencies + commit-message: "build: update dependencies" + title: "build: update dependencies" + branch: update-zig-dependencies + base: ${{github.head_ref}} + token: ${{steps.app-token.outputs.token}} + sign-commits: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53361f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +### Zig ### +.zig-cache/ +zig-out/ +build/ +build-*/ +docgen_tmp/ + +### JetBrains ### +.idea/* +out/ +*.iws +*.iml +*.ipr + +### Vim ### +[._]*.s[a-v][a-z] +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] +Session.vim +.nvimrc +tags +*~ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1d2496c --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 tree-sitter contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..3ebb27c --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# Zig Tree-sitter + +[![CI][ci]](https://github.com/tree-sitter/zig-tree-sitter/actions/workflows/ci.yml) +[![docs][docs]](https://tree-sitter.github.io/zig-tree-sitter/) + +Zig bindings to the [tree-sitter] parsing library. + +## Usage + +TBA + +[tree-sitter]: https://tree-sitter.github.io/tree-sitter/ +[ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/zig-tree-sitter/ci.yml?logo=github&label=CI +[docs]: https://img.shields.io/github/deployments/tree-sitter/zig-tree-sitter/github-pages?logo=zig&label=API%20Docs diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..a45fff7 --- /dev/null +++ b/build.zig @@ -0,0 +1,79 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const core = b.dependency("tree-sitter", .{ + .target = target, + .optimize = optimize, + }); + + const lib = b.addStaticLibrary(.{ + .name = "zig-tree-sitter", + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + .link_libc = true, + }); + lib.linkLibrary(core.artifact("tree-sitter")); + + b.installArtifact(lib); + + const module = b.addModule("tree_sitter", .{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + module.linkLibrary(lib); + + const docs = b.addObject(.{ 
.name = "tree_sitter", + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = .Debug, + }); + + const install_docs = b.addInstallDirectory(.{ + .source_dir = docs.getEmittedDocs(), + .install_dir = .prefix, + .install_subdir = "docs", + }); + + const docs_step = b.step("docs", "Install generated docs"); + docs_step.dependOn(&install_docs.step); + + const tests = b.addTest(.{ + .root_source_file = b.path("src/test.zig"), + .target = target, + .optimize = optimize, + }); + tests.linkLibrary(lib); + + const run_tests = b.addRunArtifact(tests); + + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_tests.step); + + // HACK: fetch tree-sitter-c only for tests (ziglang/zig#19914) + var args = try std.process.argsWithAllocator(b.allocator); + defer args.deinit(); + while (args.next()) |a| { + if (std.mem.eql(u8, a, "test")) { + if (b.lazyDependency("tree-sitter-c", .{})) |dep| { + const dep_lib = dep.builder.addStaticLibrary(.{ + .name = "tree-sitter-c", + .target = target, + .optimize = optimize, + .link_libc = true, + }); + dep_lib.addIncludePath(dep.path("src")); + dep_lib.addCSourceFile(.{ + .file = dep.path("src/parser.c"), + }); + tests.linkLibrary(dep_lib); + } + break; + } + } +} diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..e8e4796 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,25 @@ +.{ + .name = "tree_sitter", + + .version = "0.24.0", + + .dependencies = .{ + .@"tree-sitter" = .{ + .url = "https://github.com/tree-sitter/tree-sitter/archive/refs/tags/v0.24.3.tar.gz", + .hash = "1220d7730b4b548db37a9c352adc8bda88d9cada68c02883fcef2c6dca3b83fb642e", + }, + .@"tree-sitter-c" = .{ + .url = "https://github.com/tree-sitter/tree-sitter-c/archive/refs/tags/v0.23.1.tar.gz", + .hash = "1220422ed862a57c2a4bc3e0987fd014536c0caafff70656b84a566f2cde43e9a603", + .lazy = true, + }, + }, + + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + "LICENSE", + "README.md", + }, +} diff --git 
a/src/alloc.zig b/src/alloc.zig new file mode 100644 index 0000000..69a67b9 --- /dev/null +++ b/src/alloc.zig @@ -0,0 +1,20 @@ +/// Set the allocation functions used by the library. +/// +/// By default, Tree-sitter uses the standard libc allocation functions, +/// but aborts the process when an allocation fails. This function lets +/// you supply alternative allocation functions at runtime. +/// +/// If you pass `null` for any parameter, Tree-sitter will switch back to +/// its default implementation of that function. +/// +/// If you call this function after the library has already been used, then +/// you must ensure that either: +/// 1. All the existing objects have been freed. +/// 2. The new allocator shares its state with the old one, so it is capable +/// of freeing memory that was allocated by the old allocator. +pub extern fn ts_set_allocator( + new_malloc: ?*const fn (size: usize) callconv(.C) ?*anyopaque, + new_calloc: ?*const fn (nmemb: usize, size: usize) callconv(.C) ?*anyopaque, + new_realloc: ?*const fn (ptr: ?*anyopaque, size: usize) callconv(.C) ?*anyopaque, + new_free: ?*const fn (ptr: ?*anyopaque) callconv(.C) void +) void; diff --git a/src/language.zig b/src/language.zig new file mode 100644 index 0000000..6641eda --- /dev/null +++ b/src/language.zig @@ -0,0 +1,121 @@ +const std = @import("std"); + +/// The type of a grammar symbol. +const SymbolType = enum(c_uint) { + Regular, + Anonymous, + Supertype, + Auxiliary, +}; + +const LanguageFn = *const fn () callconv(.C) *const Language; + +/// An opaque object that defines how to parse a particular language. +pub const Language = opaque { + /// Load the given language from a library at compile-time. + pub fn load(comptime language_name: [:0]const u8) *const Language { + const symbol_name = std.fmt.comptimePrint("tree_sitter_{s}", .{ language_name }); + return @extern(LanguageFn, .{ .name = symbol_name })(); + } + + /// Load the given language from a library at runtime. 
+ /// + /// This returns an error if it failed to load the library or find the symbol. + pub fn dynLoad(library_path: []const u8, symbol_name: [:0]const u8) error{LibError, SymError}!*const Language { + var library = std.DynLib.open(library_path) catch return error.LibError; + const function = library.lookup(LanguageFn, symbol_name) orelse return error.SymError; + return function(); + } + + /// Free any dynamically-allocated resources for this language, if this is the last reference. + pub inline fn destroy(self: *const Language) void { + ts_language_delete(self); + } + + /// Get another reference to the given language. + pub inline fn dupe(self: *const Language) *const Language { + return ts_language_copy(self); + } + + /// Get the ABI version number for this language. + pub inline fn version(self: *const Language) u32 { + return ts_language_version(self); + } + + /// Get the number of distinct node types in this language. + pub inline fn symbolCount(self: *const Language) u32 { + return ts_language_symbol_count(self); + } + + /// Get the number of distinct field names in this language. + pub inline fn fieldCount(self: *const Language) u32 { + return ts_language_field_count(self); + } + + /// Get the number of valid states in this language. + pub inline fn stateCount(self: *const Language) u32 { + return ts_language_state_count(self); + } + + /// Get the numerical id for the given field name string. + pub inline fn fieldIdForName(self: *const Language, name: []const u8) u32 { + return ts_language_field_id_for_name(self, name.ptr, @intCast(name.len)); + } + + /// Get the field name string for the given numerical id. + pub fn fieldNameForId(self: *const Language, id: u16) ?[]const u8 { + return if (ts_language_field_name_for_id(self, id)) |name| std.mem.span(name) else null; + } + + /// Get the numerical id for the given node type string. 
+ pub inline fn symbolForName(self: *const Language, string: []const u8, is_named: bool) u16 { + return ts_language_symbol_for_name(self, string.ptr, @intCast(string.len), is_named); + } + + /// Get a node type string for the given numerical id. + pub fn symbolName(self: *const Language, symbol: u16) ?[]const u8 { + return if (ts_language_symbol_name(self, symbol)) |name| std.mem.span(name) else null; + } + + /// Check if the node for the given numerical ID is named. + pub inline fn isNamed(self: *const Language, symbol: u16) bool { + return ts_language_symbol_type(self, symbol) == SymbolType.Regular; + } + + /// Check if the node for the given numerical ID is visible. + pub inline fn isVisible(self: *const Language, symbol: u16) bool { + const symbol_type = ts_language_symbol_type(self, symbol); + return @intFromEnum(symbol_type) <= @intFromEnum(SymbolType.Anonymous); + } + + /// Check if the node for the given numerical ID is a supertype. + pub inline fn isSupertype(self: *const Language, symbol: u16) bool { + return ts_language_symbol_type(self, symbol) == SymbolType.Supertype; + } + + /// Get the next parse state. + /// + /// Combine this with a `LookaheadIterator` to generate + /// completion suggestions or valid symbols in error nodes. 
+ /// + /// **Example:** + /// ```zig + /// language.nextState(node.parseState(), node.grammarSymbol()); + /// ``` + pub inline fn nextState(self: *const Language, state: u16, symbol: u16) u16 { + return ts_language_next_state(self, state, symbol); + } +}; + +extern fn ts_language_copy(self: *const Language) *const Language; +extern fn ts_language_delete(self: *const Language) void; +extern fn ts_language_field_count(self: *const Language) u32; +extern fn ts_language_field_id_for_name(self: *const Language, name: [*]const u8, name_length: u32) u16; +extern fn ts_language_field_name_for_id(self: *const Language, id: u16) ?[*:0]const u8; +extern fn ts_language_next_state(self: *const Language, state: u16, symbol: u16) u16; +extern fn ts_language_state_count(self: *const Language) u32; +extern fn ts_language_symbol_count(self: *const Language) u32; +extern fn ts_language_symbol_for_name(self: *const Language, string: [*]const u8, length: u32, is_named: bool) u16; +extern fn ts_language_symbol_name(self: *const Language, symbol: u16) ?[*:0]const u8; +extern fn ts_language_symbol_type(self: *const Language, symbol: u16) SymbolType; +extern fn ts_language_version(self: *const Language) u32; diff --git a/src/lookahead_iterator.zig b/src/lookahead_iterator.zig new file mode 100644 index 0000000..aeacae5 --- /dev/null +++ b/src/lookahead_iterator.zig @@ -0,0 +1,66 @@ +const std = @import("std"); +const Language = @import("language.zig").Language; + +/// A stateful object that is used to look up valid symbols in a specific parse state. +/// +/// Repeatedly using `next()` and `currentSymbol()` will generate valid symbols in the given parse state. +/// +/// Lookahead iterators can be useful to generate suggestions and improve syntax error diagnostics. +/// To get symbols valid in an `ERROR` node, use the lookahead iterator on its first leaf node state. +/// For `MISSING` nodes, a lookahead iterator created on the previous non-extra leaf node may be appropriate. 
+pub const LookaheadIterator = opaque { + /// Create a new lookahead iterator for the given language and parse state. + /// + /// Newly created lookahead iterators will contain the `"ERROR"` symbol (`0xFFFF`). + /// + /// This returns `null` if the state is invalid for the language. + pub inline fn create(lang: *const Language, state: u16) ?*LookaheadIterator { + return ts_lookahead_iterator_new(lang, state); + } + + /// Delete the lookahead iterator freeing all the memory used. + pub inline fn destroy(self: *LookaheadIterator) void { + ts_lookahead_iterator_delete(self); + } + + /// Get the current language of the lookahead iterator. + pub inline fn language(self: *const LookaheadIterator) *const Language { + return ts_lookahead_iterator_language(self); + } + + /// Get the current symbol id of the lookahead iterator. + pub inline fn currentSymbol(self: *const LookaheadIterator) u16 { + return ts_lookahead_iterator_current_symbol(self); + } + + /// Get the current symbol name of the lookahead iterator. + pub fn currentSymbolName(self: *const LookaheadIterator) []const u8 { + return std.mem.span(ts_lookahead_iterator_current_symbol_name(self)); + } + + /// Advance the lookahead iterator to the next symbol. + /// + /// This returns `true` if there is a new symbol and `false` otherwise. + pub inline fn next(self: *LookaheadIterator) bool { + return ts_lookahead_iterator_next(self); + } + + /// Reset the lookahead iterator to another language and state. + pub inline fn reset(self: *LookaheadIterator, lang: *const Language, state: u16) bool { + return ts_lookahead_iterator_reset(self, lang, state); + } + + /// Reset the lookahead iterator to another state. 
+ pub inline fn resetState(self: *LookaheadIterator, state: u16) bool { + return ts_lookahead_iterator_reset_state(self, state); + } +}; + +extern fn ts_lookahead_iterator_current_symbol(self: *const LookaheadIterator) u16; +extern fn ts_lookahead_iterator_current_symbol_name(self: *const LookaheadIterator) [*:0]const u8; +extern fn ts_lookahead_iterator_delete(self: *LookaheadIterator) void; +extern fn ts_lookahead_iterator_language(self: ?*const LookaheadIterator) *const Language; +extern fn ts_lookahead_iterator_new(self: *const Language, state: u16) ?*LookaheadIterator; +extern fn ts_lookahead_iterator_next(self: *LookaheadIterator) bool; +extern fn ts_lookahead_iterator_reset(self: *LookaheadIterator, language: *const Language, state: u16) bool; +extern fn ts_lookahead_iterator_reset_state(self: *LookaheadIterator, state: u16) bool; diff --git a/src/node.zig b/src/node.zig new file mode 100644 index 0000000..72dd89f --- /dev/null +++ b/src/node.zig @@ -0,0 +1,333 @@ +const std = @import("std"); + +const InputEdit = @import("types.zig").InputEdit; +const Point = @import("types.zig").Point; +const Range = @import("types.zig").Range; +const Language = @import("language.zig").Language; +const Tree = @import("tree.zig").Tree; + +/// A single node within a syntax tree. +pub const Node = extern struct { + /// **Internal.** The context of the node. + context: [4]u32, + + /// The ID of the node. + /// + /// Within any given syntax tree, no two nodes have the same ID. + /// However, if a new tree is created based on an older tree, + /// and a node from the old tree is reused in the process, + /// then that node will have the same ID in both trees. + id: *const anyopaque, + + /// The syntax tree this node belongs to. + tree: *const Tree, + + /// Check if two nodes are identical. + pub inline fn eql(self: Node, other: Node) bool { + return ts_node_eq(self, other); + } + + /// Get the node's language. 
+ pub inline fn language(self: Node) *const Language { + return ts_node_language(self); + } + + /// Get the numerical ID of the node's type. + pub inline fn symbol(self: Node) u16 { + return ts_node_symbol(self); + } + + /// Get the numerical ID of the node's type, + /// as it appears in the grammar ignoring aliases. + pub inline fn grammarSymbol(self: Node) u16 { + return ts_node_grammar_symbol(self); + } + + /// Get the type of the node. + pub fn @"type"(self: Node) []const u8 { + return std.mem.span(ts_node_type(self)); + } + + /// Get the type of the node, as it appears in the grammar ignoring aliases. + pub fn grammarType(self: Node) []const u8 { + return std.mem.span(ts_node_grammar_type(self)); + } + + /// Check if the node is *named*. + /// + /// Named nodes correspond to named rules in the grammar, + /// whereas *anonymous* nodes correspond to string literals. + pub inline fn isNamed(self: Node) bool { + return ts_node_is_named(self); + } + + /// Check if the node is *extra*. + /// + /// Extra nodes represent things which are not required + /// by the grammar but can appear anywhere (e.g. whitespace). + pub inline fn isExtra(self: Node) bool { + return ts_node_is_extra(self); + } + + /// Check if the node is a syntax error. + pub inline fn isError(self: Node) bool { + return ts_node_is_error(self); + } + + /// Check if the node is *missing*. + /// + /// Missing nodes are inserted by the parser in order + /// to recover from certain kinds of syntax errors. + pub inline fn isMissing(self: Node) bool { + return ts_node_is_missing(self); + } + + /// Check if the node has been edited. + pub inline fn hasChanges(self: Node) bool { + return ts_node_has_changes(self); + } + + /// Check if the node is a syntax error, or contains any syntax errors. + pub inline fn hasError(self: Node) bool { + return ts_node_has_error(self); + } + + /// Get the parse state of this node. 
+ pub inline fn parseState(self: Node) u16 { + return ts_node_parse_state(self); + } + + /// Get the parse state after this node. + pub inline fn nextParseState(self: Node) u16 { + return ts_node_next_parse_state(self); + } + + /// Get the start byte of the node. + pub inline fn startByte(self: Node) u32 { + return ts_node_start_byte(self); + } + + /// Get the end byte of the node. + pub inline fn endByte(self: Node) u32 { + return ts_node_end_byte(self); + } + + /// Get the start point of the node. + pub inline fn startPoint(self: Node) Point { + return ts_node_start_point(self); + } + + /// Get the end point of the node. + pub inline fn endPoint(self: Node) Point { + return ts_node_end_point(self); + } + + /// Get the range of the node. + pub fn range(self: Node) Range { + return .{ + .start_byte = self.startByte(), + .end_byte = self.endByte(), + .start_point = self.startPoint(), + .end_point = self.endPoint() + }; + } + + /// Get the number of the node's children. + pub inline fn childCount(self: Node) u32 { + return ts_node_child_count(self); + } + + /// Get the number of the node's *named* children. + pub inline fn namedChildCount(self: Node) u32 { + return ts_node_named_child_count(self); + } + + /// Get the number of the node's descendants, + /// including the node itself. + pub inline fn descendantCount(self: Node) u32 { + return ts_node_descendant_count(self); + } + + /// Get the node's immediate parent. + pub inline fn parent(self: Node) ?Node { + return ts_node_parent(self).orNull(); + } + + /// Get the node's next sibling. + pub inline fn nextSibling(self: Node) ?Node { + return ts_node_next_sibling(self).orNull(); + } + + /// Get the node's next *named* sibling. + pub inline fn nextNamedSibling(self: Node) ?Node { + return ts_node_next_named_sibling(self).orNull(); + } + + /// Get the node's previous sibling. + pub inline fn prevSibling(self: Node) ?Node { + return ts_node_prev_sibling(self).orNull(); + } + + /// Get the node's previous *named* sibling. 
+ pub inline fn prevNamedSibling(self: Node) ?Node { + return ts_node_prev_named_sibling(self).orNull(); + } + + /// Get the node's child at the given index. + pub inline fn child(self: Node, child_index: u32) ?Node { + return ts_node_child(self, child_index).orNull(); + } + + /// Get the node's *named* child at the given index. + pub inline fn namedChild(self: Node, child_index: u32) ?Node { + return ts_node_named_child(self, child_index).orNull(); + } + + /// Get the node's child with the given numerical field id. + pub inline fn childByFieldId(self: Node, field_id: u16) ?Node { + return ts_node_child_by_field_id(self, field_id).orNull(); + } + + /// Get the node's child with the given field name. + pub inline fn childByFieldName(self: Node, name: []const u8) ?Node { + return ts_node_child_by_field_name(self, name.ptr, @intCast(name.len)).orNull(); + } + + /// **Deprecated.** Use `childWithDescendant` instead. + /// + /// Get the node's child containing `descendant`. + /// + /// This will not return the descendant if it is a direct child of `self`. + pub inline fn childContainingDescendant(self: Node, descendant: Node) ?Node { + return ts_node_child_containing_descendant(self, descendant).orNull(); + } + + /// Get the node that contains `descendant`. + pub inline fn childWithDescendant(self: Node, descendant: Node) ?Node { + return ts_node_child_with_descendant(self, descendant).orNull(); + } + + /// Get the smallest node within this node that spans the given byte range. + pub inline fn descendantForByteRange(self: Node, start: u32, end: u32) ?Node { + return ts_node_descendant_for_byte_range(self, start, end).orNull(); + } + + /// Get the smallest *named* node within this node that spans the given byte range. + pub inline fn namedDescendantForByteRange(self: Node, start: u32, end: u32) ?Node { + return ts_node_named_descendant_for_byte_range(self, start, end).orNull(); + } + + /// Get the smallest node within this node that spans the given point range. 
+ pub inline fn descendantForPointRange(self: Node, start: Point, end: Point) ?Node { + return ts_node_descendant_for_point_range(self, start, end).orNull(); + } + + /// Get the smallest *named* node within this node that spans the given point range. + pub inline fn namedDescendantForPointRange(self: Node, start: Point, end: Point) ?Node { + return ts_node_named_descendant_for_point_range(self, start, end).orNull(); + } + + /// Get the field name for the node's child at the given index. + pub fn fieldNameForChild(self: Node, child_index: u32) ?[]const u8 { + return if (ts_node_field_name_for_child(self, child_index)) |name| std.mem.span(name) else null; + } + + /// Get the field name for the node's named child at the given index. + pub fn fieldNameForNamedChild(self: Node, child_index: u32) ?[]const u8 { + return if (ts_node_field_name_for_named_child(self, child_index)) |name| std.mem.span(name) else null; + } + + /// Edit the node to keep it in-sync with source code that has been edited. + /// + /// This function is only rarely needed. When you edit a syntax tree with the + /// `Tree.edit()` function, all of the nodes that you retrieve from the tree + /// afterward will already reflect the edit. You only need to use this when you + /// have a `Node` instance that you want to keep and continue to use after an edit. + pub inline fn edit(self: *Node, input_edit: InputEdit) void { + ts_node_edit(self, &input_edit); + } + + /// Get an S-expression representing the node. + /// + /// The caller is responsible for freeing it using `freeSexp`. + pub fn toSexp(self: Node) []const u8 { + return std.mem.span(ts_node_string(self)); + } + + /// Free an S-expression allocated with `toSexp()`. + pub fn freeSexp(sexp: []const u8) void { + std.c.free(@ptrCast(@constCast(sexp))); + } + + /// Format the node as a string. + /// + /// Use `{s}` to get an S-expression. 
+ pub fn format(self: Node, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { + if (std.mem.eql(u8, fmt, "s")) { + const sexp = self.toSexp(); + defer freeSexp(sexp); + return writer.print("{s}", .{ sexp }); + } + + if (fmt.len == 0 or std.mem.eql(u8, fmt, "any")) { + return writer.print( + "Node(id=0x{x}, type={s}, start={d}, end={d})", .{ + @intFromPtr(self.id), + self.@"type"(), + self.startByte(), + self.endByte() + } + ); + } + + return std.fmt.invalidFmtError(fmt, self); + } + + inline fn orNull(self: Node) ?Node { + return if (!ts_node_is_null(self)) self else null; + } +}; + +extern fn ts_node_child(self: Node, child_index: u32) Node; +extern fn ts_node_child_by_field_id(self: Node, field_id: u16) Node; +extern fn ts_node_child_by_field_name(self: Node, name: [*]const u8, name_length: u32) Node; +extern fn ts_node_child_containing_descendant(self: Node, descendant: Node) Node; +extern fn ts_node_child_with_descendant(self: Node, descendant: Node) Node; +extern fn ts_node_child_count(self: Node) u32; +extern fn ts_node_descendant_count(self: Node) u32; +extern fn ts_node_descendant_for_byte_range(self: Node, start: u32, end: u32) Node; +extern fn ts_node_descendant_for_point_range(self: Node, start: Point, end: Point) Node; +extern fn ts_node_edit(self: *Node, edit: *const InputEdit) void; +extern fn ts_node_end_byte(self: Node) u32; +extern fn ts_node_end_point(self: Node) Point; +extern fn ts_node_eq(self: Node, other: Node) bool; +extern fn ts_node_field_name_for_child(self: Node, child_index: u32) ?[*:0]const u8; +extern fn ts_node_field_name_for_named_child(self: Node, named_child_index: u32) ?[*:0]const u8; +extern fn ts_node_first_child_for_byte(self: Node, byte: u32) Node; +extern fn ts_node_first_named_child_for_byte(self: Node, byte: u32) Node; +extern fn ts_node_grammar_symbol(self: Node) u16; +extern fn ts_node_grammar_type(self: Node) [*:0]const u8; +extern fn ts_node_has_changes(self: Node) bool; +extern fn 
ts_node_has_error(self: Node) bool; +extern fn ts_node_is_error(self: Node) bool; +extern fn ts_node_is_extra(self: Node) bool; +extern fn ts_node_is_missing(self: Node) bool; +extern fn ts_node_is_named(self: Node) bool; +extern fn ts_node_is_null(self: Node) bool; +extern fn ts_node_language(self: Node) *const Language; +extern fn ts_node_named_child(self: Node, child_index: u32) Node; +extern fn ts_node_named_child_count(self: Node) u32; +extern fn ts_node_named_descendant_for_byte_range(self: Node, start: u32, end: u32) Node; +extern fn ts_node_named_descendant_for_point_range(self: Node, start: Point, end: Point) Node; +extern fn ts_node_next_named_sibling(self: Node) Node; +extern fn ts_node_next_parse_state(self: Node) u16; +extern fn ts_node_next_sibling(self: Node) Node; +extern fn ts_node_parent(self: Node) Node; +extern fn ts_node_parse_state(self: Node) u16; +extern fn ts_node_prev_named_sibling(self: Node) Node; +extern fn ts_node_prev_sibling(self: Node) Node; +extern fn ts_node_start_byte(self: Node) u32; +extern fn ts_node_start_point(self: Node) Point; +extern fn ts_node_string(self: Node) [*c]u8; +extern fn ts_node_symbol(self: Node) u16; +extern fn ts_node_type(self: Node) [*:0]const u8; diff --git a/src/parser.zig b/src/parser.zig new file mode 100644 index 0000000..b4f5920 --- /dev/null +++ b/src/parser.zig @@ -0,0 +1,191 @@ +const std = @import("std"); + +const Input = @import("types.zig").Input; +const InputEdit = @import("types.zig").InputEdit; +const InputEncoding = @import("types.zig").InputEncoding; +const Language = @import("language.zig").Language; +const Logger = @import("types.zig").Logger; +const LogType = @import("types.zig").LogType; +const Node = @import("node.zig").Node; +const Point = @import("types.zig").Point; +const Range = @import("types.zig").Range; +const Tree = @import("tree.zig").Tree; + +/// A stateful object that is used to produce +/// a syntax tree based on some source code. 
+pub const Parser = opaque { + /// Create a new parser. + pub inline fn create() *Parser { + return ts_parser_new(); + } + + /// Delete the parser, freeing all of the memory that it used. + pub inline fn destroy(self: *Parser) void { + ts_parser_delete(self); + } + + /// Get the parser's current language. + pub inline fn getLanguage(self: *const Parser) ?*const Language { + return ts_parser_language(self); + } + + /// Set the language that the parser should use for parsing. + /// + /// Returns an error if the language has an incompatible version. + pub fn setLanguage(self: *Parser, language: ?*const Language) error{IncompatibleVersion}!void { + if (!ts_parser_set_language(self, language)) { + return error.IncompatibleVersion; + } + } + + /// Get the parser's current logger. + pub inline fn getLogger(self: *const Parser) Logger { + return ts_parser_logger(self); + } + + /// Set the logger that will be used during parsing. + /// + /// **Example:** + /// ```zig + /// fn scopedLogger(_: ?*anyopaque, log_type: LogType, buffer: [*:0]const u8) callconv(.C) void { + /// const scope = switch (log_type) { + /// .Parse => std.log.scoped(.PARSE), + /// .Lex => std.log.scoped(.LEX), + /// }; + /// scope.debug("{s}", .{ std.mem.span(buffer) }); + /// } + /// + /// parser.setLogger(.{ .log = &scopedLogger }); + /// ``` + pub inline fn setLogger(self: *Parser, logger: Logger) void { + return ts_parser_set_logger(self, logger); + } + + /// Get the maximum duration in microseconds that parsing + /// should be allowed to take before halting. + pub inline fn getTimeoutMicros(self: *const Parser) u64 { + return ts_parser_timeout_micros(self); + } + + /// Set the maximum duration in microseconds that parsing + /// should be allowed to take before halting. + pub inline fn setTimeoutMicros(self: *Parser, timeout: u64) void { + return ts_parser_set_timeout_micros(self, timeout); + } + + /// Get the parser's current cancellation flag pointer. 
+    pub inline fn getCancellationFlag(self: *const Parser) ?*const usize {
+        return ts_parser_cancellation_flag(self);
+    }
+
+    /// Set the parser's cancellation flag pointer.
+    ///
+    /// If a non-null pointer is assigned, then the parser will
+    /// periodically read from this pointer during parsing.
+    /// If it reads a non-zero value, it will halt early.
+    ///
+    /// Pass `null` to clear a previously assigned flag.
+    pub inline fn setCancellationFlag(self: *Parser, flag: ?*const usize) void {
+        // The receiver must be mutable: the `ts_parser_set_cancellation_flag`
+        // binding takes `*Parser`, and a `*const Parser` does not coerce to it.
+        return ts_parser_set_cancellation_flag(self, flag);
+    }
+
+    /// Get the ranges of text that the parser will include when parsing.
+    ///
+    /// The returned slice is a view over the pointer returned by the C API;
+    /// nothing is copied here.
+    pub fn getIncludedRanges(self: *const Parser) []const Range {
+        // Start from 0 so the slice is empty rather than garbage-sized
+        // should the C call ever leave the out-parameter untouched.
+        var count: u32 = 0;
+        const ranges = ts_parser_included_ranges(self, &count);
+        return ranges[0..count];
+    }
+
+    /// Set the ranges of text that the parser should include when parsing.
+    ///
+    /// By default, the parser will always include entire documents.
+    /// This method allows you to parse only a *portion* of a document
+    /// but still return a syntax tree whose ranges match up with the
+    /// document as a whole. You can also pass multiple disjoint ranges.
+    ///
+    /// If `ranges` is `null`, the entire document will be parsed.
+    /// Otherwise, the given ranges must be ordered from earliest
+    /// to latest in the document, and they must not overlap.
+    ///
+    /// Returns `error.RangeOverlap` when the C API rejects the given ranges.
+    pub fn setIncludedRanges(self: *Parser, ranges: ?[]const Range) error{RangeOverlap}!void {
+        if (ranges) |r| {
+            if (!ts_parser_set_included_ranges(self, r.ptr, @intCast(r.len))) {
+                return error.RangeOverlap;
+            }
+        } else {
+            // Resetting to "whole document"; the boolean result is deliberately ignored.
+            _ = ts_parser_set_included_ranges(self, null, 0);
+        }
+    }
+
+    /// Use the parser to parse some source code and create a syntax tree.
+    ///
+    /// If you are parsing this document for the first time, pass `null` for the
+    /// `old_tree` parameter. Otherwise, if you have already parsed an earlier
+    /// version of this document and the document has since been edited, pass
+    /// the previous tree so that the unchanged parts of it can be reused.
+    /// This will save time and memory. For this to work correctly, you must
+    /// have already edited the old syntax tree using the `Tree.edit()`
+    /// method in a way that exactly matches the source code changes.
+    ///
+    /// This method returns a syntax tree on success or an appropriate
+    /// error if the parser does not have a language assigned, or parsing
+    /// was cancelled (either via a timeout or a cancellation flag).
+    ///
+    /// If parsing was cancelled, you can resume from where the parser stopped
+    /// by calling the method again with the same arguments. Or you can
+    /// start parsing from scratch by first calling the `reset()` method.
+    ///
+    /// Errors:
+    /// * `error.NoLanguage` - `getLanguage()` returned `null`.
+    /// * `error.Cancellation` - the C parse call returned a null tree.
+    pub fn parseInput(self: *Parser, input: Input, old_tree: ?*const Tree) error{NoLanguage, Cancellation}!*Tree {
+        // Checked up front so a missing language is not reported as a cancellation.
+        if (self.getLanguage() == null) return error.NoLanguage;
+        return ts_parser_parse(self, old_tree, input) orelse error.Cancellation;
+    }
+
+    /// Use the parser to parse some source code stored in one contiguous buffer,
+    /// optionally with a given encoding (defaults to `InputEncoding.UTF_8`).
+    ///
+    /// See the `parseInput()` method for more details.
+    ///
+    /// The buffer length is narrowed to `u32` with `@intCast`, so `buffer.len`
+    /// must fit in a `u32`.
+    pub fn parseBuffer(self: *Parser, buffer: []const u8, old_tree: ?*const Tree,
+                       encoding: ?InputEncoding) error{NoLanguage, Cancellation}!*Tree {
+        // Checked up front so a missing language is not reported as a cancellation.
+        if (self.getLanguage() == null) return error.NoLanguage;
+        return ts_parser_parse_string_encoding(self, old_tree, buffer.ptr, @intCast(buffer.len),
+            encoding orelse InputEncoding.UTF_8) orelse error.Cancellation;
+    }
+
+    /// Instruct the parser to start the next parse from the beginning.
+    ///
+    /// If the parser previously failed because of a timeout or a cancellation,
+    /// then by default, it will resume where it left off on the next call to a
+    /// parsing method. If you don't want to resume, and instead intend to use
+    /// this parser to parse some other document, you must call this method first.
+ pub inline fn reset(self: *Parser) void { + ts_parser_reset(self); + } + + /// Set the file to which the parser should write debugging graphs + /// during parsing. The graphs are formatted in the DOT language. + /// + /// Pass a `null` file to stop printing debugging graphs. + /// + /// **Example:** + /// ```zig + /// parser.printDotGraphs(std.io.getStdOut()); + /// ``` + pub fn printDotGraphs(self: *Parser, file: ?std.fs.File) void { + ts_parser_print_dot_graphs(self, if (file) |f| f.handle else -1); + } +}; + +extern fn ts_parser_new() *Parser; +extern fn ts_parser_delete(self: *Parser) void; +extern fn ts_parser_language(self: *const Parser) ?*const Language; +extern fn ts_parser_set_language(self: *Parser, language: ?*const Language) bool; +extern fn ts_parser_set_included_ranges(self: *Parser, ranges: [*c]const Range, count: u32) bool; +extern fn ts_parser_included_ranges(self: *const Parser, count: *u32) [*c]const Range; +extern fn ts_parser_parse(self: *Parser, old_tree: ?*const Tree, input: Input) ?*Tree; +// extern fn ts_parser_parse_string(self: *Parser, old_tree: ?*const Tree, string: [*c]const u8, length: u32) ?*Tree; +extern fn ts_parser_parse_string_encoding( + self: *Parser, old_tree: ?*const Tree, string: [*c]const u8, length: u32, encoding: InputEncoding) ?*Tree; +extern fn ts_parser_reset(self: *Parser) void; +extern fn ts_parser_set_timeout_micros(self: *Parser, timeout_micros: u64) void; +extern fn ts_parser_timeout_micros(self: *const Parser) u64; +extern fn ts_parser_set_cancellation_flag(self: *Parser, flag: ?*const usize) void; +extern fn ts_parser_cancellation_flag(self: *const Parser) ?*const usize; +extern fn ts_parser_set_logger(self: *Parser, logger: Logger) void; +extern fn ts_parser_logger(self: *const Parser) Logger; +extern fn ts_parser_print_dot_graphs(self: *Parser, fd: c_int) void; diff --git a/src/query.zig b/src/query.zig new file mode 100644 index 0000000..e2f6697 --- /dev/null +++ b/src/query.zig @@ -0,0 +1,215 @@ +const 
Language = @import("language.zig").Language; +const Node = @import("node.zig").Node; + +const QueryError = enum(c_uint) { + None, + Syntax, + NodeType, + Field, + Capture, + Structure, + Language, +}; + +// TODO: implement matches, captures & predicates + +/// A set of patterns that match nodes in a syntax tree. +pub const Query = opaque { + /// Create a new query from a string containing one or more S-expression + /// patterns. The query is associated with a particular language + /// and can only be run on syntax nodes parsed with that language. + /// + /// If a pattern is invalid, this returns a `Query.Error` and writes + /// the byte offset of the error to the `error_offset` parameter. + /// + /// **Example:** + /// ```zig + /// var error_offset: u32 = 0; + /// const query = Query.create(language, "(identifier) @variable", &error_offset) + /// catch |err| std.debug.panic("{s} error at position {d}", . { @errorName(err), error_offset }); + /// ``` + pub fn create(language: *const Language, source: []const u8, error_offset: *u32) Error!*Query { + var error_type: QueryError = .None; + return ts_query_new(language, source.ptr, @intCast(source.len), error_offset, &error_type) + orelse switch (error_type) { + .Syntax => error.InvalidSyntax, + .NodeType => error.InvalidNodeType, + .Field => error.InvalidField, + .Capture => error.InvalidCapture, + .Structure => error.InvalidStructure, + .Language => error.InvalidLanguage, + else => unreachable, + }; + } + + /// Delete the query, freeing all of the memory that it used. + pub inline fn destroy(self: *Query) void { + ts_query_delete(self); + } + + /// Get the number of patterns in the query. + pub inline fn patternCount(self: *const Query) u32 { + return ts_query_pattern_count(self); + } + + /// Get the number of captures in the query. + pub inline fn captureCount(self: *const Query) u32 { + return ts_query_capture_count(self); + } + + /// Get the number of literal strings in the query. 
+    pub inline fn stringCount(self: *const Query) u32 {
+        return ts_query_string_count(self);
+    }
+
+    /// Get the byte offset where the given pattern starts in the query's source.
+    ///
+    /// NOTE(review): `pattern_index` is forwarded to the C API unchecked;
+    /// presumably it must be less than `patternCount()` — confirm.
+    pub inline fn startByteForPattern(self: *const Query, pattern_index: u32) u32 {
+        return ts_query_start_byte_for_pattern(self, pattern_index);
+    }
+
+    /// Get the byte offset where the given pattern ends in the query's source.
+    ///
+    /// NOTE(review): `pattern_index` is forwarded to the C API unchecked;
+    /// presumably it must be less than `patternCount()` — confirm.
+    pub inline fn endByteForPattern(self: *const Query, pattern_index: u32) u32 {
+        return ts_query_end_byte_for_pattern(self, pattern_index);
+    }
+
+    /// Check if the given pattern in the query has a single root node.
+    ///
+    /// NOTE(review): `pattern_index` is forwarded to the C API unchecked;
+    /// presumably it must be less than `patternCount()` — confirm.
+    pub inline fn isPatternRooted(self: *const Query, pattern_index: u32) bool {
+        return ts_query_is_pattern_rooted(self, pattern_index);
+    }
+
+    /// Check if the given pattern in the query is non-local.
+    ///
+    /// A non-local pattern has multiple root nodes and can match within a
+    /// repeating sequence of nodes, as specified by the grammar. Non-local
+    /// patterns disable certain optimizations that would otherwise be possible
+    /// when executing a query on a specific range of a syntax tree.
+    ///
+    /// NOTE(review): `pattern_index` is forwarded to the C API unchecked;
+    /// presumably it must be less than `patternCount()` — confirm.
+    pub inline fn isPatternNonLocal(self: *const Query, pattern_index: u32) bool {
+        return ts_query_is_pattern_non_local(self, pattern_index);
+    }
+
+    /// Check if a given pattern is guaranteed to match once a given step is reached.
+    ///
+    /// The step is specified by its byte offset in the query's source code.
+    pub inline fn isPatternGuaranteedAtStep(self: *const Query, byte_offset: u32) bool {
+        return ts_query_is_pattern_guaranteed_at_step(self, byte_offset);
+    }
+
+    /// Get the name of one of the query's captures.
+    ///
+    /// Each capture is associated with a numeric id based
+    /// on the order that it appeared in the query's source.
+    pub fn captureNameForId(self: *const Query, index: u32) ?[]const u8 {
+        // Reject ids outside the capture table so an invalid id yields
+        // `null` instead of being forwarded to the C API.
+        if (index >= self.captureCount()) return null;
+        var length: u32 = 0;
+        const name = ts_query_capture_name_for_id(self, index, &length);
+        return if (length > 0) name[0..length] else null;
+    }
+
+    /// Get the quantifier of the query's captures.
+    ///
+    /// Returns `null` when `pattern_index` or `capture_index` is out of range.
+    pub inline fn captureQuantifierForId(self: *const Query, pattern_index: u32, capture_index: u32) ?Quantifier {
+        if (pattern_index >= self.patternCount() or capture_index >= self.captureCount()) return null;
+        return ts_query_capture_quantifier_for_id(self, pattern_index, capture_index);
+    }
+
+    /// Get the name of one of the query's literal strings.
+    ///
+    /// Each string is associated with a numeric id based
+    /// on the order that it appeared in the query's source.
+    ///
+    /// Returns `null` when `index` is out of range or the string is empty.
+    pub fn stringValueForId(self: *const Query, index: u32) ?[]const u8 {
+        // Covers the empty-table case as well as any other out-of-range id.
+        if (index >= self.stringCount()) return null;
+        var length: u32 = 0;
+        const name = ts_query_string_value_for_id(self, index, &length);
+        return if (length > 0) name[0..length] else null;
+    }
+
+    /// Disable a certain capture within a query.
+    ///
+    /// This prevents the capture from being returned in matches
+    /// and also avoids any resource usage associated with recording
+    /// the capture. Currently, there is no way to undo this.
+    pub inline fn disableCapture(self: *Query, name: []const u8) void {
+        ts_query_disable_capture(self, name.ptr, @intCast(name.len));
+    }
+
+    /// Disable a certain pattern within a query.
+    ///
+    /// This prevents the pattern from matching and removes most of the overhead
+    /// associated with the pattern. Currently, there is no way to undo this.
+    pub inline fn disablePattern(self: *Query, pattern_index: u32) void {
+        ts_query_disable_pattern(self, pattern_index);
+    }
+
+    /// Get all of the predicates for the given pattern in the query.
+    pub fn predicatesForPattern(self: *const Query, pattern_index: u32) []const PredicateStep {
+        // An out-of-range pattern has no predicates; answer with an empty
+        // slice rather than forwarding the index to the C API.
+        if (pattern_index >= self.patternCount()) return &.{};
+        var count: u32 = 0;
+        const predicates = ts_query_predicates_for_pattern(self, pattern_index, &count);
+        // Avoid slicing the C pointer at all when there are no steps.
+        return if (count > 0) predicates[0..count] else &.{};
+    }
+
+    /// The kind of error that occurred while creating a `Query`.
+    pub const Error = error {
+        InvalidSyntax,
+        InvalidNodeType,
+        InvalidField,
+        InvalidCapture,
+        InvalidStructure,
+        InvalidLanguage,
+    };
+
+    /// A quantifier for captures.
+    pub const Quantifier = enum(c_uint) {
+        Zero,
+        ZeroOrOne,
+        ZeroOrMore,
+        One,
+        OneOrMore,
+    };
+
+    /// A particular `Node` that has been captured within a query.
+    pub const Capture = extern struct {
+        node: Node,
+        index: u32,
+    };
+
+    /// A match that corresponds to a certain pattern in the query.
+    pub const Match = struct {
+        id: u32,
+        pattern_index: u16,
+        captures: []const Query.Capture,
+    };
+
+    /// A predicate step within a query.
+    ///
+    /// There are three types of steps:
+    /// * `Done` - Steps with this type are *sentinels* that
+    ///   represent the end of an individual predicate.
+    /// * `Capture` - Steps with this type represent names of captures.
+    ///   Their `value_id` can be used with the `captureNameForId()`
+    ///   method to obtain the name of the capture.
+    /// * `String` - Steps with this type represent literal strings.
+    ///   Their `value_id` can be used with the `stringValueForId()`
+    ///   method to obtain their string value.
+ pub const PredicateStep = extern struct { + type: enum(c_uint) { Done, Capture, String }, + value_id: u32, + }; +}; + +extern fn ts_query_new(language: ?*const Language, source: [*c]const u8, source_len: u32, + error_offset: *u32, error_type: *QueryError) ?*Query; +extern fn ts_query_delete(self: *Query) void; +extern fn ts_query_pattern_count(self: *const Query) u32; +extern fn ts_query_capture_count(self: *const Query) u32; +extern fn ts_query_string_count(self: *const Query) u32; +extern fn ts_query_start_byte_for_pattern(self: *const Query, pattern_index: u32) u32; +extern fn ts_query_end_byte_for_pattern(self: *const Query, pattern_index: u32) u32; +extern fn ts_query_is_pattern_rooted(self: *const Query, pattern_index: u32) bool; +extern fn ts_query_is_pattern_non_local(self: *const Query, pattern_index: u32) bool; +extern fn ts_query_is_pattern_guaranteed_at_step(self: *const Query, byte_offset: u32) bool; +extern fn ts_query_capture_name_for_id(self: *const Query, index: u32, length: *u32) [*c]const u8; +extern fn ts_query_capture_quantifier_for_id(self: *const Query, pattern_index: u32, + capture_index: u32) Query.Quantifier; +extern fn ts_query_string_value_for_id(self: *const Query, index: u32, length: *u32) [*c]const u8; +extern fn ts_query_disable_capture(self: *Query, name: [*c]const u8, length: u32) void; +extern fn ts_query_disable_pattern(self: *Query, pattern_index: u32) void; +extern fn ts_query_predicates_for_pattern(self: *const Query, pattern_index: u32, + step_count: *u32) [*c]const Query.PredicateStep; diff --git a/src/query_cursor.zig b/src/query_cursor.zig new file mode 100644 index 0000000..1751570 --- /dev/null +++ b/src/query_cursor.zig @@ -0,0 +1,147 @@ +const Node = @import("node.zig").Node; +const Point = @import("types.zig").Point; +const Query = @import("query.zig").Query; + +const QueryMatch = extern struct { + id: u32, + pattern_index: u16, + capture_count: u16, + captures: [*c]const Query.Capture, + + inline fn into(self: 
*QueryMatch) Query.Match { + return .{ + .id = self.id, + .pattern_index = self.pattern_index, + .captures = self.captures[0..self.capture_count] + }; + } +}; + +/// A stateful object for executing a `Query` on a syntax `Tree`. +/// +/// To use the query cursor, first call `exec()` to start +/// running a given query on a given syntax node. Then, there +/// are two options for consuming the results of the query: +/// 1. Repeatedly call `nextMatch()` to iterate over all of the *matches* +/// in the order that they were found. Each match contains the index of +/// the pattern that matched, and an array of captures. Because multiple +/// patterns can match the same set of nodes, one match may contain captures +/// that appear *before* some of the captures from a previous match. +/// 2. Repeatedly call `nextCapture()` to iterate over all of the individual +/// *captures* in the order that they appear. This is useful if you don't care +/// about which pattern matched, and just want a single ordered sequence of captures. +/// +/// If you don't care about consuming all of the results, you can stop +/// calling `nextMatch()` or `nextCapture()` at any point. You can then +/// start executing another query on another node by calling `exec()` again. +pub const QueryCursor = opaque { + /// Create a new cursor for executing a given query. + pub inline fn create() *QueryCursor { + return ts_query_cursor_new(); + } + + /// Delete the query cursor, freeing all of the memory that it used. + pub inline fn destroy(self: *QueryCursor) void { + ts_query_cursor_delete(self); + } + + /// Start a given query on a certain node. + pub inline fn exec(self: *QueryCursor, query: *const Query, node: Node) void { + ts_query_cursor_exec(self, query, node); + } + + /// Check if this cursor exceeded its maximum capacity for storing in-progress matches. + /// + /// If this capacity is exceeded, then the earliest-starting match will silently + /// be dropped to make room for further matches. 
This maximum capacity is optional. + /// By default, query cursors allow any number of pending matches, dynamically + /// allocating new space for them as needed as the query is executed. + pub inline fn didExceedMatchLimit(self: *const QueryCursor) bool { + return ts_query_cursor_did_exceed_match_limit(self); + } + + /// Get the cursor's maximum number of in-progress matches. + pub inline fn getMatchLimit(self: *const QueryCursor) u32 { + return ts_query_cursor_match_limit(self); + } + + /// Set the cursor's maximum number of in-progress matches. + pub inline fn setMatchLimit(self: *QueryCursor, limit: u32) void { + ts_query_cursor_set_match_limit(self, limit); + } + + /// Get the maximum duration in microseconds that query + /// execution should be allowed to take before halting. + pub inline fn getTimeoutMicros(self: *const QueryCursor) u64 { + return ts_query_cursor_timeout_micros(self); + } + + /// Set the maximum duration in microseconds that query + /// execution should be allowed to take before halting. + pub inline fn setTimeoutMicros(self: *QueryCursor, timeout_micros: u64) void { + ts_query_cursor_set_timeout_micros(self, timeout_micros); + } + + /// Set the range of bytes in which the query will be executed. + pub inline fn setByteRange(self: *QueryCursor, start_byte: u32, end_byte: u32) void { + ts_query_cursor_set_byte_range(self, start_byte, end_byte); + } + + /// Set the range of points in which the query will be executed. + pub inline fn setPointRange(self: *QueryCursor, start_point: Point, end_point: Point) void { + ts_query_cursor_set_point_range(self, start_point, end_point); + } + + /// Set the maximum start depth for a query cursor. + /// + /// This prevents cursors from exploring children nodes at a certain depth. + /// Note that if a pattern includes many children, they will still be checked. 
+    ///
+    /// A max start depth of `0` is a special case: it helps to destructure
+    /// a subtree by staying on a node and using captures for the parts of
+    /// interest. Note that it only limits the search depth for a pattern's
+    /// root node, while other nodes that are part of the pattern may be
+    /// searched at any depth defined by the pattern structure.
+    ///
+    /// Set to `0xFFFFFFFF` to remove the maximum start depth.
+    pub inline fn setMaxStartDepth(self: *QueryCursor, max_start_depth: u32) void {
+        ts_query_cursor_set_max_start_depth(self, max_start_depth);
+    }
+
+    /// Advance to the next match of the currently running query.
+    ///
+    /// Returns `null` when the C API reports no further match.
+    /// NOTE(review): the `captures` slice of the result points at memory
+    /// provided by the C API; presumably it is only valid until the cursor
+    /// is advanced again — confirm.
+    pub fn nextMatch(self: *QueryCursor) ?Query.Match {
+        // Left undefined on purpose: it is only read after the C call
+        // returns `true`.
+        var match: QueryMatch = undefined;
+        return if (ts_query_cursor_next_match(self, &match)) match.into() else null;
+    }
+
+    /// Advance to the next capture of the currently running query.
+    ///
+    /// This returns a tuple where the first element is the
+    /// index of the capture and the second is the match.
+    /// Returns `null` when the C API reports no further capture.
+    pub fn nextCapture(self: *QueryCursor) ?struct { u32, Query.Match } {
+        var index: u32 = 0;
+        // Left undefined on purpose: it is only read after the C call
+        // returns `true`.
+        var match: QueryMatch = undefined;
+        const result = ts_query_cursor_next_capture(self, &match, &index);
+        return if (result) .{ index, match.into() } else null;
+    }
+
+    /// Remove a match from the query cursor.
+ pub inline fn removeMatch(self: *QueryCursor, match_id: u32) void { + ts_query_cursor_remove_match(self, match_id); + } +}; + +pub extern fn ts_query_cursor_new() *QueryCursor; +pub extern fn ts_query_cursor_delete(self: *QueryCursor) void; +pub extern fn ts_query_cursor_exec(self: *QueryCursor, query: *const Query, node: Node) void; +pub extern fn ts_query_cursor_did_exceed_match_limit(self: *const QueryCursor) bool; +pub extern fn ts_query_cursor_match_limit(self: *const QueryCursor) u32; +pub extern fn ts_query_cursor_set_match_limit(self: *QueryCursor, limit: u32) void; +pub extern fn ts_query_cursor_set_timeout_micros(self: *QueryCursor, timeout_micros: u64) void; +pub extern fn ts_query_cursor_timeout_micros(self: *const QueryCursor) u64; +pub extern fn ts_query_cursor_set_byte_range(self: *QueryCursor, start_byte: u32, end_byte: u32) void; +pub extern fn ts_query_cursor_set_point_range(self: *QueryCursor, start_point: Point, end_point: Point) void; +pub extern fn ts_query_cursor_set_max_start_depth(self: *QueryCursor, max_start_depth: u32) void; +pub extern fn ts_query_cursor_next_match(self: *QueryCursor, match: *QueryMatch) bool; +pub extern fn ts_query_cursor_next_capture(self: *QueryCursor, match: *QueryMatch, capture_index: *u32) bool; +pub extern fn ts_query_cursor_remove_match(self: *QueryCursor, match_id: u32) void; diff --git a/src/root.zig b/src/root.zig new file mode 100644 index 0000000..31cb0c3 --- /dev/null +++ b/src/root.zig @@ -0,0 +1,30 @@ +// NOTE: remember to update the version numbers + +/// The latest ABI version that is supported by the current version of the library. +/// +/// The Tree-sitter library is generally backwards-compatible with +/// languages generated using older CLI versions, but is not forwards-compatible. +pub const LANGUAGE_VERSION = 14; + +/// The earliest ABI version that is supported by the current version of the library. 
+pub const MIN_COMPATIBLE_LANGUAGE_VERSION = 13; + +pub const set_allocator = @import("alloc.zig").ts_set_allocator; + +const structs = @import("types.zig"); +pub const Input = structs.Input; +pub const InputEdit = structs.InputEdit; +pub const InputEncoding = structs.InputEncoding; +pub const LogType = structs.LogType; +pub const Logger = structs.Logger; +pub const Point = structs.Point; +pub const Range = structs.Range; + +pub const Language = @import("language.zig").Language; +pub const LookaheadIterator = @import("lookahead_iterator.zig").LookaheadIterator; +pub const Node = @import("node.zig").Node; +pub const Parser = @import("parser.zig").Parser; +pub const Query = @import("query.zig").Query; +pub const QueryCursor = @import("query_cursor.zig").QueryCursor; +pub const Tree = @import("tree.zig").Tree; +pub const TreeCursor = @import("tree_cursor.zig").TreeCursor; diff --git a/src/test.zig b/src/test.zig new file mode 100644 index 0000000..1d1ed34 --- /dev/null +++ b/src/test.zig @@ -0,0 +1,340 @@ +const std = @import("std"); +const testing = std.testing; +const ts = @import("root.zig"); + +test "Language" { + const language = ts.Language.load("c"); + defer language.destroy(); + + try testing.expectEqual(14, language.version()); + try testing.expect(language.symbolCount() > 1); + try testing.expect(language.fieldCount() > 1); + try testing.expect(language.stateCount() > 1); + try testing.expect(language.fieldIdForName("body") > 0); + try testing.expect(language.fieldNameForId(1) != null); + try testing.expectEqual(161, language.symbolForName("translation_unit", true)); + try testing.expectEqualStrings("identifier", language.symbolName(1) orelse ""); + try testing.expect(language.isNamed(1)); + try testing.expect(language.isVisible(1)); + try testing.expect(!language.isSupertype(1)); + try testing.expect(language.nextState(1, 161) > 1); + + const copy = language.dupe(); + try testing.expectEqual(language, copy); + copy.destroy(); +} + +test "LookaheadIterator" 
{ + const language = ts.Language.load("c"); + defer language.destroy(); + + const state = language.nextState(1, 161); + const lookahead = ts.LookaheadIterator.create(language, state).?; + defer lookahead.destroy(); + + try testing.expectEqual(language, lookahead.language()); + try testing.expectEqual(0xFFFF, lookahead.currentSymbol()); + try testing.expectEqualStrings("ERROR", lookahead.currentSymbolName()); + + try testing.expect(lookahead.next()); + try testing.expectEqual(160, lookahead.currentSymbol()); + try testing.expectEqualStrings("comment", lookahead.currentSymbolName()); + + try testing.expect(lookahead.next()); + try testing.expectEqual(0, lookahead.currentSymbol()); + try testing.expectEqualStrings("end", lookahead.currentSymbolName()); + + try testing.expect(!lookahead.next()); + try testing.expect(lookahead.resetState(state)); + + try testing.expect(lookahead.next()); + try testing.expect(lookahead.reset(language, state)); +} + +test "Parser" { + const language = ts.Language.load("c"); + defer language.destroy(); + + const parser = ts.Parser.create(); + defer parser.destroy(); + try parser.setLanguage(language); + + try testing.expectEqual(language, parser.getLanguage()); + try testing.expectEqual(null, parser.getLogger().log); + try testing.expectEqual(0, parser.getTimeoutMicros()); + try testing.expectEqual(null, parser.getCancellationFlag()); + + try testing.expectEqualSlices(ts.Range, &.{ .{} }, parser.getIncludedRanges()); + try testing.expectError(error.RangeOverlap, parser.setIncludedRanges(&.{ .{ .start_byte = 1 }, .{} })); + + // TODO: more tests +} + +test "Tree" { + const language = ts.Language.load("c"); + defer language.destroy(); + + const parser = ts.Parser.create(); + defer parser.destroy(); + try parser.setLanguage(language); + + const tree = try parser.parseBuffer("int main() {}", null, .UTF_8); + defer tree.destroy(); + try testing.expectEqual(language, tree.language()); + try testing.expectEqual(13, tree.rootNode().endByte()); + 
try testing.expectEqual(3, tree.rootNodeWithOffset(3, .{ .row = 0, .column = 3 }).?.startByte()); + + var ranges = tree.getIncludedRanges(); + var range: ts.Range = .{ + .start_point = .{ .row = 0, .column = 0 }, + .end_point = .{ .row = 0xFFFFFFFF, .column = 0xFFFFFFFF }, + .start_byte = 0, + .end_byte = 0xFFFFFFFF + }; + try testing.expectEqualSlices(ts.Range, &.{ range }, ranges); + ts.Tree.freeRanges(ranges); + + const old_tree = tree.dupe(); + try testing.expect(tree != old_tree); + defer old_tree.destroy(); + + old_tree.edit(.{ + .start_byte = 0, + .start_point = .{ .row = 0, .column = 0 }, + .old_end_byte = 13, + .new_end_byte = 9, + .old_end_point = .{ .row = 0, .column = 13 }, + .new_end_point = .{ .row = 0, .column = 9 }, + }); + const new_tree = try parser.parseBuffer("main() {}", old_tree, .UTF_8); + defer new_tree.destroy(); + range = .{ + .start_point = .{ .row = 0, .column = 0 }, + .end_point = .{ .row = 0, .column = 9 }, + .start_byte = 0, + .end_byte = 9 + }; + ranges = old_tree.getChangedRanges(new_tree); + try testing.expectEqualSlices(ts.Range, &.{ range }, ranges); + ts.Tree.freeRanges(ranges); +} + +test "TreeCursor" { + const language = ts.Language.load("c"); + defer language.destroy(); + + const parser = ts.Parser.create(); + defer parser.destroy(); + try parser.setLanguage(language); + + const tree = try parser.parseBuffer("int main() {}", null, .UTF_8); + defer tree.destroy(); + const root_node = tree.rootNode(); + + var cursor = ts.TreeCursor.create(root_node); + defer cursor.destroy(); + + var node = cursor.currentNode(); + try testing.expect(node.eql(root_node)); + try testing.expectEqual(node, cursor.currentNode()); + + var copy = cursor.dupe(); + try testing.expect(cursor.id != copy.id); + try testing.expectEqual(cursor.tree, copy.tree); + + cursor.resetTo(©); + try testing.expectEqual(copy.currentNode(), cursor.currentNode()); + copy.destroy(); + + try testing.expect(cursor.gotoFirstChild()); + try 
testing.expectEqualStrings("function_definition", cursor.currentNode().@"type"()); + try testing.expectEqual(1, cursor.currentDepth()); + + try testing.expect(cursor.gotoLastChild()); + try testing.expectEqualStrings("compound_statement", cursor.currentNode().@"type"()); + try testing.expectEqualStrings("body", cursor.currentFieldName().?); + + try testing.expect(cursor.gotoParent()); + try testing.expectEqualStrings("function_definition", cursor.currentNode().@"type"()); + try testing.expectEqual(0, cursor.currentFieldId()); + + try testing.expect(!cursor.gotoNextSibling()); + try testing.expect(!cursor.gotoPreviousSibling()); + + cursor.gotoDescendant(2); + try testing.expectEqual(2, cursor.currentDescendantIndex()); + cursor.reset(root_node); + + try testing.expectEqual(0, cursor.gotoFirstChildForByte(1)); + try testing.expectEqual(1, cursor.gotoFirstChildForPoint(.{ .row = 0, .column = 5 })); + try testing.expectEqualStrings("declarator", cursor.currentFieldName().?); +} + +test "Node" { + const language = ts.Language.load("c"); + defer language.destroy(); + + const parser = ts.Parser.create(); + defer parser.destroy(); + try parser.setLanguage(language); + + const tree = try parser.parseBuffer("int main() {}", null, .UTF_8); + defer tree.destroy(); + var node = tree.rootNode(); + + try testing.expectEqual(tree, node.tree); + try testing.expectEqual(tree.language(), node.language()); + + try testing.expectEqual(161, node.symbol()); + try testing.expectEqual(161, node.grammarSymbol()); + try testing.expectEqualStrings("translation_unit", node.@"type"()); + try testing.expectEqualStrings("translation_unit", node.grammarType()); + + try testing.expect(node.isNamed()); + try testing.expect(!node.isExtra()); + try testing.expect(!node.isError()); + try testing.expect(!node.isMissing()); + + try testing.expectEqual(0, node.parseState()); + try testing.expectEqual(0, node.nextParseState()); + + try testing.expectEqual(0, node.startByte()); + try 
testing.expectEqual(13, node.endByte()); + try testing.expectEqual(0, node.startPoint().column); + try testing.expectEqual(13, node.endPoint().column); + + try testing.expectEqual(1, node.childCount()); + try testing.expectEqual(1, node.namedChildCount()); + try testing.expectEqual(11, node.descendantCount()); + + node = node.child(0).?; + try testing.expectEqual(tree.rootNode(), node.parent()); + try testing.expectEqualStrings("function_declarator", node.namedChild(1).?.@"type"()); + try testing.expectEqual(null, node.childByFieldId(1)); + try testing.expectEqualStrings("primitive_type", node.childByFieldName("type").?.@"type"()); + + try testing.expectEqualStrings("function_declarator", node.child(0).?.nextSibling().?.@"type"()); + try testing.expectEqualStrings("function_declarator", node.child(0).?.nextNamedSibling().?.@"type"()); + try testing.expectEqualStrings("function_declarator", node.child(2).?.prevSibling().?.@"type"()); + try testing.expectEqualStrings("function_declarator", node.child(2).?.prevNamedSibling().?.@"type"()); + + try testing.expectEqual(node, tree.rootNode().childWithDescendant(node)); + try testing.expectEqualStrings("{", node.descendantForByteRange(11, 12).?.@"type"()); + try testing.expectEqualStrings("compound_statement", node.namedDescendantForByteRange(11, 12).?.@"type"()); + + const points: [2]ts.Point = .{ .{ .row = 0, .column = 4 }, .{ .row = 0, .column = 8 }}; + try testing.expectEqualStrings("identifier", node.descendantForPointRange(points[0], points[1]).?.@"type"()); + try testing.expectEqualStrings("identifier", node.namedDescendantForPointRange(points[0], points[1]).?.@"type"()); + + try testing.expectEqualStrings("body", node.fieldNameForChild(2).?); + try testing.expectEqualStrings("body", node.fieldNameForNamedChild(2).?); + + const sexp = node.toSexp(); + defer ts.Node.freeSexp(sexp); + try testing.expectStringStartsWith(sexp, "(function_definition type:"); + + const new_tree = tree.dupe(); + defer new_tree.destroy(); + 
const edit: ts.InputEdit = .{
    .start_byte = 0,
    .start_point = .{ .row = 0, .column = 0 },
    .old_end_byte = 13,
    .new_end_byte = 9,
    .old_end_point = .{ .row = 0, .column = 13 },
    .new_end_point = .{ .row = 0, .column = 9 },
};
new_tree.edit(edit);
node = new_tree.rootNode();
node.edit(edit);

try testing.expect(node.hasChanges());
try testing.expect(!node.hasError());
}

// Exercises query compilation (failure and success) and the Query introspection API.
test "Query" {
    const language = ts.Language.load("c");
    defer language.destroy();

    // An unknown node name must be rejected; the reported offset is expected
    // to be 1, i.e. the byte right after the opening parenthesis of "(foo)".
    var error_offset: u32 = 0;
    try testing.expectError(error.InvalidNodeType, ts.Query.create(language, "(foo) @foo", &error_offset));
    try testing.expectEqual(1, error_offset);

    // Three patterns, three captures (@variable, @punctuation, @main).
    const source =
        \\(identifier) @variable
        \\["{" "}" "(" ")"] @punctuation
        \\((identifier) @main
        \\ (#eq? @main "main"))
    ;
    var query = try ts.Query.create(language, source, &error_offset);
    defer query.destroy();

    try testing.expectEqual(3, query.patternCount());
    try testing.expectEqual(3, query.captureCount());
    try testing.expectEqual(2, query.stringCount());

    // The first source line is 22 bytes plus a newline, so the second pattern
    // starts at byte 23; it is 30 bytes plus a newline, hence the end at 54.
    try testing.expectEqual(23, query.startByteForPattern(1));
    try testing.expectEqual(54, query.endByteForPattern(1));

    try testing.expect(query.isPatternRooted(0));
    try testing.expect(!query.isPatternNonLocal(2));
    try testing.expect(!query.isPatternGuaranteedAtStep(9));

    try testing.expectEqualStrings("punctuation", query.captureNameForId(1).?);
    try testing.expectEqual(.One, query.captureQuantifierForId(0, 0).?);
    try testing.expectEqualStrings("main", query.stringValueForId(1).?);

    // Expected predicate steps of the third pattern: string 0 (presumably the
    // predicate name `eq?` -- not asserted here), capture 2 (`@main`),
    // string 1 ("main", asserted above), then the terminating step.
    const steps: [4]ts.Query.PredicateStep = .{
        .{ .type = .String, .value_id = 0 },
        .{ .type = .Capture, .value_id = 2 },
        .{ .type = .String, .value_id = 1 },
        .{ .type = .Done, .value_id = 0 },
    };
    try testing.expectEqualSlices(ts.Query.PredicateStep, &steps, query.predicatesForPattern(2));
}

// Runs the same query over a parsed C snippet and checks the cursor's
// default settings and the first results it yields.
test "QueryCursor" {
    const language = ts.Language.load("c");
    defer language.destroy();

    const source =
        \\(identifier) @variable
        \\["{" "}" "(" ")"] @punctuation
        \\((identifier) @main
        \\ (#eq? @main "main"))
    ;
    var error_offset: u32 = 0;
    var query = try ts.Query.create(language, source, &error_offset);
    defer query.destroy();

    const parser = ts.Parser.create();
    defer parser.destroy();
    try parser.setLanguage(language);

    const tree = try parser.parseBuffer("int main() {}", null, .UTF_8);
    defer tree.destroy();

    const cursor = ts.QueryCursor.create();
    defer cursor.destroy();

    cursor.exec(query, tree.rootNode());

    // Defaults asserted by this test: limit not exceeded, match limit
    // 0xFFFFFFFF, timeout 0.
    try testing.expect(!cursor.didExceedMatchLimit());
    try testing.expectEqual(0xFFFFFFFF, cursor.getMatchLimit());
    try testing.expectEqual(0, cursor.getTimeoutMicros());

    // First match: pattern 0 capturing an `identifier` node.
    var match = cursor.nextMatch().?;
    try testing.expectEqual(0, match.id);
    try testing.expectEqual(0, match.pattern_index);
    try testing.expectEqual(1, match.captures.len);
    try testing.expectEqual(0, match.captures[0].index);
    try testing.expectEqualStrings("identifier", match.captures[0].node.@"type"());

    // Skip one match, then continue capture-by-capture; element [1] of the
    // value returned by `nextCapture()` is used as the match.
    _ = cursor.nextMatch();

    match = cursor.nextCapture().?[1];
    try testing.expectEqual(2, match.id);
    try testing.expectEqual(1, match.pattern_index);
    try testing.expectEqual(1, match.captures.len);
    try testing.expectEqual(1, match.captures[0].index);
    try testing.expectEqualStrings("(", match.captures[0].node.@"type"());
}
// diff --git a/src/tree.zig b/src/tree.zig new file mode 100644 index 0000000..54f3a3e --- /dev/null +++ b/src/tree.zig @@ -0,0 +1,91 @@
const std = @import("std");

const InputEdit = @import("types.zig").InputEdit;
const Language = @import("language.zig").Language;
const Node = @import("node.zig").Node;
const Point = @import("types.zig").Point;
const Range = @import("types.zig").Range;

/// A tree that represents the syntactic structure of a source code file.
pub const Tree = opaque {
    /// Delete the syntax tree, freeing all of the memory that it used.
pub inline fn destroy(self: *Tree) void {
    ts_tree_delete(self);
}

/// Create a shallow copy of the syntax tree.
///
/// You need to copy a syntax tree in order to use it on more than
/// one thread at a time, as syntax trees are not thread safe.
pub inline fn dupe(self: *const Tree) *Tree {
    return ts_tree_copy(self);
}

/// Get the root node of the syntax tree.
pub inline fn rootNode(self: *const Tree) Node {
    return ts_tree_root_node(self);
}

/// Get the root node of the syntax tree, with
/// its position shifted forward by the given offset.
///
/// Returns `null` when the C library reports a null node.
pub inline fn rootNodeWithOffset(self: *const Tree, offset_bytes: u32, offset_extent: Point) ?Node {
    const node = ts_tree_root_node_with_offset(self, offset_bytes, offset_extent);
    return if (!ts_node_is_null(node)) node else null;
}

/// Get the language that was used to parse the syntax tree.
pub inline fn language(self: *const Tree) *const Language {
    return ts_tree_language(self);
}

/// Turn a `(pointer, length)` pair returned by the C library into a slice.
///
/// When `length` is `0` the pointer may be null, and slicing a null C
/// pointer is safety-checked illegal behavior. In that case whatever was
/// returned is released right away (`free(NULL)` is a no-op) and a static
/// empty slice is handed out instead, which `freeRanges()` ignores.
fn rangesToSlice(ranges: [*c]Range, length: u32) []const Range {
    if (length == 0) {
        std.c.free(@ptrCast(ranges));
        return &.{};
    }
    return ranges[0..length];
}

/// Get the included ranges of the syntax tree.
///
/// The caller is responsible for freeing them using `freeRanges()`.
pub fn getIncludedRanges(self: *const Tree) []const Range {
    var length: u32 = 0;
    const ranges = ts_tree_included_ranges(self, &length);
    return rangesToSlice(ranges, length);
}

/// Compare an old edited syntax tree to a new syntax
/// tree representing the same document, returning the
/// ranges whose syntactic structure has changed.
///
/// For this to work correctly, this tree must have been
/// edited such that its ranges match up to the new tree.
///
/// The result is empty when no range changed.
/// The caller is responsible for freeing them using `freeRanges()`.
pub fn getChangedRanges(self: *const Tree, new_tree: *const Tree) []const Range {
    var length: u32 = 0;
    const ranges = ts_tree_get_changed_ranges(self, new_tree, &length);
    return rangesToSlice(ranges, length);
}

/// Free the ranges allocated with `getIncludedRanges()` or `getChangedRanges()`.
///
/// Passing an empty slice is allowed and does nothing: an empty slice
/// does not own heap memory, so its pointer must never reach `free()`.
pub fn freeRanges(ranges: []const Range) void {
    if (ranges.len == 0) return;
    std.c.free(@ptrCast(@constCast(ranges.ptr)));
}

/// Edit the syntax tree to keep it in sync with source code that has been edited.
pub inline fn edit(self: *Tree, input_edit: InputEdit) void {
    ts_tree_edit(self, &input_edit);
}

/// Write a DOT graph describing the syntax tree to the given file.
///
/// The file is closed automatically.
// NOTE(review): `file.handle` is passed where the C function expects a
// `c_int` file descriptor. That matches POSIX targets; on Windows the
// handle is presumably not an integer descriptor -- confirm before
// relying on this function there.
pub fn printDotGraph(self: *const Tree, file: std.fs.File) void {
    ts_tree_print_dot_graph(self, file.handle);
}
};

extern fn ts_node_is_null(self: Node) bool;
extern fn ts_tree_copy(self: *const Tree) *Tree;
extern fn ts_tree_delete(self: *Tree) void;
extern fn ts_tree_root_node(self: *const Tree) Node;
extern fn ts_tree_root_node_with_offset(self: *const Tree, offset_bytes: u32, offset_extent: Point) Node;
extern fn ts_tree_language(self: *const Tree) *const Language;
extern fn ts_tree_included_ranges(self: *const Tree, length: *u32) [*c]Range;
extern fn ts_tree_edit(self: *Tree, edit: *const InputEdit) void;
extern fn ts_tree_get_changed_ranges(old_tree: *const Tree, new_tree: *const Tree, length: *u32) [*c]Range;
extern fn ts_tree_print_dot_graph(self: *const Tree, file_descriptor: c_int) void;
// diff --git a/src/tree_cursor.zig b/src/tree_cursor.zig new file mode 100644 index 0000000..d4cce1f --- /dev/null +++ b/src/tree_cursor.zig @@ -0,0 +1,157 @@
const std = @import("std");

const Point = @import("types.zig").Point;
const Node = @import("node.zig").Node;
const Tree = @import("tree.zig").Tree;

/// A stateful object for walking a syntax tree efficiently.
pub const TreeCursor = extern struct {
    /// The syntax tree this cursor belongs to.
    tree: *const Tree,

    /// **Internal.** The id of the tree cursor.
    id: *const anyopaque,

    /// **Internal.** The context of the tree cursor.
    context: [3]u32,

    /// Create a new tree cursor starting from the given node.
pub inline fn create(node: Node) TreeCursor {
    return ts_tree_cursor_new(node);
}

/// Release the tree cursor together with all of the memory it used.
pub inline fn destroy(self: *TreeCursor) void {
    ts_tree_cursor_delete(self);
}

/// Make a shallow copy of the tree cursor.
pub inline fn dupe(self: *const TreeCursor) TreeCursor {
    return ts_tree_cursor_copy(self);
}

/// Return the node the cursor currently points at.
pub inline fn currentNode(self: *const TreeCursor) Node {
    return ts_tree_cursor_current_node(self);
}

/// Return the field name of the cursor's current node.
///
/// The result is `null` when the current node has no field.
pub fn currentFieldName(self: *const TreeCursor) ?[]const u8 {
    const field_name = ts_tree_cursor_current_field_name(self) orelse return null;
    return std.mem.span(field_name);
}

/// Return the field id of the cursor's current node.
///
/// The result is `0` when the current node has no field.
pub inline fn currentFieldId(self: *const TreeCursor) u16 {
    return ts_tree_cursor_current_field_id(self);
}

/// Return how deep the current node sits below the
/// original node that the cursor was constructed with.
pub inline fn currentDepth(self: *const TreeCursor) u32 {
    return ts_tree_cursor_current_depth(self);
}

/// Return the index of the current node among all of the
/// descendants of the original node that the cursor was constructed with.
pub inline fn currentDescendantIndex(self: *const TreeCursor) u32 {
    return ts_tree_cursor_current_descendant_index(self);
}

/// Step to the parent of the current node.
///
/// Yields `true` when the cursor moved and
/// `false` when there was no parent node.
pub inline fn gotoParent(self: *TreeCursor) bool {
    return ts_tree_cursor_goto_parent(self);
}

/// Step to the next sibling of the current node.
///
/// Yields `true` when the cursor moved and
/// `false` when there was no next sibling node.
pub inline fn gotoNextSibling(self: *TreeCursor) bool {
    return ts_tree_cursor_goto_next_sibling(self);
}

/// Step to the previous sibling of the current node.
///
/// Yields `true` when the cursor moved and
/// `false` when there was no previous sibling node.
pub inline fn gotoPreviousSibling(self: *TreeCursor) bool {
    return ts_tree_cursor_goto_previous_sibling(self);
}

/// Step to the first child of the current node.
///
/// Yields `true` when the cursor moved and
/// `false` when there were no children.
pub inline fn gotoFirstChild(self: *TreeCursor) bool {
    return ts_tree_cursor_goto_first_child(self);
}

/// Step to the last child of the current node.
///
/// Yields `true` when the cursor moved and
/// `false` when there were no children.
pub inline fn gotoLastChild(self: *TreeCursor) bool {
    return ts_tree_cursor_goto_last_child(self);
}

/// Jump to the nth descendant of the original node
/// that the cursor was constructed with; index `0`
/// denotes the original node itself.
pub inline fn gotoDescendant(self: *TreeCursor, index: u32) void {
    ts_tree_cursor_goto_descendant(self, index);
}

/// Step to the first child of the current node
/// that extends beyond the given byte offset.
///
/// Yields the index of that child, or `null` when
/// the C library reports a negative index.
pub inline fn gotoFirstChildForByte(self: *TreeCursor, byte: u32) ?u32 {
    const child_index = ts_tree_cursor_goto_first_child_for_byte(self, byte);
    if (child_index < 0) return null;
    return @intCast(child_index);
}

/// Step to the first child of the current node
/// that extends beyond the given point.
///
/// Yields the index of that child, or `null` when
/// the C library reports a negative index.
pub inline fn gotoFirstChildForPoint(self: *TreeCursor, point: Point) ?u32 {
    const child_index = ts_tree_cursor_goto_first_child_for_point(self, point);
    if (child_index < 0) return null;
    return @intCast(child_index);
}

/// Re-initialize the tree cursor so that it starts at `node`.
pub inline fn reset(self: *TreeCursor, node: Node) void {
    ts_tree_cursor_reset(self, node);
}

/// Re-initialize the tree cursor to the same position as `other`.
pub inline fn resetTo(self: *TreeCursor, other: *const TreeCursor) void {
    ts_tree_cursor_reset_to(self, other);
}
};

extern fn ts_tree_cursor_new(node: Node) TreeCursor;
extern fn ts_tree_cursor_delete(self: *TreeCursor) void;
extern fn ts_tree_cursor_reset(self: *TreeCursor, node: Node) void;
extern fn ts_tree_cursor_reset_to(dst: *TreeCursor, src: *const TreeCursor) void;
extern fn ts_tree_cursor_current_node(self: *const TreeCursor) Node;
extern fn ts_tree_cursor_current_field_name(self: *const TreeCursor) ?[*:0]const u8;
extern fn ts_tree_cursor_current_field_id(self: *const TreeCursor) u16;
extern fn ts_tree_cursor_goto_parent(self: *TreeCursor) bool;
extern fn ts_tree_cursor_goto_next_sibling(self: *TreeCursor) bool;
extern fn ts_tree_cursor_goto_previous_sibling(self: *TreeCursor) bool;
extern fn ts_tree_cursor_goto_first_child(self: *TreeCursor) bool;
extern fn ts_tree_cursor_goto_last_child(self: *TreeCursor) bool;
extern fn ts_tree_cursor_goto_descendant(self: *TreeCursor, goal_descendant_index: u32) void;
extern fn ts_tree_cursor_current_descendant_index(self: *const TreeCursor) u32;
extern fn ts_tree_cursor_current_depth(self: *const TreeCursor) u32;
extern fn ts_tree_cursor_goto_first_child_for_byte(self: *TreeCursor, goal_byte: u32) i64;
extern fn ts_tree_cursor_goto_first_child_for_point(self: *TreeCursor, goal_point: Point) i64;
extern fn ts_tree_cursor_copy(cursor: *const TreeCursor) TreeCursor;
// diff --git a/src/types.zig b/src/types.zig new file mode 100644 index
// 0000000..a8784bf --- /dev/null +++ b/src/types.zig @@ -0,0 +1,88 @@
/// A struct that specifies how to read input text.
pub const Input = extern struct {
    /// An arbitrary pointer that will be passed
    /// to each invocation of the `read` method.
    payload: ?*anyopaque,
    /// A function to retrieve a chunk of text at a given byte offset
    /// and (row, column) position. The function should return a pointer
    /// to the text and write its length to the `bytes_read` pointer.
    /// The parser does not take ownership of this buffer, it just borrows
    /// it until it has finished reading it. The function should write a `0`
    /// value to the `bytes_read` pointer to indicate the end of the document.
    read: *const fn (
        payload: ?*anyopaque,
        byte_index: u32,
        position: Point,
        bytes_read: *u32,
    ) callconv(.C) [*c]const u8,
    /// An indication of how the text is encoded.
    encoding: InputEncoding = InputEncoding.UTF_8,
};

/// An edit to a text document.
///
/// Each position is given both as a byte offset and as a row/column point.
pub const InputEdit = extern struct {
    start_byte: u32,
    old_end_byte: u32,
    new_end_byte: u32,
    start_point: Point,
    old_end_point: Point,
    new_end_point: Point,
};

/// A wrapper around a function that logs parsing results.
pub const Logger = extern struct {
    /// The payload of the function.
    payload: ?*anyopaque = null,
    /// The callback function.
    log: ?*const fn (
        payload: ?*anyopaque,
        log_type: LogType,
        buffer: [*:0]const u8,
    ) callconv(.C) void = null,
};

/// A position in a text document in terms of rows and columns.
pub const Point = extern struct {
    /// The zero-based row of the document.
    row: u32,
    /// The zero-based column of the document.
    column: u32,

    /// Compare two points, ordering by row first and by column second.
    ///
    /// ```
    /// self == other => 0
    /// self > other => 1
    /// self < other => -1
    /// ```
    ///
    /// The fields are compared directly instead of being subtracted:
    /// they are unsigned, so a difference would overflow whenever
    /// `self` is smaller and could never be negative. The result is a
    /// runtime `i8`, so the function also works on runtime values.
    pub fn cmp(self: *const Point, other: *const Point) i8 {
        if (self.row != other.row) {
            return if (self.row > other.row) 1 else -1;
        }
        if (self.column != other.column) {
            return if (self.column > other.column) 1 else -1;
        }
        return 0;
    }
};

/// A range of positions in a text document,
/// both in terms of bytes and of row-column points.
///
/// The defaults span from the start of the document
/// to the largest representable position.
pub const Range = extern struct {
    start_point: Point = .{ .row = 0, .column = 0 },
    end_point: Point = .{ .row = 0xFFFFFFFF, .column = 0xFFFFFFFF },
    start_byte: u32 = 0,
    end_byte: u32 = 0xFFFFFFFF,
};

/// The encoding of source code.
pub const InputEncoding = enum(c_uint) {
    UTF_8,
    UTF_16,
};

/// The type of a log message.
pub const LogType = enum(c_uint) {
    Parse,
    Lex,
};