diff --git a/lib/rouge/lexers/ruchy.rb b/lib/rouge/lexers/ruchy.rb
new file mode 100644
index 0000000000..90a316124e
--- /dev/null
+++ b/lib/rouge/lexers/ruchy.rb
@@ -0,0 +1,230 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+#
+# Rouge Lexer for Ruchy Programming Language
+# RSYN-0404: Rouge grammar for GitHub/GitLab syntax highlighting
+#
+# @fileoverview Ruchy language support for Rouge (GitHub/GitLab highlighter)
+# @version 1.0.0
+# @license MIT
+#
+# Quality Requirements:
+# - Test Coverage: ≥80%
+# - Cyclomatic Complexity: ≤20
+# - Performance: <25ms for 50K lines
+#
+
+module Rouge
+  module Lexers
+    # Rouge lexer for the Ruchy programming language
+    #
+    # Ruchy is a systems programming language with built-in actor model support
+    # and pipeline operators for functional programming.
+    class Ruchy < RegexLexer
+      title "Ruchy"
+      desc "The Ruchy programming language (ruchy-lang.org)"
+
+      tag 'ruchy'
+      aliases 'rhy'
+      filenames '*.rhy', '*.ruchy'
+      mimetypes 'text/x-ruchy', 'application/x-ruchy'
+
+      # Define keyword categories for better organization
+      KEYWORDS = %w[
+        fn let mut const static struct enum trait impl type mod use
+        if else match case for while loop break continue return
+        pub async await unsafe extern move ref box
+        actor spawn send
+        self Self super crate as in where
+      ].freeze
+
+      BUILTIN_TYPES = %w[
+        bool char str
+        i8 i16 i32 i64 i128 isize
+        u8 u16 u32 u64 u128 usize
+        f32 f64
+        String Vec HashMap HashSet Result Option Box Rc Arc
+        Some None Ok Err
+      ].freeze
+
+      LITERALS = %w[true false].freeze
+
+      # Self-admitted technical debt (SATD) markers highlighted inside comments
+      SATD_MARKERS = /\b(?:TODO|FIXME|NOTE|HACK|XXX|BUG|DEBT|WORKAROUND)\b/
+
+      # Optional integer type suffix shared by every integer literal form
+      INT_SUFFIX = /(?:[iu](?:8|16|32|64|128|size))?/
+
+      # Source-based guessing. Only a shebang naming the ruchy interpreter is
+      # treated as evidence: constructs such as fn or #[...] also appear in
+      # other languages and are too weak a signal on their own.
+      def self.detect?(text)
+        return true if text.shebang?('ruchy')
+      end
+
+      # Main tokenization state
+      state :root do
+        rule %r/\s+/, Text
+
+        # Shebang line; #![...] is an attribute and is left for the rule below
+        rule %r/^#!(?!\[).*$/, Comment::Hashbang
+
+        # Documentation comments (/// or /** */)
+        rule %r{///.*$}, Comment::Doc
+        rule %r{/\*\*.*?\*/}m, Comment::Doc
+
+        # Regular comments with SATD detection
+        rule %r{//.*$} do |m|
+          token(m[0] =~ SATD_MARKERS ? Comment::Special : Comment::Single)
+        end
+
+        rule %r{/\*}, Comment::Multiline, :comment
+
+        # Attributes (#[...] or #![...])
+        rule %r/#!?\[[^\]]*\]/, Comment::Preproc
+
+        # Raw strings (r"..." or r#"..."#). The closing quote must be followed
+        # by as many hashes as opened the literal, hence the backreference.
+        rule %r/r(#*)".*?"\1/m, Str::Other
+
+        # Regular strings with interpolation support
+        rule %r/"/, Str::Double, :string
+
+        # Character literals. Tried before lifetimes so that 'a' is one
+        # character literal rather than the lifetime 'a plus a stray quote.
+        rule %r/'(?:[^'\\]|\\(?:x\h{2}|u\{\h{1,6}\}|.))'/, Str::Char
+
+        # Lifetimes ('static, 'a, etc.)
+        rule %r/'[a-z_]\w*/, Name::Label
+
+        # Numeric literals
+        # Binary literals
+        rule %r/0b[01_]+#{INT_SUFFIX}/, Num::Bin
+
+        # Octal literals
+        rule %r/0o[0-7_]+#{INT_SUFFIX}/, Num::Oct
+
+        # Hexadecimal literals
+        rule %r/0x[0-9a-fA-F_]+#{INT_SUFFIX}/, Num::Hex
+
+        # Float literals. The dot must not start a range (1..3) or a member
+        # access (1.max(2)), so it may not be followed by a dot or a name.
+        rule %r/\d[\d_]*\.(?![.a-zA-Z_])[\d_]*(?:[eE][+-]?[\d_]+)?(?:f32|f64)?/, Num::Float
+        rule %r/\d[\d_]*[eE][+-]?[\d_]+(?:f32|f64)?/, Num::Float
+        rule %r/\d[\d_]*(?:f32|f64)/, Num::Float
+
+        # Integer literals with type suffixes
+        rule %r/\d[\d_]*#{INT_SUFFIX}/, Num::Integer
+
+        # Pipeline operator (Ruchy-specific)
+        rule %r/>>/, Operator
+
+        # Actor operators (Ruchy-specific)
+        rule %r/<-|<\?/, Operator
+
+        # Range, arrow and path operators
+        rule %r/\.\.=?|=>|->|::/, Operator
+
+        # Other operators. Matched one operator at a time so that a comment
+        # opener directly after an operator (x=//note) is still recognised.
+        rule %r{&&|\|\||<<|[-+*/%&|^!<>=]=?|[~?]}, Operator
+
+        # Macro invocations (identifier!); x!=y is a comparison, not a macro
+        rule %r/[a-zA-Z_]\w*!(?!=)/, Name::Builtin
+
+        # Function definitions. Every character is captured because groups
+        # only emits the captured text.
+        rule %r/(fn)(\s+)([a-zA-Z_]\w*)/ do
+          groups Keyword, Text, Name::Function
+        end
+
+        # Actor definitions (Ruchy-specific)
+        rule %r/(actor)(\s+)([A-Z]\w*)/ do
+          groups Keyword, Text, Name::Class
+        end
+
+        # Type definitions
+        rule %r/(struct|enum|trait|type)(\s+)([A-Z]\w*)/ do
+          groups Keyword, Text, Name::Class
+        end
+
+        # Keywords
+        rule %r/\b(?:#{KEYWORDS.join('|')})\b/, Keyword
+
+        # Built-in types
+        rule %r/\b(?:#{BUILTIN_TYPES.join('|')})\b/, Keyword::Type
+
+        # Literals
+        rule %r/\b(?:#{LITERALS.join('|')})\b/, Keyword::Constant
+
+        # Type names (PascalCase identifiers)
+        rule %r/[A-Z]\w*/, Name::Class
+
+        # Regular identifiers
+        rule %r/[a-z_]\w*/, Name
+
+        # Delimiters (a single colon introduces a type annotation)
+        rule %r/[{}()\[\];:,.]/, Punctuation
+      end
+
+      # Comment state for nested block comments
+      state :comment do
+        rule %r{/\*}, Comment::Multiline, :comment
+        rule %r{\*/}, Comment::Multiline, :pop!
+
+        # SATD keyword detection in comments. Prose is consumed one word at a
+        # time so the marker rule is tried at the start of every word.
+        rule SATD_MARKERS, Comment::Special
+        rule %r/\w+/, Comment::Multiline
+        rule %r{[^/*\w]+}, Comment::Multiline
+        rule %r{[/*]}, Comment::Multiline
+      end
+
+      # Regular string state with interpolation
+      state :string do
+        rule %r/"/, Str::Double, :pop!
+        rule %r/\\[\\'"nrt0]/, Str::Escape
+        rule %r/\\x\h{2}/, Str::Escape
+        rule %r/\\u\{\h{1,6}\}/, Str::Escape
+        rule %r/\\./, Str::Escape # Invalid escape
+
+        # String interpolation (${...})
+        rule %r/\$\{/, Str::Interpol, :interpolation
+
+        rule %r/[^"\\$]+/, Str::Double
+        rule %r/\$/, Str::Double
+      end
+
+      # String interpolation state
+      state :interpolation do
+        rule %r/\}/, Str::Interpol, :pop!
+
+        # Nested braces tracking
+        rule %r/\{/, Punctuation, :interpolation
+
+        # A small expression subset is recognised inside interpolation
+        rule %r/[a-zA-Z_]\w*/, Name
+        rule %r/\d+/, Num::Integer
+        rule %r{[+\-*/]}, Operator
+        rule %r/[()]/, Punctuation
+        rule %r/\s+/, Text
+        rule %r/[^{}]+/, Text
+      end
+
+      # Heuristic confidence score (0.0 to 0.3) based on Ruchy-looking
+      # constructs. NOTE(review): kept for callers of the older analyze_text
+      # guessing hook; confirm whether the targeted Rouge still calls it.
+      def self.analyze_text(text)
+        return 0.3 if text.include?('actor ')
+        return 0.2 if text.include?('spawn ')
+        return 0.2 if text.include?(' >> ')
+        return 0.1 if text.include?(' <- ')
+        return 0.1 if text =~ /fn\s+\w+/
+        return 0.1 if text.include?('#[')
+
+        0.0
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/spec/lexers/ruchy_spec.rb b/spec/lexers/ruchy_spec.rb
new file mode 100644
index 0000000000..0224f8c7de
--- /dev/null
+++ b/spec/lexers/ruchy_spec.rb
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::Ruchy do
+  let(:subject) { Rouge::Lexers::Ruchy.new }
+
+  describe 'lexing' do
+    include Support::Lexing
+
+    describe 'basic syntax' do
+      it 'handles function definitions' do
+        assert_tokens_equal "fn main() {\n println!(\"Hello, world!\");\n}",
+          ['Keyword', 'fn'],
+          ['Text', ' '],
+          ['Name.Function', 'main'],
+          ['Punctuation', '()'],
+          ['Text', ' '],
+          ['Punctuation', '{'],
+          ['Text', "\n "],
+          ['Name.Builtin', 'println!'],
+          ['Punctuation', '('],
+          ['Literal.String.Double', '"Hello, world!"'],
+          ['Punctuation', ');'],
+          ['Text', "\n"],
+          ['Punctuation', '}']
+      end
+
+      it 'handles actor definitions' do
+        assert_tokens_equal "actor Counter {\n let mut count: i32 = 0;\n}",
+          ['Keyword', 'actor'],
+          ['Text', ' '],
+          ['Name.Class', 'Counter'],
+          ['Text', ' '],
+          ['Punctuation', '{'],
+          ['Text', "\n "],
+          ['Keyword', 'let'],
+          ['Text', ' '],
+          ['Keyword', 'mut'],
+          ['Text', ' '],
+          ['Name', 'count'],
+          ['Punctuation', ':'],
+          ['Text', ' '],
+          ['Keyword.Type', 'i32'],
+          ['Text', ' '],
+          ['Operator', '='],
+          ['Text', ' '],
+          ['Literal.Number.Integer', '0'],
+          ['Punctuation', ';'],
+          ['Text', "\n"],
+          ['Punctuation', '}']
+      end
+
+      it 'handles pipeline operator' do
+        assert_tokens_equal "data >> process >> output",
+          ['Name', 'data'],
+          ['Text', ' '],
+          ['Operator', '>>'],
+          ['Text', ' '],
+          ['Name', 'process'],
+          ['Text', ' '],
+          ['Operator', '>>'],
+          ['Text', ' '],
+          ['Name', 'output']
+      end
+
+      it 'handles actor send operator' do
+        assert_tokens_equal "counter <- Increment(5)",
+          ['Name', 'counter'],
+          ['Text', ' '],
+          ['Operator', '<-'],
+          ['Text', ' '],
+          ['Name.Class', 'Increment'],
+          ['Punctuation', '('],
+          ['Literal.Number.Integer', '5'],
+          ['Punctuation', ')']
+      end
+    end
+
+    describe 'literals and operators' do
+      it 'handles raw strings with hash delimiters' do
+        assert_tokens_equal 'r#"a "quoted" word"#',
+          ['Literal.String.Other', 'r#"a "quoted" word"#']
+      end
+
+      it 'distinguishes character literals from lifetimes' do
+        assert_tokens_equal "'a' &'a str",
+          ['Literal.String.Char', "'a'"],
+          ['Text', ' '],
+          ['Operator', '&'],
+          ['Name.Label', "'a"],
+          ['Text', ' '],
+          ['Keyword.Type', 'str']
+      end
+
+      it 'does not read range bounds as floats' do
+        assert_tokens_equal '1..=3',
+          ['Literal.Number.Integer', '1'],
+          ['Operator', '..='],
+          ['Literal.Number.Integer', '3']
+      end
+
+      it 'does not read != as a macro invocation' do
+        assert_tokens_equal 'x!=y',
+          ['Name', 'x'],
+          ['Operator', '!='],
+          ['Name', 'y']
+      end
+    end
+
+    describe 'comments' do
+      it 'handles line comments' do
+        assert_tokens_equal "// This is a comment\nlet x = 5;",
+          ['Comment.Single', '// This is a comment'],
+          ['Text', "\n"],
+          ['Keyword', 'let'],
+          ['Text', ' '],
+          ['Name', 'x'],
+          ['Text', ' '],
+          ['Operator', '='],
+          ['Text', ' '],
+          ['Literal.Number.Integer', '5'],
+          ['Punctuation', ';']
+      end
+
+      it 'handles block comments' do
+        assert_tokens_equal "/* Multi\n line\n comment */",
+          ['Comment.Multiline', "/* Multi\n line\n comment */"]
+      end
+    end
+  end
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'test.rhy'
+      assert_guess :filename => 'test.ruchy'
+    end
+
+    it 'guesses by mimetype' do
+      assert_guess :mimetype => 'text/x-ruchy'
+    end
+
+    it 'guesses by source' do
+      assert_guess :source => "#!/usr/bin/env ruchy\nfn main() {}"
+    end
+  end
+end
\ No newline at end of file
diff --git a/spec/visual/samples/ruchy_demo.rhy b/spec/visual/samples/ruchy_demo.rhy
new file mode 100644
index 0000000000..7b9bfbed98
--- /dev/null
+++ b/spec/visual/samples/ruchy_demo.rhy
@@ -0,0 +1,246 @@
+#!/usr/bin/env ruchy
+//! Comprehensive Ruchy Language Demonstration
+//! This file showcases all major features of the Ruchy programming language
+
+/// Documentation comment for MessageQueue actor
+/// Demonstrates actor model patterns and concurrent message processing
+actor MessageQueue {
+    messages: Vec<String>,
+    capacity: usize,
+
+    fn new(capacity: usize) -> Self {
+        Self {
+            messages: Vec::with_capacity(capacity),
+            capacity
+        }
+    }
+
+    /// Handle incoming messages with pattern matching
+    fn handle(&mut self, msg: Message) -> Response {
+        match msg {
+            Message::Add(text) if self.messages.len() < self.capacity => {
+                self.messages.push(text);
+                Response::Ok("Message added".to_string())
+            }
+            Message::Add(_) => Response::Error("Queue full".to_string()),
+            Message::Get => {
+                self.messages.pop()
+                    .map(Response::Data)
+                    .unwrap_or(Response::Empty)
+            }
+            Message::Count => Response::Data(self.messages.len().to_string()),
+        }
+    }
+}
+
+/// Message types for actor communication
+#[derive(Debug, Clone)]
+enum Message {
+    Add(String),
+    Get,
+    Count,
+}
+
+/// Response types from actors
+#[derive(Debug)]
+enum Response {
+    Ok(String),
+    Error(String),
+    Data(String),
+    Empty,
+}
+
+/// Generic processor trait
+trait Processor<T> {
+    fn process(&mut self, input: T) -> Result<T, ProcessError>;
+}
+
+/// Custom error type
+struct ProcessError {
+    message: String,
+}
+
+/// Configuration struct with various data types
+struct Config {
+    name: String,
+    timeout_ms: u64,
+    retry_count: i32,
+    enabled: bool,
+    weights: Vec<f64>,
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            name: "default".to_string(),
+            timeout_ms: 5000u64,
+            retry_count: 3i32,
+            enabled: true,
+            weights: vec![1.0f64, 0.5f64],
+        }
+    }
+}
+
+/// Async function with pipeline operators
+#[async]
+fn process_data(data: &str) -> Result<String, ProcessError> {
+    // Demonstrate pipeline operator (Ruchy-specific feature)
+    let result = data
+        >> |s| s.trim()
+        >> |s| s.to_lowercase()
+        >> |s| s.replace(" ", "_")
+        >> |s| format!("processed_{}", s);
+
+    // String interpolation
+    let message = "Processing completed: ${result}";
+
+    Ok(result)
+}
+
+/// Function with lifetime annotations and generics
+fn combine_data<'a, T>(items: &'a [T], separator: &str) -> String
+where
+    T: std::fmt::Display
+{
+    items
+        .iter()
+        .map(|item| item.to_string())
+        .collect::<Vec<String>>()
+        .join(separator)
+}
+
+/// Main function demonstrating actor usage
+#[async]
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Numeric literals with type suffixes
+    let binary_val = 0b1010_1101u8;
+    let hex_val = 0xFF_AA_00u32;
+    let octal_val = 0o755i32;
+    let float_val = 3.141_592_653_589f64;
+    let scientific = 1.23e-4f32;
+
+    // Raw string literals
+    let raw_string = r#"This is a "raw" string with \n no escapes"#;
+    let raw_multiline = r###"
+        Multi-line raw string
+        with ### delimiters
+    "###;
+
+    // Character literals
+    let char_simple = 'A';
+    let char_escape = '\n';
+    let char_unicode = '\u{1F600}'; // 😀
+
+    // Actor spawning and communication
+    let queue = spawn(MessageQueue::new(10));
+
+    // Send messages using actor operator
+    queue <- Message::Add("Hello World".to_string());
+    queue <- Message::Add("Second message".to_string());
+
+    // Ask for response using actor operator
+    let count_response = queue <? Message::Count;
+
+    // Pattern matching with ranges and guards
+    let data = vec![1, 2, 3, 4, 5];
+    let description = match data.len() {
+        0 => "Empty".to_string(),
+        1..=3 => "Small".to_string(),
+        n if n > 10 => "Large".to_string(),
+        _ => "Medium".to_string(),
+    };
+
+    // Loop constructs
+    for item in data.iter() {
+        if *item % 2 == 0 {
+            continue;
+        }
+        println!("Odd number: {}", item);
+        if *item > 3 {
+            break;
+        }
+    }
+
+    // While loop with mutable state
+    let mut counter = 0usize;
+    while counter < 5 {
+        counter += 1;
+        if counter == 3 {
+            continue;
+        }
+        println!("Counter: {}", counter);
+    }
+
+    // Infinite loop with break
+    let mut attempts = 0;
+    loop {
+        attempts += 1;
+        if attempts > 3 {
+            break;
+        }
+        // TODO: Implement retry logic here
+        println!("Attempt {}", attempts);
+    }
+
+    // Pipeline with complex transformations
+    let processed = " HELLO WORLD "
+        >> |s| s.trim()
+        >> |s| s.to_lowercase()
+        >> |s| s.split_whitespace().collect::<Vec<_>>()
+        >> |words| words.join("-");
+
+    println!("Processed: {}", processed);
+
+    // Closure with move semantics
+    let captured_val = 42;
+    let closure = move |x: i32| x + captured_val;
+    println!("Closure result: {}", closure(8));
+
+    Ok(())
+}
+
+// FIXME: This function needs optimization
+// HACK: Temporary workaround for performance issue
+// TODO: Add proper error handling
+// NOTE: This demonstrates SATD comment detection
+fn legacy_function() {
+    /*
+     * DEBT: This code has technical debt
+     * WORKAROUND: Using suboptimal algorithm
+     * XXX: Remove this before production
+     * BUG: Known issue with edge case handling
+     */
+    println!("Legacy code with debt markers");
+}
+
+/* Multi-line comment demonstrating
+   nested comment support in some contexts
+   and various formatting styles */
+
+/// Module-level documentation
+mod utils {
+    use super::*;
+
+    pub fn helper_function() -> &'static str {
+        "Helper function in module"
+    }
+
+    pub const CONSTANT_VALUE: i32 = 100;
+    pub static STATIC_VALUE: &str = "Static string";
+}
+
+/// Trait implementation for custom types
+impl std::fmt::Display for ProcessError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Process error: {}", self.message)
+    }
+}
+
+/// Generic function with where clause
+fn complex_function<T, U>(param1: T, param2: U) -> String
+where
+    T: std::fmt::Debug + Clone,
+    U: std::fmt::Display,
+{
+    format!("T: {:?}, U: {}", param1, param2)
+}
\ No newline at end of file