Skip to content

Commit

Permalink
fnmatch: Introduce the naive fnmatch module
Browse files Browse the repository at this point in the history
This module simply compiles an input string of format:

    /path/(group_id:glob)

to a usable regex, à la fnmatch, for convenient matching of strings
and capturing of specific globs.

Signed-off-by: Ikey Doherty <[email protected]>
  • Loading branch information
ikeycode committed Dec 9, 2023
1 parent fa949bd commit eca14bf
Show file tree
Hide file tree
Showing 2 changed files with 256 additions and 0 deletions.
255 changes: 255 additions & 0 deletions src/fnmatch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
// SPDX-FileCopyrightText: Copyright © 2020-2023 Serpent OS Developers
//
// SPDX-License-Identifier: MPL-2.0

use std::{
collections::{HashMap, HashSet},
convert::Infallible,
str::FromStr,
};

use regex::Regex;
use thiserror::Error;
/// One lexical piece of a pattern string, as produced by `fragments_from_string`.
#[derive(Debug)]
enum PatternFragment {
/// The `?` pattern character.
MatchOne,

/// The `*` pattern character.
MatchAny,

/// The `\` pattern character.
BackSlash,

/// The `.` pattern character.
Dot,

/// The `/` pattern character.
ForwardSlash,

/// Normal text: a run of characters that the parser does not treat specially.
Text(String),

/// Group written as `(name:glob)`: the group name, and the fragments parsed
/// from the part after the colon.
Group(String, Vec<PatternFragment>),
}

/// Cursor over a string that yields one `char` at a time and can look ahead.
///
/// All positions are **byte** offsets into `data`, so they can be used directly
/// for slicing and combined with `str::len()`.
#[derive(Clone)]
struct StringWalker<'a> {
    /// The complete string being walked.
    data: &'a str,
    /// Byte offset of the next character to be yielded.
    index: usize,
    /// Length of `data` in bytes.
    length: usize,
}

impl<'a> Iterator for StringWalker<'a> {
    type Item = char;

    /// Yield the character at the cursor and advance past it.
    ///
    /// Returns `None` once the cursor is at (or beyond) the end of the data, or
    /// if the cursor does not sit on a character boundary.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.length {
            return None;
        }
        // Decode a whole character instead of slicing a single byte: a one-byte
        // slice is not valid for multi-byte UTF-8 characters and would end the
        // walk at the first non-ASCII character.
        let ch = self.data.get(self.index..)?.chars().next()?;
        self.index += ch.len_utf8();
        Some(ch)
    }
}

impl<'a> StringWalker<'a> {
    /// Return a new StringWalker positioned at the start of `data`.
    pub fn new(data: &'a str) -> Self {
        Self {
            data,
            index: 0,
            length: data.len(),
        }
    }

    /// Skip `much` bytes without yielding them.
    pub fn eat(&mut self, much: usize) {
        self.index += much;
    }

    /// Find the next occurrence of `c` at or after the cursor and return the
    /// text between the cursor and that occurrence (exclusive).
    ///
    /// The cursor itself is not moved. Returns `None` when `c` does not occur
    /// in the remaining data.
    pub fn substring_to(&self, c: char) -> Option<&'a str> {
        let rest = self.data.get(self.index..)?;
        // `find` reports a byte offset, which is what slicing needs; counting
        // characters would give the wrong length for non-ASCII text.
        rest.find(c).and_then(|end| rest.get(..end))
    }
}

/// A pattern string together with the regex compiled from it.
///
/// Built through the `FromStr` implementation (`"...".parse::<Pattern>()`).
#[derive(Debug)]
pub struct Pattern {
/// The original pattern string, as passed to the parser.
pub pattern: String,
// Regex compiled from `pattern`.
regex: Regex,
// Names of the capture groups declared in `pattern`.
groups: Vec<String>,
}

/// The result of successfully matching a path against a `Pattern`.
#[derive(Debug)]
pub struct Match {
/// The path that was matched.
pub path: String,
/// Text captured by each named group, keyed by group name.
pub variables: HashMap<String, String>,
}

impl Pattern {
    /// Match `path` against this pattern.
    ///
    /// Returns `None` when the compiled regex does not match. On success the
    /// returned [`Match`] carries a copy of `path` and the text captured by
    /// each named group.
    ///
    /// A group that exists in the pattern but did not take part in the match
    /// is left out of `variables` rather than causing a panic. This can happen
    /// because literal text is handed to the regex engine as-is, so it may
    /// contain regex operators such as `|`.
    pub fn match_path(&self, path: &str) -> Option<Match> {
        let captures = self.regex.captures(path)?;
        let variables = self
            .groups
            .iter()
            .filter_map(|name| {
                captures
                    .name(name)
                    .map(|value| (name.clone(), value.as_str().to_owned()))
            })
            .collect();
        Some(Match {
            path: path.into(),
            variables,
        })
    }
}

/// Errors that can occur while turning a pattern string into a `Pattern`.
#[derive(Error, Debug)]
pub enum PatternError {
/// Placeholder error.
/// NOTE(review): nothing in the visible code constructs this variant and its
/// message is not descriptive — confirm whether it is still needed.
#[error("lol")]
Basic,

/// Conversion from `Infallible`, the error type of `String::from_str`.
/// It exists so `?` can be applied to that call; it cannot actually occur.
#[error("malformed: {0}")]
String(#[from] Infallible),

/// A `(` group is unterminated or is not of the form `name:glob`.
#[error("malformed group")]
Group,

/// The generated regex was rejected by the regex compiler.
#[error("invalid regex: {0}")]
Regex(#[from] regex::Error),
}

fn fragments_from_string(s: &str) -> Result<Vec<PatternFragment>, PatternError> {
let mut walker = StringWalker::new(s);
let mut builder = vec![];
let mut text = String::new();
while let Some(ch) = walker.next() {
let next_token = match ch {
'?' => Some(PatternFragment::MatchOne),
'*' => Some(PatternFragment::MatchAny),
'\\' => Some(PatternFragment::BackSlash),
'/' => Some(PatternFragment::ForwardSlash),
'.' => Some(PatternFragment::Dot),
'(' => {
if let Some(end) = walker.substring_to(')') {
walker.eat(end.len() + 1);

let splits = end.split(':').collect::<Vec<_>>();
if splits.len() != 2 {
return Err(PatternError::Group);
}
let key = splits.first().ok_or(PatternError::Group)?;
let value = splits.get(1).ok_or(PatternError::Group)?;

let subpattern = fragments_from_string(value)?;
builder.push(PatternFragment::Group(String::from_str(key)?, subpattern));
} else {
return Err(PatternError::Group);
}
None
}
')' => None,
_ => {
text.push(ch);
None
}
};

if let Some(token) = next_token {
if !text.is_empty() {
builder.push(PatternFragment::Text(text.clone()));
text.clear();
}
builder.push(token)
}
}

if !text.is_empty() {
builder.push(PatternFragment::Text(text.clone()));
}

Ok(builder)
}

/// Render one fragment as regex source text.
///
/// Returns the regex text together with the names of every capture group
/// introduced by the fragment. For a `Group`, the names of nested groups come
/// first and the group's own name last. `Text` is emitted verbatim.
fn fragment_to_regex_str(fragment: &PatternFragment) -> (String, Vec<String>) {
    // Fragments other than groups introduce no capture names.
    let plain = |source: &str| -> (String, Vec<String>) { (source.to_owned(), Vec::new()) };

    match fragment {
        PatternFragment::MatchOne => plain("."),
        PatternFragment::MatchAny => plain("[^\\/]*"),
        PatternFragment::BackSlash => plain("\\"),
        PatternFragment::ForwardSlash => plain("\\/"),
        PatternFragment::Dot => plain("\\."),
        PatternFragment::Text(t) => plain(t),
        PatternFragment::Group(id, elements) => {
            let mut names = Vec::new();
            let mut inner = String::new();
            for element in elements {
                let (source, nested) = fragment_to_regex_str(element);
                inner.push_str(&source);
                names.extend(nested);
            }
            names.push(id.clone());
            (format!("(?<{id}>{inner})"), names)
        }
    }
}

impl FromStr for Pattern {
    type Err = PatternError;

    /// Compile a pattern string such as `/path/(group_id:glob)` into a `Pattern`.
    ///
    /// The string is split into fragments, each fragment is rendered as regex
    /// source, and the concatenation is compiled. Group names are gathered
    /// through a set, so duplicates collapse and their order is unspecified.
    ///
    /// NOTE(review): no `^`/`$` anchors are added to the generated regex here,
    /// so matching is presumably not tied to the start or end of the path —
    /// confirm that this is intended.
    ///
    /// # Errors
    ///
    /// Propagates any `PatternError` from fragment parsing, and returns
    /// `PatternError::Regex` when the generated regex does not compile.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let fragments = fragments_from_string(s)?;

        let mut source = String::new();
        let mut names = HashSet::new();
        for fragment in &fragments {
            let (piece, captured) = fragment_to_regex_str(fragment);
            source.push_str(&piece);
            names.extend(captured);
        }

        let regex = Regex::new(&source)?;
        Ok(Self {
            pattern: s.into(),
            regex,
            groups: names.into_iter().collect(),
        })
    }
}

#[cfg(test)]
pub mod path_tests {
    use super::Pattern;

    /// A `*` group captures exactly one path segment: the version directory is
    /// extracted from a matching path, and a path with an extra segment is
    /// rejected.
    #[test]
    fn test_pattern() {
        let pattern: Pattern = "/usr/lib/modules/(version:*)/modules.symbols"
            .parse()
            .unwrap();

        let wanted = "/usr/lib/modules/6.2.6/modules.symbols";
        let matched = pattern.match_path(wanted).unwrap();
        assert_eq!(matched.path, wanted);
        assert_eq!(
            matched.variables.get("version").map(String::as_str),
            Some("6.2.6")
        );

        // `*` must not cross a `/`, so the additional `l/` segment cannot match.
        assert!(pattern
            .match_path("/usr/lib/modules/6.2.6/l/modules.symbols")
            .is_none());
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
//
// SPDX-License-Identifier: MPL-2.0

pub mod fnmatch;
pub mod osenv;
pub mod trigger;

0 comments on commit eca14bf

Please sign in to comment.