rune/
search.rs

1//! Search utilities.
2use crate::core::{
3    cons::Cons,
4    env::Env,
5    gc::{Context, Rt},
6    object::{List, NIL, Object, ObjectType, OptionalFlag},
7};
8use anyhow::{Result, bail, ensure};
9use fallible_iterator::FallibleIterator;
10use fancy_regex::Regex;
11use rune_macros::defun;
12
13#[defun]
14fn string_match<'ob>(
15    regexp: &str,
16    string: &str,
17    start: Option<i64>,
18    _inhibit_modify: OptionalFlag,
19    env: &mut Rt<Env>,
20    cx: &'ob Context,
21) -> Result<Object<'ob>> {
22    // TODO: implement inhibit-modify
23    let re = Regex::new(&lisp_regex_to_rust(regexp))?;
24
25    let start = start.unwrap_or(0) as usize;
26    if let Some(matches) = re.captures_iter(&string[start..]).next() {
27        let mut all: Vec<Object> = Vec::new();
28        let matches = matches?;
29        let mut groups = matches.iter();
30        // TODO: match data should be char position, not byte
31        while let Some(Some(group)) = groups.next() {
32            all.push(group.start().into());
33            all.push(group.end().into());
34        }
35        let match_data = crate::fns::slice_into_list(&all, None, cx);
36        env.match_data.set(match_data);
37        let head: &Cons = match_data.try_into().unwrap();
38        Ok(head.car())
39    } else {
40        Ok(NIL)
41    }
42}
43
44#[defun]
45fn replace_match(
46    newtext: &str,
47    _fixedcase: OptionalFlag,
48    _literal: OptionalFlag,
49    string: Option<&str>,
50    subexp: Option<usize>,
51    env: &Rt<Env>,
52    cx: &Context,
53) -> Result<String> {
54    // TODO: Handle newtext interpolation. Treat \ as special. See docstring for more.
55    //
56    // TODO: Handle automatic case adjustment
57    let Some(string) = string else { bail!("replace-match for buffers not yet implemented") };
58    let mut match_data = env.match_data.bind(cx).as_list()?.fallible();
59    let subexp = subexp.unwrap_or(0);
60    let sub_err = || format!("replace-match subexpression {subexp} does not exist");
61    for _ in 0..(subexp * 2) {
62        ensure!(match_data.next()?.is_some(), sub_err());
63    }
64    let Some(beg) = match_data.next()? else { bail!(sub_err()) };
65    let Some(end) = match_data.next()? else { bail!(sub_err()) };
66
67    // TODO: match data should be char position, not byte
68    let beg: usize = beg.try_into()?;
69    let end: usize = end.try_into()?;
70
71    // replace the range beg..end in string with newtext
72    let mut new_string = String::new();
73    new_string.push_str(&string[..beg]);
74    new_string.push_str(newtext);
75    new_string.push_str(&string[end..]);
76    Ok(new_string)
77}
78
79#[defun]
/// Lisp `regexp-quote`: return a copy of `string` in which every character
/// from the set `[ * . \ ? + ^ $` is preceded by a backslash. All other
/// characters are copied unchanged.
fn regexp_quote(string: &str) -> String {
    string.chars().fold(String::new(), |mut escaped, c| {
        if matches!(c, '[' | '*' | '.' | '\\' | '?' | '+' | '^' | '$') {
            escaped.push('\\');
        }
        escaped.push(c);
        escaped
    })
}
90
/// Translate an Emacs Lisp regexp into the syntax expected by the Rust regex
/// engine.
///
/// The following differences are handled:
/// - `(`, `)`, `{`, `}` and `|` have their escaping inverted: the bare
///   character is literal in Lisp and gets a backslash, while the backslashed
///   form is an operator in Lisp and loses its backslash.
/// - `` \` `` becomes `\A` and `\'` becomes `\z`.
/// - The character class name `[:word:]` is expanded to `a-zA-Z`.
///
/// Every other character, and every other backslash escape, is copied through
/// unchanged. A trailing lone backslash is kept as is.
fn lisp_regex_to_rust(regexp: &str) -> String {
    let mut norm_regex = String::with_capacity(regexp.len());
    let mut chars = regexp.char_indices();
    while let Some((idx, ch)) = chars.next() {
        match ch {
            // Invert the escaping of grouping, interval and alternation
            // operators. i.e. \( => ( and ( => \(
            '(' | ')' | '{' | '}' | '|' => {
                norm_regex.push('\\');
                norm_regex.push(ch);
            }
            '\\' => match chars.next() {
                Some((_, c @ ('(' | ')' | '{' | '}' | '|'))) => norm_regex.push(c),
                Some((_, '`')) => norm_regex.push_str("\\A"),
                Some((_, '\'')) => norm_regex.push_str("\\z"),
                Some((_, c)) => {
                    norm_regex.push('\\');
                    norm_regex.push(c);
                }
                None => norm_regex.push('\\'),
            },
            '[' => {
                let word = "[:word:]";
                if regexp[idx..].starts_with(word) {
                    // The '[' is already consumed; drop the remaining
                    // `word.len() - 1` characters of the class name.
                    chars.nth(word.len() - 2);
                    norm_regex.push_str("a-zA-Z");
                } else {
                    norm_regex.push('[');
                }
            }
            c => norm_regex.push(c),
        }
    }
    norm_regex
}
125
126#[defun]
127fn match_data<'ob>(
128    integer: OptionalFlag,
129    reuse: OptionalFlag,
130    reseat: OptionalFlag,
131    env: &Rt<Env>,
132    cx: &'ob Context,
133) -> Result<Object<'ob>> {
134    ensure!(integer.is_none(), "match-data integer field is not implemented");
135    ensure!(reuse.is_none(), "match-data reuse field is not implemented");
136    ensure!(reseat.is_none(), "match-data reseat field is not implemented");
137    Ok(env.match_data.bind(cx))
138}
139
140#[defun]
141fn set_match_data<'ob>(list: List, _reseat: OptionalFlag, env: &mut Rt<Env>) -> Object<'ob> {
142    // TODO: add reseat when markers implemented
143    let obj: Object = list.into();
144    env.match_data.set(obj);
145    NIL
146}
147
148#[defun]
149fn match_beginning<'ob>(subexp: usize, env: &Rt<Env>, cx: &'ob Context) -> Result<Object<'ob>> {
150    let list = env.match_data.bind(cx).as_list()?;
151    Ok(list.fallible().nth(subexp)?.unwrap_or_default())
152}
153
154#[defun]
155fn match_end<'ob>(subexp: usize, env: &Rt<Env>, cx: &'ob Context) -> Result<Object<'ob>> {
156    let list = env.match_data.bind(cx).as_list()?;
157    Ok(list.fallible().nth(subexp + 1)?.unwrap_or_default())
158}
159
160#[defun]
161#[expect(non_snake_case)]
162fn match_data__translate(n: i64, env: &Rt<Env>, cx: &Context) -> Result<()> {
163    let search_regs: List = env.match_data.bind(cx).try_into()?;
164    for reg in search_regs.conses() {
165        let reg = reg?;
166        if let ObjectType::Int(old) = reg.car().untag() {
167            reg.set_car((old + n).into())?;
168        } else {
169            bail!("match data was not int");
170        }
171    }
172    Ok(())
173}
174
#[cfg(test)]
mod test {
    use crate::core::gc::RootSet;
    use rune_core::macros::root;

    use super::*;

    /// Every Lisp regexp must translate to the expected Rust regexp.
    #[test]
    fn lisp_regex() {
        let cases = [
            ("foo", "foo"),
            ("\\foo", "\\foo"),
            ("\\(foo\\)", "(foo)"),
            ("(foo)", "\\(foo\\)"),
            ("\\`", "\\A"),
            ("\\'", "\\z"),
            ("[[:word:]]", "[a-zA-Z]"),
            ("[[:word:]_]", "[a-zA-Z_]"),
        ];
        for (lisp, rust) in cases {
            assert_eq!(lisp_regex_to_rust(lisp), rust);
        }
    }

    /// A match recorded by `string_match` is replaced by `replace_match`.
    #[test]
    fn test_replace_match() {
        let roots = &RootSet::default();
        let cx = &mut Context::new(roots);
        root!(env, new(Env), cx);
        let haystack = "foo bar baz";
        string_match("bar", haystack, None, None, env, cx).unwrap();
        let replaced = replace_match("quux", None, None, Some(haystack), None, env, cx).unwrap();
        assert_eq!(replaced, "foo quux baz");
    }
}