rune/
casefiddle.rs

1//! String and character case conversion.
2use std::ops::Range;
3
4use crate::core::{
5    gc::Rt,
6    object::{NIL, Object},
7};
8use crate::fns::StringOrChar;
9use crate::{Context, Env};
10use rune_macros::defun;
11use text_buffer::Buffer as TextBuffer;
12
13#[defun]
14fn capitalize<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
15    match string_or_char {
16        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::Capitalize)),
17        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_uppercase)),
18    }
19}
20
21#[defun]
22fn upcase<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
23    match string_or_char {
24        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::Upcase)),
25        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_uppercase)),
26    }
27}
28
29#[defun]
30fn downcase<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
31    match string_or_char {
32        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::Downcase)),
33        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_lowercase)),
34    }
35}
36
37#[defun]
38fn upcase_initials<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
39    match string_or_char {
40        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::UpcaseInitials)),
41        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_uppercase)),
42    }
43}
44
45#[defun]
46fn upcase_word<'ob>(offset: i64, env: &mut Rt<Env>) -> Object<'ob> {
47    let text_buf = &mut env.current_buffer.get_mut().text;
48    let forward_upcase = offset >= 0;
49    let range = if forward_upcase {
50        find_forward_word(text_buf)
51    } else {
52        find_backward_word(text_buf)
53    };
54    let (start, end) = (range.start, range.end);
55    let (a, b) = text_buf.slice(range);
56    let upcase = |x| casify_string(x, CaseMode::Upcase);
57    let upcased = upcase(a) + &upcase(b);
58    text_buf.delete_range(start, end);
59    text_buf.insert(&upcased);
60    NIL
61}
62
63#[defun]
64fn downcase_word<'ob>(offset: i64, env: &mut Rt<Env>) -> Object<'ob> {
65    let text_buf = &mut env.current_buffer.get_mut().text;
66    let forward_downcase = offset >= 0;
67    let range = if forward_downcase {
68        find_forward_word(text_buf)
69    } else {
70        find_backward_word(text_buf)
71    };
72    let (start, end) = (range.start, range.end);
73    let (a, b) = text_buf.slice(range);
74    let downcase = |x| casify_string(x, CaseMode::Downcase);
75    let downcased = downcase(a) + &downcase(b);
76    text_buf.delete_range(start, end);
77    text_buf.insert(&downcased);
78    NIL
79}
80
81#[defun]
82fn capitalize_word<'ob>(offset: i64, env: &mut Rt<Env>) -> Object<'ob> {
83    let text_buf = &mut env.current_buffer.get_mut().text;
84    let forward_capitalize = offset >= 0;
85    let range = if forward_capitalize {
86        find_forward_word(text_buf)
87    } else {
88        find_backward_word(text_buf)
89    };
90    let (start, end) = (range.start, range.end);
91    let (a, b) = text_buf.slice(range);
92    let capitalize = |x| casify_string(x, CaseMode::Capitalize);
93    let capitalized = capitalize(a) + &capitalize(b);
94    text_buf.delete_range(start, end);
95    text_buf.insert(&capitalized);
96    NIL
97}
98
99fn casify_string(s: &str, mode: CaseMode) -> String {
100    let mut out = String::with_capacity(s.len());
101
102    for word in s.split_inclusive(|c: char| precedes_capitalization(c)) {
103        let mut chars = word.chars();
104        if let Some(c) = chars.next() {
105            match mode {
106                CaseMode::Downcase => out.extend(c.to_lowercase()),
107                CaseMode::Upcase | CaseMode::Capitalize | CaseMode::UpcaseInitials => {
108                    out.extend(c.to_uppercase())
109                }
110            }
111        }
112        for c in chars {
113            match mode {
114                CaseMode::Upcase => out.extend(c.to_uppercase()),
115                CaseMode::Downcase | CaseMode::Capitalize => out.extend(c.to_lowercase()),
116                CaseMode::UpcaseInitials => out.push(c),
117            }
118        }
119    }
120    out
121}
122
/// Returns `true` when `c` ends a word, i.e. the character following it is
/// treated as a word initial by the case-conversion routines.
///
/// Any character that is neither alphabetic nor numeric qualifies.
fn precedes_capitalization(c: char) -> bool {
    let is_word_char = c.is_alphabetic() || c.is_numeric();
    !is_word_char
}
126
/// The case conversion to apply to a piece of text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CaseMode {
    /// Lower-case every character.
    Downcase,
    /// Upper-case every character.
    Upcase,
    /// Upper-case the first character of each word and lower-case the rest.
    Capitalize,
    /// Upper-case the first character of each word and leave the rest as is.
    UpcaseInitials,
}
133
134fn casify_char<T>(c: u64, f: impl Fn(char) -> T) -> u64
135where
136    T: Iterator<Item = char>,
137{
138    // emacs uses an identity function for invalid codepoints
139    if c > crate::lisp::CHAR_MODIFIER_MASK {
140        return c;
141    }
142    let Ok(u) = u32::try_from(c) else { return c };
143    let Ok(chr) = char::try_from(u) else { return c };
144    let mut cased = f(chr);
145    let first = cased.next().unwrap();
146    // if the char changes case to multiple characters, don't change case
147    match cased.next() {
148        Some(_) => c,
149        None => first as u64,
150    }
151}
152
153fn find_forward_word(buf: &TextBuffer) -> Range<usize> {
154    let cursor = buf.cursor().chars();
155    let (s1, s2) = buf.slice(cursor..);
156    let end = cursor
157        + s1.chars()
158            .chain(s2.chars())
159            .enumerate()
160            .skip_while(|(_, c)| !c.is_alphanumeric())
161            .find(|(_, c)| c.is_whitespace())
162            .map(|(idx, _)| idx)
163            .unwrap_or_else(|| buf.len_chars() - cursor);
164    cursor..end
165}
166
167fn find_backward_word(buf: &TextBuffer) -> Range<usize> {
168    let cursor = buf.cursor().chars();
169    let (s1, s2) = buf.slice(..cursor);
170    let start = cursor
171        - s1.chars()
172            .chain(s2.chars())
173            .rev()
174            .enumerate()
175            .skip_while(|(_, c)| !c.is_alphanumeric())
176            .find(|(_, c)| c.is_whitespace())
177            .map(|(idx, _)| idx)
178            .unwrap_or(0);
179    start..cursor
180}
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RootSet;

    /// `downcase` on a string and on single characters.
    #[test]
    fn test_downcase() {
        let roots = &RootSet::default();
        let cx = &Context::new(roots);
        assert_eq!(downcase("The cat in the hat".into(), cx), "the cat in the hat");
        assert_eq!(downcase('x'.into(), cx), 'x');
        assert_eq!(downcase('X'.into(), cx), 'x');
    }

    /// `upcase` on strings, single characters and invalid code points.
    #[test]
    fn test_upcase() {
        let roots = &RootSet::default();
        let cx = &Context::new(roots);
        // Emacs Doc Tests
        assert_eq!(upcase("The cat in the hat".into(), cx), "THE CAT IN THE HAT");
        // U+FB01 LATIN SMALL LIGATURE FI, written as an escape so the single
        // code point survives tools that normalize it to the two letters "fi".
        // As a string it expands to "FI"; as a character it has no
        // single-character uppercase form and is returned unchanged.
        assert_eq!(upcase("\u{FB01}".into(), cx), "FI");
        assert_eq!(upcase('\u{FB01}'.into(), cx), '\u{FB01}');
        assert_eq!(upcase('x'.into(), cx), 'X');
        assert_eq!(upcase('X'.into(), cx), 'X');

        // Basic escape characters
        assert_eq!(upcase("\n".into(), cx), "\n");
        assert_eq!(upcase("\t".into(), cx), "\t");
        assert_eq!(upcase("\r".into(), cx), "\r");

        // Control characters
        assert_eq!(upcase("\u{0}".into(), cx), "\u{0}");
        assert_eq!(upcase("\u{1B}".into(), cx), "\u{1B}");
        assert_eq!(upcase("\u{7F}".into(), cx), "\u{7F}");

        // Non-ASCII characters
        assert_eq!(upcase("αβγ".into(), cx), "ΑΒΓ");
        assert_eq!(upcase("åäö".into(), cx), "ÅÄÖ");

        // Mixed content
        assert_eq!(upcase("hello\nworld".into(), cx), "HELLO\nWORLD");
        assert_eq!(upcase("foo\tbar".into(), cx), "FOO\tBAR");
        assert_eq!(upcase("path\\to\\file\"name\"".into(), cx), "PATH\\TO\\FILE\"NAME\"");

        // Invalid code points
        assert_eq!(upcase(StringOrChar::Char(0xD800), cx), 0xD800);
        assert_eq!(upcase(StringOrChar::Char(u64::MAX), cx), cx.add(u64::MAX));
    }

    /// `capitalize` on strings and single characters.
    #[test]
    fn test_capitalize() {
        let roots = &RootSet::default();
        let cx = &Context::new(roots);

        // Emacs doc tests
        assert_eq!(capitalize("The cat in the hat".into(), cx), "The Cat In The Hat");
        assert_eq!(capitalize("THE 77TH-HATTED CAT".into(), cx), "The 77th-Hatted Cat");
        assert_eq!(capitalize('x'.into(), cx), 'X');
        assert_eq!(capitalize('X'.into(), cx), 'X');
        assert_eq!(capitalize('ß'.into(), cx), 'ß');
        assert_eq!(capitalize("ß".into(), cx), "SS");

        // from elprop
        // TODO: implement syntax tables so it's known whether a character makes a word or symbol
        // // U+1D100 MUSICAL SYMBOL SINGLE BARLINE (Other-Symbol)
        // // U+0041 LATIN CAPITAL LETTER A
        // assert_eq!(capitalize("𝄀A", cx), Ok("𝄀a"));
        // // U+0024 DOLLAR SIGN (Currency-Symbol)
        // // U+0041 LATIN CAPITAL LETTER A
        // assert_eq!(capitalize("$A", cx), Ok("$a"));
        // // U+002D HYPHEN-MINUS (Dash-Punctuation)
        // // U+0041 LATIN CAPITAL LETTER A
        // assert_eq!(capitalize("-A", cx), Ok("-A"));
        // // U+005E CIRCUMFLEX ACCENT (Modifier-Symbol)
        // // U+0041 LATIN CAPITAL LETTER A
        // assert_eq!(capitalize("^A", cx), Ok("^A"));
        // // U+0FBE TIBETAN KU RU KHA (Other-Symbol)
        // // U+0041 LATIN CAPITAL LETTER A
        // assert_eq!(capitalize("྾A", cx), Ok("྾A"));
        // // U+10A50 KHAROSHTHI PUNCTUATION DOT (Other-Punctuation)
        // // U+104B0 OSAGE CAPITAL LETTER A
        // // (becomes) U+104D8 OSAGE SMALL LETTER A
        // assert_eq!(capitalize("𐩐𐒰", cx), Ok("𐩐𐓘"));
    }

    /// `upcase_initials` on a string and on single characters.
    #[test]
    fn test_upcase_initials() {
        let roots = &RootSet::default();
        let cx = &Context::new(roots);

        // Emacs Doc Tests
        assert_eq!(upcase_initials("The CAT in the hAt".into(), cx), "The CAT In The HAt");
        assert_eq!(upcase_initials('x'.into(), cx), 'X');
        assert_eq!(upcase_initials('X'.into(), cx), 'X');
    }

    /// Tests for the buffer-editing commands `upcase_word`, `downcase_word`
    /// and `capitalize_word`.
    #[cfg(not(miri))] // Uses SIMD
    mod upcase_word {
        use crate::core::gc::Context;
        use crate::core::gc::RootSet;
        use rune_core::macros::root;

        use super::*;

        /// Cursor at the start of the buffer, converting the word after it.
        #[test]
        fn forward() {
            let roots = &RootSet::default();
            let cx = &mut Context::new(roots);
            root!(env, new(Env), cx);

            // αβγ word
            // ^-----
            env.current_buffer.get_mut().text.insert("αβγ word");
            env.current_buffer.get_mut().text.set_cursor(0);
            upcase_word(1, env);
            assert_eq!(env.current_buffer.get().text, "ΑΒΓ word");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("ΑΒΓ woRd");
            env.current_buffer.get_mut().text.set_cursor(0);
            downcase_word(1, env);
            assert_eq!(env.current_buffer.get().text, "αβγ woRd");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("αΒΓ wORD");
            env.current_buffer.get_mut().text.set_cursor(0);
            capitalize_word(1, env);
            assert_eq!(env.current_buffer.get().text, "Αβγ wORD");
        }

        /// Cursor at the end of the buffer, converting the word before it.
        #[test]
        fn backward() {
            let roots = &RootSet::default();
            let cx = &mut Context::new(roots);
            root!(env, new(Env), cx);

            // upcase αβγword
            //        -------^
            env.current_buffer.get_mut().text.insert("upcase αβγword ");
            env.current_buffer.get_mut().text.set_cursor(15);
            upcase_word(-1, env);
            assert_eq!(env.current_buffer.get().text, "upcase ΑΒΓWORD ");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("dOwNcAsE αΒΓWord ");
            env.current_buffer.get_mut().text.set_cursor(17);
            downcase_word(-1, env);
            assert_eq!(env.current_buffer.get().text, "dOwNcAsE αβγword ");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("cAPITALIZE αΒΓWORD ");
            env.current_buffer.get_mut().text.set_cursor(19);
            capitalize_word(-1, env);
            assert_eq!(env.current_buffer.get().text, "cAPITALIZE Αβγword ");
        }

        /// Cursor in the middle of a word: only the part after it is converted.
        #[test]
        fn forward_cursor_inside_of_word() {
            let roots = &RootSet::default();
            let cx = &mut Context::new(roots);
            root!(env, new(Env), cx);

            // upcase word
            //  ^----
            env.current_buffer.get_mut().text.insert("upcase word");
            env.current_buffer.get_mut().text.set_cursor(2);
            upcase_word(1, env);
            assert_eq!(env.current_buffer.get().text, "upCASE word");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("DOWNCASE WORD");
            env.current_buffer.get_mut().text.set_cursor(2);
            downcase_word(1, env);
            assert_eq!(env.current_buffer.get().text, "DOwncase WORD");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("capitalize word");
            env.current_buffer.get_mut().text.set_cursor(2);
            capitalize_word(1, env);
            assert_eq!(env.current_buffer.get().text, "caPitalize word");
        }

        /// Cursor in the middle of a word: only the part before it is converted.
        #[test]
        fn backward_cursor_inside_of_word() {
            let roots = &RootSet::default();
            let cx = &mut Context::new(roots);
            root!(env, new(Env), cx);

            // upcase word
            //        --^
            env.current_buffer.get_mut().text.insert("upcase word");
            env.current_buffer.get_mut().text.set_cursor(9);
            upcase_word(-1, env);
            assert_eq!(env.current_buffer.get().text, "upcase WOrd");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("downcase WORD");
            env.current_buffer.get_mut().text.set_cursor(11);
            downcase_word(-1, env);
            assert_eq!(env.current_buffer.get().text, "downcase woRD");
            env.current_buffer.get_mut().text = text_buffer::Buffer::default();
            env.current_buffer.get_mut().text.insert("capitalize word");
            env.current_buffer.get_mut().text.set_cursor(13);
            capitalize_word(-1, env);
            assert_eq!(env.current_buffer.get().text, "capitalize Word");
        }
    }
}