text_buffer/
buffer.rs

1//! A gap buffer implementation for text editing.
2//!
3//! Note that character here refers to a [unicode scalar
4//! value](https://www.unicode.org/glossary/#unicode_scalar_value), not a grapheme cluster, ascii
5//! character, codepoint, or byte.
6#![warn(clippy::all, clippy::pedantic)]
7#![expect(clippy::must_use_candidate)]
8#![expect(clippy::missing_panics_doc)]
9use crate::{
10    Position,
11    metric::{BufferMetrics, Metric},
12};
13use get_size2::GetSize;
14use std::{
15    cmp,
16    fmt::{self, Debug, Display},
17    ops::{Bound, Deref, Range, RangeBounds},
18};
19use str_indices::chars;
20
21/// A Gap buffer. This represents the text of a buffer, and allows for
22/// efficient insertion and deletion of text.
23#[derive(Default, GetSize)]
24pub struct Buffer {
25    /// The buffer data
26    data: Box<[u8]>,
27    /// start of the gap. Both `gap_start` and `gap_end` are the same point, but
28    /// `gap_start` is never a valid byte index, and `gap_end` is always used
29    /// instead.
30    gap_start: usize,
31    /// The end of the gap in bytes
32    gap_end: usize,
33    /// The number of characters until the gap
34    gap_chars: usize,
35    /// The current cursor.
36    cursor: GapMetric,
37    total: Metric,
38    /// A mapping between byte and character positions. Doesn't account for the gap.
39    metrics: BufferMetrics,
40    new_gap_size: usize,
41}
42
43impl Debug for Buffer {
44    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45        let start = self.to_str(..self.gap_start);
46        let end = self.to_str(self.gap_end..);
47        // repeat _ for the gap length
48        let gap = "_".repeat(self.gap_len());
49        f.debug_struct("Buffer")
50            .field("data", &format!("{start}{gap}{end}"))
51            .field("gap_start", &self.gap_start)
52            .field("gap_end", &self.gap_end)
53            .field("gap_chars", &self.gap_chars)
54            .field("cursor", &self.cursor)
55            .field("metrics", &self.metrics)
56            .field("total_chars", &self.total.chars)
57            .field("new_gap_size", &self.new_gap_size)
58            .finish()
59    }
60}
61
62impl Display for Buffer {
63    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64        let (s1, s2) = self.slice(..);
65        write!(f, "{s1}")?;
66        write!(f, "{s2}")
67    }
68}
69
70const METRIC_SIZE: usize = crate::metric::MAX_LEAF;
71struct MetricBuilder<'a> {
72    slice: &'a str,
73    start: usize,
74    end: usize,
75}
76
77impl<'a> MetricBuilder<'a> {
78    fn new(slice: &'a str) -> Self {
79        Self { slice, start: 0, end: slice.len().min(METRIC_SIZE) }
80    }
81}
82
83impl Iterator for MetricBuilder<'_> {
84    type Item = Metric;
85
86    fn next(&mut self) -> Option<Self::Item> {
87        if self.start == self.slice.len() {
88            return None;
89        }
90        let mut end = self.end;
91        while !self.slice.is_char_boundary(end) {
92            end -= 1;
93        }
94        let slice = &self.slice[self.start..end];
95        self.start = end;
96        self.end = cmp::min(self.end + METRIC_SIZE, self.slice.len());
97        Some(metrics(slice))
98    }
99
100    fn size_hint(&self) -> (usize, Option<usize>) {
101        let len = self.slice.len() - self.start;
102        let extra = usize::from(len % METRIC_SIZE != 0);
103        let size = len / METRIC_SIZE;
104        (size + extra, None)
105    }
106}
107
108/// Metric with gap buffer accounted for
109#[derive(Debug, Default, Copy, Clone, Eq, GetSize)]
110struct GapMetric {
111    bytes: usize,
112    chars: usize,
113}
114
115impl std::ops::Sub for GapMetric {
116    type Output = Metric;
117
118    fn sub(self, rhs: Self) -> Self::Output {
119        Metric { bytes: self.bytes - rhs.bytes, chars: self.chars - rhs.chars }
120    }
121}
122
123impl Display for GapMetric {
124    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
125        write!(f, "{}", Metric { bytes: self.bytes, chars: self.chars })
126    }
127}
128
129impl PartialEq for GapMetric {
130    fn eq(&self, other: &Self) -> bool {
131        let eq = self.bytes == other.bytes;
132        if eq {
133            debug_assert_eq!(self.chars, other.chars);
134        } else {
135            debug_assert_ne!(self.chars, other.chars);
136        }
137        eq
138    }
139}
140
141fn calc_start_gap_size(len: usize) -> usize {
142    // The gap can be up to 5% of the total size, but at least 64 bytes
143    cmp::max(len / 20, Buffer::GAP_SIZE)
144}
145
146impl From<String> for Buffer {
147    #[inline]
148    fn from(data: String) -> Self {
149        // reuse the allocation from the string. This means we *might* have a
150        // gap of 0
151        let builder = MetricBuilder::new(&data);
152        let metrics = BufferMetrics::build(builder);
153        let (storage, len) = {
154            let len = data.len();
155            let mut vec: Vec<u8> = data.into_bytes();
156            vec.resize(vec.capacity(), 0);
157            debug_assert_eq!(vec.capacity(), vec.len());
158            (vec.into_boxed_slice(), len)
159        };
160        let gap_len = storage.len() - len;
161        let total = metrics.len();
162        Self {
163            data: storage,
164            gap_start: len,
165            gap_end: len + gap_len,
166            gap_chars: total.chars,
167            cursor: GapMetric::default(),
168            total,
169            metrics,
170            new_gap_size: calc_start_gap_size(len),
171        }
172    }
173}
174
175impl From<&str> for Buffer {
176    #[inline]
177    fn from(data: &str) -> Self {
178        let new_gap_size = calc_start_gap_size(data.len());
179        let storage = {
180            let capacity = data.len() + new_gap_size;
181            let mut storage = Vec::with_capacity(capacity);
182            storage.resize(new_gap_size, 0);
183            storage.extend_from_slice(data.as_bytes());
184            assert_eq!(storage.len(), capacity);
185            storage.into_boxed_slice()
186        };
187        let builder = MetricBuilder::new(data);
188        let metrics = BufferMetrics::build(builder);
189        Self {
190            data: storage,
191            gap_start: 0,
192            gap_end: new_gap_size,
193            gap_chars: 0,
194            cursor: GapMetric { bytes: new_gap_size, chars: 0 },
195            total: metrics.len(),
196            new_gap_size,
197            metrics,
198        }
199    }
200}
201
202impl<T> PartialEq<T> for Buffer
203where
204    T: Deref<Target = str>,
205{
206    fn eq(&self, other: &T) -> bool {
207        PartialEq::eq(self, &**other)
208    }
209}
210
211impl PartialEq<str> for Buffer {
212    fn eq(&self, other: &str) -> bool {
213        if self.len_bytes() != other.len() {
214            return false;
215        }
216        self.to_str(..self.gap_start) == &other[..self.gap_start]
217            && self.to_str(self.gap_end..) == &other[self.gap_start..]
218    }
219}
220
221impl Buffer {
222    #[cfg(not(test))]
223    const GAP_SIZE: usize = 64;
224    #[cfg(test)]
225    const GAP_SIZE: usize = 5;
226    const MAX_GAP: usize = 1024 * 8;
227
228    /// Create a new empty text buffer with the default gap size.
229    #[must_use]
230    pub fn new() -> Self {
231        Self::with_gap(Self::GAP_SIZE)
232    }
233
234    /// Create a new empty text buffer with the specified gap size.
235    #[must_use]
236    pub fn with_gap(gap: usize) -> Self {
237        Self { new_gap_size: gap, ..Self::default() }
238    }
239
240    /// Grow the buffer to accommodate the new slice. Moves the gap to the
241    /// cursor position at the same time.
242    fn grow(&mut self, slice: &str) {
243        // If the string being inserted is large, we want to grow the gap faster
244        if slice.len() >= self.new_gap_size {
245            let len = slice.len() + self.len_bytes();
246            self.new_gap_size =
247                cmp::max(len / 20, cmp::min(slice.len().next_power_of_two(), Self::MAX_GAP));
248        }
249        let new_capacity = {
250            let pre_gap = self.gap_start;
251            let post_gap = self.data.len() - self.gap_end;
252            pre_gap + slice.len() + self.new_gap_size + post_gap
253        };
254        let mut buffer = Vec::with_capacity(new_capacity);
255        let bytes;
256        #[expect(clippy::comparison_chain)]
257        if self.cursor.chars < self.gap_chars {
258            buffer.extend_from_slice(&self.data[..self.cursor.bytes]); // pre cursor
259            buffer.extend_from_slice(slice.as_bytes()); // new text
260            buffer.resize(buffer.len() + self.new_gap_size, 0); // new gap
261            bytes = buffer.len();
262            buffer.extend_from_slice(&self.data[self.cursor.bytes..self.gap_start]); // cursor to gap
263            buffer.extend_from_slice(&self.data[self.gap_end..]); // post gap
264        } else if self.cursor.chars > self.gap_chars {
265            buffer.extend_from_slice(&self.data[..self.gap_start]); // pre gap
266            buffer.extend_from_slice(&self.data[self.gap_end..self.cursor.bytes]); // gap to cursor
267            buffer.extend_from_slice(slice.as_bytes()); // new text
268            buffer.resize(buffer.len() + self.new_gap_size, 0); // new gap
269            bytes = buffer.len();
270            buffer.extend_from_slice(&self.data[self.cursor.bytes..]); // post cursor
271        } else {
272            // cursor is at gap
273            buffer.extend_from_slice(&self.data[..self.gap_start]); // pre gap
274            buffer.extend_from_slice(slice.as_bytes()); // new text
275            buffer.resize(buffer.len() + self.new_gap_size, 0); // new gap
276            bytes = buffer.len();
277            buffer.extend_from_slice(&self.data[self.gap_end..]); // post gap
278        }
279        self.cursor.bytes = bytes;
280        self.data = buffer.into_boxed_slice();
281        debug_assert_eq!(self.data.len(), new_capacity);
282        let new = metrics(slice);
283        self.cursor.chars += new.chars;
284        self.gap_chars = self.cursor.chars;
285        self.gap_end = self.cursor.bytes;
286        self.gap_start = self.gap_end - self.new_gap_size;
287        self.total += new;
288        self.new_gap_size =
289            cmp::max(self.len_bytes() / 20, cmp::min(self.new_gap_size * 2, Self::MAX_GAP));
290    }
291
292    /// Insert a character into the buffer at the cursor.
293    #[inline]
294    pub fn insert_char(&mut self, chr: char) {
295        let buf = &mut [0; 4];
296        self.insert(chr.encode_utf8(buf));
297    }
298
299    /// Insert the text into the buffer at the cursor.
300    #[inline]
301    pub fn insert(&mut self, slice: &str) {
302        if slice.is_empty() {
303            return;
304        }
305        self.metrics.insert(self.to_abs_pos(self.cursor), MetricBuilder::new(slice));
306        if self.gap_len() < slice.len() {
307            self.grow(slice);
308        } else {
309            // if gap is not at cursor, move it there
310            if self.gap_chars != self.cursor.chars {
311                self.move_gap(self.cursor);
312            }
313            let new_slice = &mut self.data[self.gap_start..(self.gap_start + slice.len())];
314            new_slice.copy_from_slice(slice.as_bytes());
315            self.gap_start += slice.len();
316            let new = metrics(slice);
317            self.gap_chars += new.chars;
318            self.cursor.chars += new.chars;
319            self.total += new;
320        }
321    }
322
323    /// Delete backwards from the cursor `size` characters.
324    #[inline]
325    pub fn delete_backwards(&mut self, size: usize) {
326        let size = size.min(self.cursor.chars);
327        self.delete_range(self.cursor.chars - size, self.cursor.chars);
328    }
329
330    /// Delete forwards from the cursor `size` characters.
331    #[inline]
332    pub fn delete_forwards(&mut self, size: usize) {
333        self.delete_range(self.cursor.chars, self.cursor.chars + size);
334    }
335
336    /// Delete all text between the two character positions.
337    #[inline]
338    pub fn delete_range(&mut self, beg: usize, end: usize) {
339        if beg == end {
340            return;
341        }
342        let (mut beg_chars, mut end_chars) = (beg, end);
343        if beg_chars > end_chars {
344            (beg_chars, end_chars) = (end_chars, beg_chars);
345        }
346        end_chars = end_chars.min(self.total.chars);
347        beg_chars = beg_chars.min(self.total.chars);
348        let end_bytes = self.char_to_byte(end_chars);
349        let beg_bytes = self.char_to_byte(beg_chars);
350        if end_bytes != beg_bytes {
351            let beg = GapMetric { bytes: beg_bytes, chars: beg_chars };
352            let end = GapMetric { bytes: end_bytes, chars: end_chars };
353            self.metrics.delete(self.to_abs_pos(beg), self.to_abs_pos(end));
354            self.delete_byte_range(beg, end);
355        }
356    }
357
358    fn delete_byte_range(&mut self, beg: GapMetric, end: GapMetric) {
359        // TODO: optimize this so that we count the chars deleted when calculating position
360        assert!(beg.bytes <= end.bytes, "beg ({beg}) is greater then end ({end})");
361        assert!(end.bytes <= self.data.len(), "end out of bounds");
362        self.assert_char_boundary(beg.bytes);
363        self.assert_char_boundary(end.bytes);
364        if end.bytes < self.gap_start {
365            // delete before gap
366            //
367            // hello New York City||||||||||
368            //     ^      ^       ^         ^
369            //     beg    end     gap_start gap_end
370            //
371            // shift end..gap_start to the right
372            //
373            // hell|||||||||||||||||ork City
374            //     ^                ^
375            //     gap_start        gap_end
376
377            // update character count
378            let deleted = end - beg;
379            let delete_offset_chars = self.gap_chars - end.chars;
380            self.gap_chars -= deleted.chars + delete_offset_chars;
381            self.total -= deleted;
382            let new_end = self.gap_end - (self.gap_start - end.bytes);
383            // shift data
384            self.data.copy_within(end.bytes..self.gap_start, new_end);
385            // update cursor
386            self.update_cursor_chars(beg.bytes, end.bytes, deleted.chars);
387            if self.cursor.bytes < self.gap_start {
388                if self.cursor.bytes > end.bytes {
389                    self.cursor.bytes += self.gap_len();
390                } else if self.cursor.bytes >= beg.bytes {
391                    self.cursor.bytes = new_end;
392                }
393            }
394            // update gap position
395            self.gap_end = new_end;
396            self.gap_start = beg.bytes;
397        } else if beg.bytes >= self.gap_end {
398            // delete after gap
399            //
400            // ||||||||||hello New York City
401            // ^         ^         ^   ^
402            // gap_start gap_end   beg end
403            //
404            // shift gap_end..beg to the left
405            //
406            // hello New |||||||||||||| City
407            //           ^             ^
408            //           gap_start     gap_end
409
410            // update character count
411
412            let deleted = end - beg;
413            self.total -= deleted;
414            self.gap_chars += beg.chars - self.gap_chars;
415            // shift data
416            self.data.copy_within(self.gap_end..beg.bytes, self.gap_start);
417            // update cursor
418            self.update_cursor_chars(beg.bytes, end.bytes, deleted.chars);
419            if self.cursor.bytes >= self.gap_end {
420                if self.cursor.bytes < beg.bytes {
421                    self.cursor.bytes -= self.gap_len();
422                } else if self.cursor.bytes < end.bytes {
423                    self.cursor.bytes = end.bytes;
424                }
425            }
426            // update gap position
427            self.gap_start += beg.bytes - self.gap_end;
428            self.gap_end = end.bytes;
429        } else if beg.bytes < self.gap_start && end.bytes >= self.gap_end {
430            // delete spans gap
431            //
432            // hello|||||||||| New York City
433            //  ^   ^         ^       ^
434            //  beg gap_start gap_end end
435            //
436            // update start and end of gap
437            //
438            // h||||||||||||||||||||||k City
439            //  ^                     ^
440            //  gap_start             gap_end
441
442            // update character count
443            let gap_start = GapMetric { bytes: self.gap_start, chars: self.gap_chars };
444            let before = gap_start - beg;
445            let gap_end = GapMetric { bytes: self.gap_end, chars: self.gap_chars };
446            let after = end - gap_end;
447            self.gap_chars -= before.chars;
448            self.total -= before + after;
449            // update gap position
450            self.gap_start = beg.bytes;
451            self.gap_end = end.bytes;
452            self.update_cursor_chars(beg.bytes, end.bytes, before.chars + after.chars);
453            if (beg.bytes..end.bytes).contains(&self.cursor.bytes) {
454                self.cursor.bytes = end.bytes;
455            }
456        } else {
457            unreachable!(
458                "delete region inside gap -- gap: {}-{}, span: {beg}-{end}",
459                self.gap_start, self.gap_end
460            );
461        }
462    }
463
464    fn update_cursor_chars(&mut self, beg: usize, end: usize, size: usize) {
465        if self.cursor.bytes > beg {
466            if self.cursor.bytes > end {
467                self.cursor.chars -= size;
468            } else {
469                self.cursor.chars = self.gap_chars;
470            }
471        }
472    }
473
474    /// Get the cursor position.
475    #[inline]
476    pub fn cursor(&self) -> Position {
477        Position::new(self.to_abs_pos(self.cursor))
478    }
479
480    /// Get the character at `pos`.
481    #[inline]
482    pub fn char_at(&self, pos: usize) -> Option<char> {
483        if pos == self.len_chars() {
484            return None;
485        }
486        let byte = self.char_to_byte(pos);
487        let mut end = byte + 1;
488        // UTF-8 character can only be 4 bytes long
489        for _ in 0..3 {
490            if self.is_char_boundary(end) {
491                break;
492            }
493            end += 1;
494        }
495        debug_assert!(self.is_char_boundary(end));
496        self.to_str(byte..end).chars().next()
497    }
498
499    /// Move the gap out of `range`. This is useful when you need to access the
500    /// data in a contiguous manner.
501    #[inline]
502    pub fn move_gap_out_of(&mut self, range: impl RangeBounds<usize>) {
503        if !range.contains(&self.gap_chars)
504            || range.start_bound() == Bound::Included(&self.gap_chars)
505        {
506            return;
507        }
508
509        let Range { start, end } = Self::bounds_to_range(range, self.len_bytes());
510
511        let pos = if self.gap_chars - start < end - self.gap_chars {
512            GapMetric { bytes: self.char_to_byte(start), chars: start }
513        } else {
514            GapMetric { bytes: self.char_to_byte(end), chars: end }
515        };
516        self.move_gap(pos);
517    }
518
519    #[inline]
520    fn bounds_to_range(bounds: impl RangeBounds<usize>, max: usize) -> Range<usize> {
521        let start = match bounds.start_bound() {
522            Bound::Included(x) => *x,
523            Bound::Excluded(_) => unreachable!(),
524            Bound::Unbounded => 0,
525        };
526
527        let end = match bounds.end_bound() {
528            Bound::Included(_) => unimplemented!("inclusive end bound not supported"),
529            Bound::Excluded(x) => *x,
530            Bound::Unbounded => max,
531        };
532
533        start..end
534    }
535
536    #[doc(hidden)]
537    #[inline]
538    pub fn benchmark_move_gap(&mut self) {
539        if self.gap_chars == 0 {
540            self.move_gap(GapMetric { bytes: self.data.len(), chars: self.len_chars() });
541        } else {
542            self.move_gap(GapMetric::default());
543        }
544    }
545
546    #[doc(hidden)]
547    #[inline]
548    pub fn benchmark_build_metrics(string: &str) -> usize {
549        let builder = MetricBuilder::new(string);
550        let metrics = BufferMetrics::build(builder);
551        metrics.len().bytes
552    }
553
554    fn move_gap(&mut self, pos: GapMetric) {
555        assert!(pos.bytes <= self.data.len(), "attempt to move gap out of bounds");
556        self.assert_char_boundary(pos.bytes);
557        if pos.bytes < self.gap_start {
558            // move gap backwards
559            let shift = GapMetric { bytes: self.gap_start, chars: self.gap_chars } - pos;
560            self.gap_chars -= shift.chars;
561
562            self.data.copy_within(pos.bytes..self.gap_start, self.gap_end - shift.bytes);
563            // if gap moves across cursor, update cursor position
564            if self.cursor.bytes < self.gap_start && self.cursor.bytes >= pos.bytes {
565                self.cursor.bytes += self.gap_len();
566            }
567            self.gap_start = pos.bytes;
568            self.gap_end -= shift.bytes;
569        } else if pos.bytes >= self.gap_end {
570            // move gap forwards
571            self.gap_chars += pos.chars - self.gap_chars;
572            self.data.copy_within(self.gap_end..pos.bytes, self.gap_start);
573            let size = pos.bytes - self.gap_end;
574            // if gap moves across cursor, update cursor position
575            if self.cursor.bytes >= self.gap_end && self.cursor.bytes < pos.bytes {
576                self.cursor.bytes -= self.gap_len();
577            }
578            self.gap_start += size;
579            self.gap_end = pos.bytes;
580        } else {
581            panic!(
582                "move gap position byte: ({pos}) inside gap ({}-{})",
583                self.gap_start, self.gap_end
584            );
585        }
586    }
587
588    /// Set the cursor to the `pos` character.
589    #[inline]
590    pub fn set_cursor(&mut self, pos: usize) {
591        let pos = pos.min(self.total.chars);
592        let byte_pos = self.char_to_byte(pos);
593        self.cursor = GapMetric { bytes: byte_pos, chars: pos };
594    }
595
596    fn to_abs_pos(&self, pos: GapMetric) -> Metric {
597        let chars = pos.chars;
598        let bytes = if pos.bytes < self.gap_start {
599            pos.bytes
600        } else if pos.bytes >= self.gap_end {
601            pos.bytes - self.gap_len()
602        } else {
603            unreachable!()
604        };
605        Metric { bytes, chars }
606    }
607
608    fn to_gapped_pos(&self, pos: Metric) -> GapMetric {
609        let chars = pos.chars;
610        let bytes = if pos.bytes < self.gap_start {
611            pos.bytes
612        } else if pos.bytes >= self.gap_start {
613            pos.bytes + self.gap_len()
614        } else {
615            unreachable!()
616        };
617        GapMetric { bytes, chars }
618    }
619
620    /// Get the length of the buffer in bytes.
621    #[inline]
622    pub fn len_bytes(&self) -> usize {
623        debug_assert_eq!(self.total.bytes + self.gap_len(), self.data.len());
624        self.total.bytes
625    }
626
627    /// Get the length of the buffer in characters.
628    #[inline]
629    pub const fn len_chars(&self) -> usize {
630        self.total.chars
631    }
632
633    /// Return true if the buffer is empty.
634    #[inline]
635    pub const fn is_empty(&self) -> bool {
636        self.total.chars == 0
637    }
638
639    #[inline]
640    const fn gap_len(&self) -> usize {
641        self.gap_end - self.gap_start
642    }
643
644    /// Covert the character position to a byte position.
645    #[inline]
646    pub fn char_to_byte(&self, pos: usize) -> usize {
647        if pos == self.gap_chars {
648            return self.gap_end;
649        }
650
651        if pos + 1 == self.gap_chars {
652            for i in 1..=4 {
653                let pos = self.gap_start - i;
654                if self.is_char_boundary(pos) {
655                    return pos;
656                }
657            }
658            unreachable!("couldn't find char boundary");
659        }
660
661        let (base, offset) = self.metrics.search_char(pos);
662        debug_assert_eq!(base.chars + offset, pos);
663
664        let base = self.to_gapped_pos(base);
665
666        if offset == 0 {
667            return base.bytes;
668        }
669
670        self.assert_char_boundary(base.bytes);
671
672        if base.chars < self.gap_chars {
673            if pos < self.gap_chars {
674                let string = self.to_str(base.bytes..self.gap_start);
675                chars::to_byte_idx(string, offset) + base.bytes
676            } else {
677                // the char crosses the gap
678                let string = self.to_str(self.gap_end..);
679                self.gap_end + chars::to_byte_idx(string, pos - self.gap_chars)
680            }
681        } else {
682            let string = self.to_str(base.bytes..);
683            chars::to_byte_idx(string, offset) + base.bytes
684        }
685    }
686
687    /// Convert the byte position to a character position.
688    #[inline]
689    pub fn byte_to_char(&self, pos: usize) -> usize {
690        if pos == self.gap_end {
691            return self.gap_chars;
692        }
693
694        if pos == self.gap_start {
695            return self.gap_chars;
696        }
697
698        assert!(pos >= self.gap_end || pos < self.gap_start, "position ({pos}) inside gap");
699
700        let raw_pos = if pos > self.gap_end { pos - self.gap_len() } else { pos };
701
702        let (base, offset) = self.metrics.search_byte(raw_pos);
703        debug_assert_eq!(base.bytes + offset, raw_pos);
704
705        let base = self.to_gapped_pos(base);
706
707        if offset == 0 {
708            return base.chars;
709        }
710
711        self.assert_char_boundary(base.bytes);
712
713        if base.bytes < self.gap_start {
714            if pos < self.gap_start {
715                let string = self.to_str(base.bytes..self.gap_start);
716                base.chars + chars::count(&string[..offset])
717            } else {
718                // the char crosses the gap
719                let string = self.to_str(self.gap_end..);
720                self.gap_chars + chars::count(&string[..pos - self.gap_end])
721            }
722        } else {
723            let string = self.to_str(base.bytes..);
724            base.chars + chars::count(&string[..offset])
725        }
726    }
727
728    #[inline]
729    fn to_str(&self, range: impl std::slice::SliceIndex<[u8], Output = [u8]>) -> &str {
730        if cfg!(debug_assertions) {
731            std::str::from_utf8(&self.data[range]).unwrap()
732        } else {
733            unsafe { std::str::from_utf8_unchecked(&self.data[range]) }
734        }
735    }
736
737    #[inline]
738    #[doc(hidden)]
739    pub fn as_str(&mut self) -> &str {
740        self.move_gap_out_of(..);
741        let slice = if self.gap_start == 0 {
742            self.to_str(self.gap_end..)
743        } else {
744            self.to_str(..self.gap_start)
745        };
746        assert_eq!(slice.len(), self.len_bytes());
747        slice
748    }
749
750    #[expect(clippy::doc_markdown)]
751    /// Get a slice from the buffer with the given character bounds. If the slice is split across
752    /// the gap, the two halves are returned separately. If a contiguous slice is needed, call
753    /// `](crate::Buffer::slice_[`move_gap_out_of`](crate::Buffer::move_gap_out_of) first.
754    #[inline]
755    pub fn slice(&self, bounds: impl RangeBounds<usize>) -> (&str, &str) {
756        let mut range = Self::bounds_to_range(bounds, self.total.chars);
757        range.end = self.char_to_byte(range.end);
758        range.start = self.char_to_byte(range.start);
759        // the range straddles the gap, so we need to copy the two halves
760        if range.start < self.gap_start && self.gap_start < range.end {
761            let before = self.to_str(range.start..self.gap_start);
762            let after = self.to_str(self.gap_end..range.end);
763            (before, after)
764        } else {
765            (self.to_str(range), "")
766        }
767    }
768
769    fn assert_char_boundary(&self, pos: usize) {
770        if cfg!(debug_assertions) {
771            if pos == self.gap_start {
772                return;
773            }
774            assert!(self.is_char_boundary(pos), "position ({pos}) not on utf8 boundary");
775        }
776    }
777
778    fn is_char_boundary(&self, pos: usize) -> bool {
779        match self.data.get(pos) {
780            Some(byte) => is_char_boundary(*byte),
781            None => pos == self.data.len(),
782        }
783    }
784}
785
786fn metrics(slice: &str) -> Metric {
787    let chars = chars::count(slice);
788    Metric { bytes: slice.len(), chars }
789}
790
791#[expect(clippy::cast_possible_wrap)]
792const fn is_char_boundary(byte: u8) -> bool {
793    // This is bit magic equivalent to: b < 128 || b >= 192
794    (byte as i8) >= -0x40
795}
796
797#[cfg(test)]
798mod test {
799    use super::*;
800
801    #[test]
802    fn create() {
803        let string = "hello buffer";
804        let buffer = Buffer::from(string);
805        assert_eq!(buffer.data.len(), string.len() + Buffer::GAP_SIZE);
806        assert_eq!(buffer.gap_end, Buffer::GAP_SIZE);
807        assert_eq!(buffer.gap_start, 0);
808
809        let string = String::from("hello buffer");
810        let buffer = Buffer::from(string);
811        assert_eq!(buffer, "hello buffer");
812    }
813
814    #[test]
815    fn test_empty() {
816        let mut buffer = Buffer::new();
817        assert_eq!(buffer.data.len(), 0);
818        assert_eq!(buffer.gap_len(), 0);
819        assert_eq!(buffer, "");
820        buffer.insert("hello");
821        assert_eq!(buffer, "hello");
822
823        let mut buffer = Buffer::new();
824        buffer.delete_range(0, 0);
825        assert_eq!(buffer, "");
826        buffer.delete_range(0, 5);
827        assert_eq!(buffer, "");
828    }
829
830    #[test]
831    fn insert() {
832        let string = "hello buffer";
833        let mut buffer = Buffer::from(string);
834        buffer.len_bytes();
835        buffer.insert_char('x');
836        buffer.len_bytes();
837        assert_eq!(buffer.data.len(), string.len() + Buffer::GAP_SIZE);
838        assert_eq!(buffer.gap_end, Buffer::GAP_SIZE);
839        assert_eq!(buffer.gap_start, 1);
840        assert_eq!(buffer, "xhello buffer");
841    }
842
843    #[test]
844    fn insert_slice() {
845        let string = String::from("world");
846        let new_string = "hi ";
847        let mut buffer = Buffer::from(string);
848        buffer.insert(new_string);
849        buffer.move_gap_out_of(..);
850        assert_eq!(buffer, "hi world");
851        buffer.insert("starting Θ text ");
852        assert_eq!(buffer, "hi starting Θ text world");
853        buffer.set_cursor(21);
854        buffer.insert("x");
855        assert_eq!(buffer, "hi starting Θ text woxrld");
856    }
857
858    #[test]
859    fn empty() {
860        let mut buffer = Buffer::from("");
861        assert_eq!(buffer, "");
862        buffer.delete_range(1, 2);
863        assert_eq!(buffer, "");
864    }
865
866    #[test]
867    fn test_delete() {
868        let world = "world";
869        let hello = "hello ";
870        let mut buffer = Buffer::from(world);
871        buffer.insert(hello);
872        buffer.delete_backwards(4);
873        assert_eq!(buffer.gap_start, hello.len() - 4);
874        buffer.move_gap_out_of(..);
875        buffer.move_gap_out_of(..);
876        buffer.move_gap(GapMetric { bytes: buffer.char_to_byte(7), chars: 7 });
877        buffer.move_gap_out_of(..);
878        assert_eq!(buffer, "heworld");
879    }
880
881    #[test]
882    fn delete_forwards() {
883        let world = "world";
884        let hello = "hello ";
885        let mut buffer = Buffer::from(world);
886        buffer.insert(hello);
887        buffer.delete_forwards(4);
888        buffer.move_gap_out_of(..);
889        assert_eq!(buffer, "hello d");
890    }
891
892    #[test]
893    fn test_delete_region() {
894        let mut buffer = Buffer::from("world");
895        buffer.insert("hello ");
896        buffer.delete_range(1, 3);
897        buffer.move_gap_out_of(..);
898        assert_eq!(buffer, "hlo world");
899        buffer.delete_range(4, 6);
900        buffer.move_gap_out_of(..);
901        assert_eq!(buffer, "hlo rld");
902    }
903
904    #[test]
905    fn test_char_at() {
906        let buffer = Buffer::from("aµ福");
907        assert_eq!(buffer.char_at(0), Some('a'));
908        assert_eq!(buffer.char_at(1), Some('µ'));
909        assert_eq!(buffer.char_at(2), Some('福'));
910        assert_eq!(buffer.char_at(3), None);
911    }
912
913    #[test]
914    fn test_delete_nothing() {
915        let mut buffer = Buffer::from("world");
916        buffer.insert("hello ");
917        buffer.delete_range(3, 3);
918        assert_eq!(buffer, "hello world");
919    }
920
921    // cases found during fuzzing
922    #[test]
923    fn edge_cases() {
924        let mut buffer = Buffer::from(":?abdix7");
925        assert_eq!(buffer.len_bytes(), 8);
926        buffer.delete_range(2, 5);
927        assert_eq!(buffer.len_bytes(), 5);
928        buffer.delete_range(5, 4);
929        assert_eq!(buffer.len_bytes(), 4);
930        buffer.delete_range(0, 3);
931
932        let mut buffer = Buffer::from("xyz");
933        buffer.insert("abc");
934        buffer.set_cursor(2);
935        buffer.delete_range(1, 4);
936        assert_eq!(buffer, "ayz");
937        buffer.insert("b");
938        assert_eq!(buffer, "abyz");
939
940        let mut buffer = Buffer::from("ƽaejcoeuz");
941        buffer.delete_range(5, 6);
942        buffer.delete_range(1, 8);
943        assert_eq!(buffer, "ƽ");
944    }
945
946    // from reference implementation
947    #[test]
948    fn test_delete_to_gap() {
949        let mut buffer = Buffer::from("\n\n\n\nAutomerge is too");
950        buffer.insert("per. Some graduate students in ");
951        buffer.set_cursor(10);
952        buffer.delete_forwards(21);
953        assert_eq!(buffer, "per. Some \n\n\n\nAutomerge is too");
954    }
955
956    // fuzzing
957    #[test]
958    fn test_bounds() {
959        let mut buffer = Buffer::from("world");
960        buffer.insert("hello ");
961        buffer.delete_range(3, 100);
962        assert_eq!(buffer, "hel");
963        buffer.delete_range(10, 1);
964        assert_eq!(buffer, "h");
965
966        let mut buffer = Buffer::from(",skeobg x");
967        buffer.delete_range(10, 10);
968
969        let mut buffer = Buffer::from("+skeocptv'eigp");
970        buffer.delete_range(30, 6);
971    }
972
973    #[test]
974    fn resize() {
975        let world = "world";
976        let hello = "hello ";
977        let mut buffer = Buffer::from(world);
978        buffer.insert(hello);
979        assert_eq!(buffer.gap_start, hello.len());
980        assert_eq!(buffer, "hello world");
981    }
982
983    #[test]
984    fn cursor() {
985        let string = "world";
986        let new_string = "hi ";
987        let mut buffer = Buffer::from(string);
988        buffer.insert(new_string);
989        assert_eq!(buffer.gap_chars, new_string.len());
990    }
991
992    #[test]
993    fn test_slice() {
994        let mut buffer = Buffer::from("hello world");
995        assert_eq!(buffer.slice(..), ("hello world", ""));
996        buffer.set_cursor(5);
997        buffer.insert("\u{B5}");
998        assert_eq!(buffer.slice(0..0), ("", ""));
999        assert_eq!(buffer.slice(..6), ("hello\u{B5}", ""));
1000        assert_eq!(buffer.slice(6..), (" world", ""));
1001        assert_eq!(buffer.slice(6..6), ("", ""));
1002        assert_eq!(buffer.slice(5..11), ("\u{B5}", " worl"));
1003        assert_eq!(buffer.slice(5..11), ("\u{B5}", " worl"));
1004        assert_eq!(buffer.slice(4..6), ("o\u{B5}", ""));
1005        assert_eq!(buffer.slice(..), ("hello\u{B5}", " world"));
1006    }
1007
1008    #[test]
1009    fn test_byte_to_char_conversion() {
1010        let buffer = Buffer::from("hello world");
1011        for i in 0..buffer.len_chars() {
1012            assert_eq!(buffer.byte_to_char(buffer.char_to_byte(i)), i);
1013        }
1014        let buffer = Buffer::from("hello\u{B5} world");
1015        for i in 0..buffer.len_chars() {
1016            assert_eq!(buffer.byte_to_char(buffer.char_to_byte(i)), i);
1017        }
1018        let mut buffer = Buffer::from("\u{B5}\u{45}hello world");
1019        buffer.set_cursor(5);
1020        buffer.insert("\u{B9}");
1021        buffer.set_cursor(10);
1022        buffer.insert("\u{A0}x");
1023        for i in 0..buffer.len_chars() {
1024            assert_eq!(buffer.byte_to_char(buffer.char_to_byte(i)), i);
1025        }
1026    }
1027
1028    #[test]
1029    fn test_build_unicode() {
1030        let string = "aaaaaaaaaՂaaaaaaaaa";
1031        let _ = Buffer::from(string);
1032    }
1033
1034    #[test]
1035    fn test_append() {
1036        let mut buffer = Buffer::from("aa");
1037        buffer.set_cursor(3);
1038        let string = "B\u{1b}BBBBBB\u{1b}\0\0\0\0\0\0BB\u{1b}\u{1b}\u{1b}\u{1b}\u{1b}\u{1b}B\u{7}BBBBBBBBBBB\u{1b}\u{1b}\u{1b}B\u{7}BBBBBBBBBBBB\u{1b}\u{1b}B\u{7}BBBBBBBBB";
1039        buffer.insert(string);
1040    }
1041
1042    #[test]
1043    fn test_fuzzer() {
1044        let mut buffer = Buffer::new();
1045        buffer.set_cursor(1);
1046        buffer.insert("Ղ\u{2}\u{2}\0\0\0");
1047        buffer.set_cursor(4);
1048        buffer.insert("&\0''''''''''''''''''''%'''''&\0''''''''''''''''''''%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@''''''''''");
1049        buffer.set_cursor(39);
1050        buffer.insert("'\u{2}&\0''''''''''''''''''''%''''''''''''''''''''''''''''");
1051        buffer.delete_range(184, 169);
1052        buffer.set_cursor(127);
1053        buffer.insert("00000000061288823:*********");
1054        buffer.set_cursor(132);
1055        buffer.insert("5''''''''''''''\0\0\0\0\0'''''''");
1056        buffer.set_cursor(97);
1057        buffer.insert("''?????????????????????z?????????????????????'''''\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{10}");
1058        buffer.delete_range(13, 138);
1059        buffer.set_cursor(25);
1060        buffer
1061            .insert("yyyyyyyyyyyyyy\u{2}\0\u{2}\0\0\u{1}\u{17}H\u{17}\u{17}\u{17}\u{17}\u{17}\0\0");
1062        buffer.set_cursor(138);
1063        buffer.insert("\u{17}?\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\0\0\0\0\0\0\u{3}\0\0\0''''''''");
1064        buffer.set_cursor(39);
1065        buffer.insert("\0\0\0''''''''''");
1066        buffer.delete_range(247, 45);
1067    }
1068
1069    #[test]
1070    fn test_pos() {
1071        let mut buffer = Buffer::new();
1072        buffer.set_cursor(1);
1073        buffer.insert("AAAAAAAAAAAAAAAAAAA");
1074        buffer.set_cursor(10);
1075        buffer.insert("AAAAAA\0\0AAAAAA");
1076        buffer.set_cursor(26);
1077    }
1078}