Skip to content

Commit 738d3c1

Browse files
committed
Replaced advance_by_range and retreat_by_range with a more optimized impl. Perf hasn't changed much (1%) but code size dropped significantly.
1 parent 41328ce commit 738d3c1

File tree

2 files changed

+61
-100
lines changed

2 files changed

+61
-100
lines changed

src/listmerge/advance_retreat.rs

+52-99
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::listmerge::merge::notify_for;
1111
use crate::LV;
1212
use crate::ost::LeafIdx;
1313
use crate::rev_range::RangeRev;
14+
use crate::stats::marker_a;
1415

1516
#[derive(Debug, Eq, PartialEq)]
1617
pub(super) struct QueryResult {
@@ -68,16 +69,34 @@ impl M2Tracker {
6869
}
6970
}
7071

71-
pub(crate) fn advance_by_range(&mut self, mut range: DTRange) {
72+
fn adv_retreat_range(&mut self, mut range: DTRange, incr: i32) {
73+
// This method handles both advancing and retreating. In either case, because of the way
74+
// SpanState is designed, we need to either increment or decrement the state of every
75+
// visited item in the LV range.
76+
77+
// Note: When retreating, we still visit all the items in the range in earliest-to-latest
78+
// order. This is a bit of a wild optimisation, because it's possible (common even) for
79+
// the range to include an edit which inserts a character followed by an edit which deletes
80+
// the character. The obvious way to process that would be to first undo the delete event,
81+
// then undo the insert.
82+
//
83+
// However, that requires that we visit the range in reverse order, which has worse
84+
// performance and requires advance and retreat to be handled differently. So long as the
85+
// *result* of running retreat() is the same, it's safe to not do that, and instead treat the
86+
// span state as an integer and just decrement it twice.
7287
while !range.is_empty() {
73-
// Note the delete could be reversed - but we don't really care here; we just mark the
74-
// whole range anyway.
75-
// let (tag, target, mut len) = self.next_action(range.start);
88+
// if let Some(mut cursor) = self.range_tree.try_find_item(range.start) {
89+
// crate::stats::marker_a();
90+
// self.range_tree.emplace_cursor_unknown(cursor);
91+
// } else {
92+
// crate::stats::marker_b();
93+
// }
94+
7695
let QueryResult {
77-
tag,
7896
target,
7997
offset,
8098
mut leaf_idx,
99+
..
81100
} = self.index_query(range.start);
82101

83102
let len = usize::min(target.len() - offset, range.len());
@@ -100,109 +119,43 @@ impl M2Tracker {
100119
target_range.len(),
101120
&mut notify_for(&mut self.index),
102121
|e| {
103-
if tag == Ins {
104-
e.current_state.mark_inserted();
105-
} else {
106-
e.delete();
107-
}
122+
e.current_state.0 = e.current_state.0.wrapping_add_signed(incr);
108123
}
109124
).0;
110125

111-
// TODO: Emplace it if we can.
112-
// cursor.flush(&mut self.range_tree);
113126
self.range_tree.emplace_cursor_unknown(cursor);
114127
}
115128

116129
range.truncate_keeping_right(len);
117130
}
118-
}
119-
120131

121-
pub fn retreat_by_range(&mut self, mut range: DTRange) {
122-
// We need to go through the range in reverse order to make sure if we visit an insert then
123-
// delete of the same item, we un-delete before un-inserting.
124-
// TODO: Could probably relax this restriction when I feel more comfortable about overall
125-
// correctness.
126-
127-
while !range.is_empty() {
128-
// TODO: This is gross. Clean this up. There's totally a nicer way to write this.
129-
let last_lv = range.last();
130-
131-
if let Some(mut cursor) = self.range_tree.try_find_item(last_lv) {
132-
// Try just modifying the item directly.
133-
//
134-
// The item will only exist in the range tree at all if it was an insert.
135-
let (e, _offset) = cursor.0.get_item(&self.range_tree);
136-
// let chunk_start = last_lv - offset;
137-
let start = range.start.max(e.id.start);
138-
cursor.0.offset = start - e.id.start;
139-
let max_len = range.end - start;
140-
141-
range.end -= self.range_tree.mutate_entry(
142-
&mut cursor,
143-
max_len,
144-
&mut notify_for(&mut self.index),
145-
|e| {
146-
e.current_state.mark_not_inserted_yet();
147-
}
148-
).0;
149-
self.range_tree.emplace_cursor_unknown(cursor);
150-
} else {
151-
// Figure it out the "slow" way, by looking up the item in the index.
152-
let QueryResult { tag, target, offset, mut leaf_idx } = self.index_query(last_lv);
153-
154-
let chunk_start = last_lv - offset;
155-
let start = range.start.max(chunk_start);
156-
let end = usize::min(range.end, chunk_start + target.len());
157-
158-
let e_offset = start - chunk_start; // Usually 0.
159-
160-
let len = end - start;
161-
debug_assert!(len <= range.len());
162-
range.end -= len;
163-
164-
let mut target_range = target.range(e_offset, e_offset + len);
165-
166-
while !target_range.is_empty() {
167-
// STATS.with(|s| {
168-
// let mut s = s.borrow_mut();
169-
// s.2 += 1;
170-
// });
171-
172-
// Because the tag is either entirely delete or entirely insert, it's safe to move
173-
// forwards in this child range. (Which I'm doing because that makes the code much
174-
// easier to reason about).
175-
176-
// We can't reuse the pointer returned by the index_query call because we mutate
177-
// each loop iteration.
178-
179-
let leaf_idx = match replace(&mut leaf_idx, LeafIdx::default()) {
180-
LeafIdx(usize::MAX) => self.marker_at(target_range.start),
181-
x => x,
182-
};
183-
// let mut cursor = self.old_range_tree.mut_cursor_before_item(target_range.start, ptr);
184-
let (mut cursor, _pos) = self.range_tree.mut_cursor_before_item(target_range.start, leaf_idx);
185-
186-
target_range.start += self.range_tree.mutate_entry(
187-
&mut cursor,
188-
target_range.len(),
189-
&mut notify_for(&mut self.index),
190-
|e| {
191-
if tag == Ins {
192-
e.current_state.mark_not_inserted_yet();
193-
} else {
194-
e.current_state.undelete();
195-
}
196-
}
197-
).0;
198-
199-
// TODO: Emplace it if we can.
200-
// cursor.flush(&mut self.range_tree);
201-
self.range_tree.emplace_cursor_unknown(cursor);
202-
}
203-
}
204-
}
132+
}
205133

206-
// self.check_index();
134+
pub(crate) fn advance_by_range(&mut self, range: DTRange) {
135+
self.adv_retreat_range(range, 1);
207136
}
137+
pub(crate) fn retreat_by_range(&mut self, range: DTRange) {
138+
self.adv_retreat_range(range, -1);
139+
}
140+
141+
// // if let Some(mut cursor) = self.range_tree.try_find_item(last_lv) {
142+
// // // Try just modifying the item directly.
143+
// // //
144+
// // // The item will only exist in the range tree at all if it was an insert.
145+
// // let (e, _offset) = cursor.0.get_item(&self.range_tree);
146+
// // // let chunk_start = last_lv - offset;
147+
// // let start = range.start.max(e.id.start);
148+
// // cursor.0.offset = start - e.id.start;
149+
// // let max_len = range.end - start;
150+
// //
151+
// // range.end -= self.range_tree.mutate_entry(
152+
// // &mut cursor,
153+
// // max_len,
154+
// // &mut notify_for(&mut self.index),
155+
// // |e| {
156+
// // e.current_state.mark_not_inserted_yet();
157+
// // }
158+
// // ).0;
159+
// // self.range_tree.emplace_cursor_unknown(cursor);
160+
// // } else {
208161
}

src/listmerge/yjsspan.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::ost::content_tree::Content;
1111
/// Note a u16 (or even a u8) should be fine in practice. Double deletes almost never happen in
1212
/// reality - unless someone is maliciously generating them.
1313
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
14-
pub struct SpanState(u32);
14+
pub struct SpanState(pub(crate) u32);
1515

1616
pub const NOT_INSERTED_YET: SpanState = SpanState(0);
1717
pub const INSERTED: SpanState = SpanState(1);
@@ -79,6 +79,14 @@ impl SpanState {
7979
}
8080
}
8181

82+
pub(crate) fn raw_decrement(&mut self) {
83+
debug_assert!(self.0 >= 1);
84+
self.0 -= 1;
85+
}
86+
pub(crate) fn raw_increment(&mut self) {
87+
self.0 += 1;
88+
}
89+
8290
pub(crate) fn mark_inserted(&mut self) {
8391
if *self != NOT_INSERTED_YET {
8492
panic!("Invalid insert target - item already marked as inserted");

0 commit comments

Comments
 (0)