Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(optimizer): use Cell instead of RefCell for interior mutability #19883

Merged
merged 4 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/frontend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#![feature(iterator_try_collect)]
#![feature(used_with_arg)]
#![feature(try_trait_v2)]
#![feature(cell_update)]
#![recursion_limit = "256"]

#[cfg(test)]
Expand Down
81 changes: 47 additions & 34 deletions src/frontend/src/optimizer/optimizer_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

use core::fmt::Formatter;
use std::cell::{RefCell, RefMut};
use std::cell::{Cell, RefCell, RefMut};
use std::collections::HashMap;
use std::marker::PhantomData;
use std::rc::Rc;
Expand All @@ -35,8 +35,6 @@ type PhantomUnsend = PhantomData<Rc<()>>;

pub struct OptimizerContext {
session_ctx: Arc<SessionImpl>,
/// Store plan node id
next_plan_node_id: RefCell<i32>,
/// The original SQL string, used for debugging.
sql: Arc<str>,
/// Normalized SQL string. See [`HandlerArgs::normalize_sql`].
Expand All @@ -47,14 +45,10 @@ pub struct OptimizerContext {
optimizer_trace: RefCell<Vec<String>>,
/// Store the optimized logical plan of optimizer
logical_explain: RefCell<Option<String>>,
/// Store correlated id
next_correlated_id: RefCell<u32>,
/// Store options or properties from the `with` clause
with_options: WithOptions,
/// Store the Session Timezone and whether it was used.
session_timezone: RefCell<SessionTimezone>,
/// Store expr display id.
next_expr_display_id: RefCell<usize>,
/// Total number of optimization rules have been applied.
total_rule_applied: RefCell<usize>,
/// Store the configs can be overwritten in with clause
Expand All @@ -64,9 +58,22 @@ pub struct OptimizerContext {
/// `PlanRef`, used by rcte's planning. (e.g., in `LogicalCteRef`)
rcte_cache: RefCell<HashMap<ShareId, PlanRef>>,

/// Last assigned plan node ID.
last_plan_node_id: Cell<i32>,
/// Last assigned correlated ID.
last_correlated_id: Cell<u32>,
/// Last assigned expr display ID.
last_expr_display_id: Cell<usize>,

_phantom: PhantomUnsend,
}

pub(in crate::optimizer) struct LastAssignedIds {
last_plan_node_id: i32,
last_correlated_id: u32,
last_expr_display_id: usize,
}

pub type OptimizerContextRef = Rc<OptimizerContext>;

impl OptimizerContext {
Expand All @@ -84,19 +91,21 @@ impl OptimizerContext {
let overwrite_options = OverwriteOptions::new(&mut handler_args);
Self {
session_ctx: handler_args.session,
next_plan_node_id: RefCell::new(RESERVED_ID_NUM.into()),
sql: handler_args.sql,
normalized_sql: handler_args.normalized_sql,
explain_options,
optimizer_trace: RefCell::new(vec![]),
logical_explain: RefCell::new(None),
next_correlated_id: RefCell::new(0),
with_options: handler_args.with_options,
session_timezone,
next_expr_display_id: RefCell::new(RESERVED_ID_NUM.into()),
total_rule_applied: RefCell::new(0),
overwrite_options,
rcte_cache: RefCell::new(HashMap::new()),

last_plan_node_id: Cell::new(RESERVED_ID_NUM.into()),
last_correlated_id: Cell::new(0),
last_expr_display_id: Cell::new(RESERVED_ID_NUM.into()),

_phantom: Default::default(),
}
}
Expand All @@ -107,53 +116,57 @@ impl OptimizerContext {
pub async fn mock() -> OptimizerContextRef {
Self {
session_ctx: Arc::new(SessionImpl::mock()),
next_plan_node_id: RefCell::new(0),
sql: Arc::from(""),
normalized_sql: "".to_owned(),
explain_options: ExplainOptions::default(),
optimizer_trace: RefCell::new(vec![]),
logical_explain: RefCell::new(None),
next_correlated_id: RefCell::new(0),
with_options: Default::default(),
session_timezone: RefCell::new(SessionTimezone::new("UTC".into())),
next_expr_display_id: RefCell::new(0),
total_rule_applied: RefCell::new(0),
overwrite_options: OverwriteOptions::default(),
rcte_cache: RefCell::new(HashMap::new()),

last_plan_node_id: Cell::new(0),
last_correlated_id: Cell::new(0),
last_expr_display_id: Cell::new(0),

_phantom: Default::default(),
}
.into()
}

pub fn next_plan_node_id(&self) -> PlanNodeId {
*self.next_plan_node_id.borrow_mut() += 1;
PlanNodeId(*self.next_plan_node_id.borrow())
}

pub fn get_plan_node_id(&self) -> i32 {
*self.next_plan_node_id.borrow()
PlanNodeId(self.last_plan_node_id.update(|id| id + 1))
}

pub fn set_plan_node_id(&self, next_plan_node_id: i32) {
*self.next_plan_node_id.borrow_mut() = next_plan_node_id;
pub fn next_correlated_id(&self) -> CorrelatedId {
self.last_correlated_id.update(|id| id + 1)
}

pub fn next_expr_display_id(&self) -> usize {
*self.next_expr_display_id.borrow_mut() += 1;
*self.next_expr_display_id.borrow()
self.last_expr_display_id.update(|id| id + 1)
}

pub fn get_expr_display_id(&self) -> usize {
*self.next_expr_display_id.borrow()
pub(in crate::optimizer) fn backup_elem_ids(&self) -> LastAssignedIds {
LastAssignedIds {
last_plan_node_id: self.last_plan_node_id.get(),
last_correlated_id: self.last_correlated_id.get(),
last_expr_display_id: self.last_expr_display_id.get(),
}
}

pub fn set_expr_display_id(&self, expr_display_id: usize) {
*self.next_expr_display_id.borrow_mut() = expr_display_id;
/// This should only be called in [`crate::optimizer::plan_node::reorganize_elements_id`].
pub(in crate::optimizer) fn reset_elem_ids(&self) {
self.last_plan_node_id.set(0);
self.last_correlated_id.set(0);
self.last_expr_display_id.set(0);
}

pub fn next_correlated_id(&self) -> CorrelatedId {
*self.next_correlated_id.borrow_mut() += 1;
*self.next_correlated_id.borrow()
pub(in crate::optimizer) fn restore_elem_ids(&self, backup: LastAssignedIds) {
self.last_plan_node_id.set(backup.last_plan_node_id);
self.last_correlated_id.set(backup.last_correlated_id);
self.last_expr_display_id.set(backup.last_expr_display_id);
Comment on lines +159 to +169
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about just inlining these functions info reorganize_elements_id?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When adding a new id later, we can hardly remember that there's a reorganize_elements_id far away need to be updated. So I decided to hide all the ids inside LastAssignedIds.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get you. We can still add an id in OptimizerContext but not in LastAssignedIds

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you should replace the fields to LastAssignedIds in OptimizerContext

Copy link
Member Author

@stdrc stdrc Dec 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact is that we missed last_correlated_id in reorganize_elements_id before. I want to reduce the possibility of future error, so I moved them to LastAssignedIds and put it right next to OptimizerContext. I also definitely considered directly using LastAssignedIds as a field in OptimizerContext, but IMO it's too annoying to access these ID fields via a long self.last_assigned_ids.last_plan_node_id. After all forgetting to add new id field to LastAssignedIds is not a critical issue, we just need to prevent it to a proper degree.

This PR doesn't aim to completely refactor the optimizer element IDs, otherwise I would extract an IdAllocator struct to allocate IDs instead of self.last_plan_node_id.update in OptimizerContext. Also I don't think use interior mutability here in OptimizerContext is a good idea. But I don't want to do that in this PR. It's just a quick fix to unblock my future work on other things.

}

pub fn add_rule_applied(&self, num: usize) {
Expand Down Expand Up @@ -255,12 +268,12 @@ impl std::fmt::Debug for OptimizerContext {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use .debug_struct?

f,
"QueryContext {{ next_plan_node_id = {}, sql = {}, explain_options = {}, next_correlated_id = {}, with_options = {:?} }}",
self.next_plan_node_id.borrow(),
"QueryContext {{ sql = {}, explain_options = {}, with_options = {:?}, last_plan_node_id = {}, last_correlated_id = {} }}",
self.sql,
self.explain_options,
self.next_correlated_id.borrow(),
&self.with_options
self.with_options,
self.last_plan_node_id.get(),
self.last_correlated_id.get(),
)
}
}
9 changes: 3 additions & 6 deletions src/frontend/src/optimizer/plan_node/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -631,13 +631,10 @@ impl BatchPlanRef for PlanRef {
/// other places. We will reset expression display id to 0 and clone the whole plan to reset the
/// schema.
pub fn reorganize_elements_id(plan: PlanRef) -> PlanRef {
let old_expr_display_id = plan.ctx().get_expr_display_id();
let old_plan_node_id = plan.ctx().get_plan_node_id();
plan.ctx().set_expr_display_id(0);
plan.ctx().set_plan_node_id(0);
let backup = plan.ctx().backup_elem_ids();
plan.ctx().reset_elem_ids();
let plan = PlanCloner::clone_whole_plan(plan);
plan.ctx().set_expr_display_id(old_expr_display_id);
plan.ctx().set_plan_node_id(old_plan_node_id);
plan.ctx().restore_elem_ids(backup);
plan
}

Expand Down
Loading