Skip to content

Commit a7f3957

Browse files
committed
refactor: clarify the purpose of assert_valid_optimization(), runs after all optimizer passes, except in debug mode it runs after each pass.
1 parent ad1a1f8 commit a7f3957

File tree

2 files changed

+36
-26
lines changed

2 files changed

+36
-26
lines changed

datafusion/expr/src/logical_plan/invariants.rs

+3-11
Original file line numberDiff line numberDiff line change
@@ -77,23 +77,15 @@ fn assert_valid_semantic_plan(plan: &LogicalPlan) -> Result<()> {
7777

7878
/// Returns an error if the plan does not have the expected schema.
7979
/// Ignores metadata and nullability.
80-
pub fn assert_expected_schema(
81-
rule_name: &str,
82-
schema: &DFSchemaRef,
83-
plan: &LogicalPlan,
84-
) -> Result<()> {
80+
pub fn assert_expected_schema(schema: &DFSchemaRef, plan: &LogicalPlan) -> Result<()> {
8581
let equivalent = plan.schema().equivalent_names_and_types(schema);
8682

8783
if !equivalent {
88-
let e = DataFusionError::Internal(format!(
84+
Err(DataFusionError::Internal(format!(
8985
"Failed due to a difference in schemas, original schema: {:?}, new schema: {:?}",
9086
schema,
9187
plan.schema()
92-
));
93-
Err(DataFusionError::Context(
94-
String::from(rule_name),
95-
Box::new(e),
96-
))
88+
)))
9789
} else {
9890
Ok(())
9991
}

datafusion/optimizer/src/optimizer.rs

+33-15
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ impl Optimizer {
360360
plan.check_invariants(InvariantLevel::Executable)
361361
.map_err(|e| {
362362
DataFusionError::Context(
363-
"check_plan_before_optimizers".to_string(),
363+
"check_plan_is_executable before optimizers".to_string(),
364364
Box::new(e),
365365
)
366366
})?;
@@ -372,6 +372,8 @@ impl Optimizer {
372372
let mut previous_plans = HashSet::with_capacity(16);
373373
previous_plans.insert(LogicalPlanSignature::new(&new_plan));
374374

375+
let starting_schema = Arc::clone(new_plan.schema());
376+
375377
let mut i = 0;
376378
while i < options.optimizer.max_passes {
377379
log_plan(&format!("Optimizer input (pass {i})"), &new_plan);
@@ -395,14 +397,23 @@ impl Optimizer {
395397
None => optimize_plan_node(new_plan, rule.as_ref(), config),
396398
}
397399
.and_then(|tnr| {
398-
// verify after each optimizer pass.
399-
assert_valid_optimization(rule.name(), &tnr.data, &starting_schema)
400+
// in debug mode, run checks are each optimer pass
401+
#[cfg(debug_assertions)]
402+
assert_valid_optimization(&tnr.data, &starting_schema)
403+
.map_err(|e| {
404+
DataFusionError::Context(
405+
format!("check_optimizer_specific_invariants after optimizer pass: {}", rule.name()),
406+
Box::new(e),
407+
)
408+
})?;
409+
#[cfg(debug_assertions)]
410+
tnr.data.check_invariants(InvariantLevel::Executable)
400411
.map_err(|e| {
401-
DataFusionError::Context(
402-
"check_optimized_plan".to_string(),
403-
Box::new(e),
404-
)
405-
})?;
412+
DataFusionError::Context(
413+
format!("check_plan_is_executable after optimizer pass: {}", rule.name()),
414+
Box::new(e),
415+
)
416+
})?;
406417

407418
Ok(tnr)
408419
});
@@ -463,12 +474,20 @@ impl Optimizer {
463474
i += 1;
464475
}
465476

477+
// verify that the optimizer passes only mutated what was permitted.
478+
assert_valid_optimization(&new_plan, &starting_schema).map_err(|e| {
479+
DataFusionError::Context(
480+
"check_optimizer_specific_invariants after all passes".to_string(),
481+
Box::new(e),
482+
)
483+
})?;
484+
466485
// verify LP is valid, after the last optimizer pass.
467486
new_plan
468487
.check_invariants(InvariantLevel::Executable)
469488
.map_err(|e| {
470489
DataFusionError::Context(
471-
"check_plan_after_optimizers".to_string(),
490+
"check_plan_is_executable after optimizers".to_string(),
472491
Box::new(e),
473492
)
474493
})?;
@@ -479,19 +498,18 @@ impl Optimizer {
479498
}
480499
}
481500

482-
/// These are invariants which should hold true before and after each optimization.
501+
/// These are invariants which should hold true before and after [`LogicalPlan`] optimization.
483502
///
484503
/// This differs from [`LogicalPlan::check_invariants`], which addresses if a singular
485504
/// LogicalPlan is valid. Instead this address if the optimization (before and after)
486505
/// is valid based upon permitted changes.
487506
fn assert_valid_optimization(
488-
rule_name: &str,
489507
plan: &LogicalPlan,
490508
prev_schema: &Arc<DFSchema>,
491509
) -> Result<()> {
492-
// verify invariant: optimizer rule didn't change the schema
510+
// verify invariant: optimizer passes should not change the schema
493511
// Refer to <https://datafusion.apache.org/contributor-guide/specification/invariants.html#logical-schema-is-invariant-under-logical-optimization>
494-
assert_expected_schema(rule_name, prev_schema, plan)?;
512+
assert_expected_schema(prev_schema, plan)?;
495513

496514
Ok(())
497515
}
@@ -549,8 +567,8 @@ mod tests {
549567
let err = opt.optimize(plan, &config, &observe).unwrap_err();
550568
assert_eq!(
551569
"Optimizer rule 'get table_scan rule' failed\n\
552-
caused by\ncheck_optimized_plan\n\
553-
caused by\nget table_scan rule\n\
570+
caused by\n\
571+
check_optimizer_specific_invariants after optimizer pass: get table_scan rule\n\
554572
caused by\n\
555573
Internal error: Failed due to a difference in schemas, \
556574
original schema: DFSchema { inner: Schema { \

0 commit comments

Comments
 (0)