@@ -97,7 +97,19 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
97
97
extern const int INVALID_JOIN_ON_EXPRESSION;
98
98
}
99
99
}
100
-
100
+ namespace
101
+ {
102
+ DB::NamesAndTypesList blockToNameAndTypeList (const DB::Block & header)
103
+ {
104
+ DB::NamesAndTypesList types;
105
+ for (const auto & name : header.getNames ())
106
+ {
107
+ const auto * column = header.findByName (name);
108
+ types.push_back (DB::NameAndTypePair (column->name , column->type ));
109
+ }
110
+ return types;
111
+ }
112
+ }
101
113
namespace local_engine
102
114
{
103
115
using namespace DB ;
@@ -377,90 +389,87 @@ DataTypePtr wrapNullableType(bool nullable, DataTypePtr nested_type)
377
389
return nested_type;
378
390
}
379
391
380
- QueryPlanPtr SerializedPlanParser::parse ( const substrait::Plan & plan )
392
+ void adjustOutput ( const DB::QueryPlanPtr & query_plan, const substrait::PlanRel & root_rel )
381
393
{
382
- logDebugMessage (plan, " substrait plan" );
383
- parseExtensions (plan.extensions ());
384
- if (plan.relations_size () == 1 )
394
+ if (root_rel.root ().names_size ())
385
395
{
386
- auto root_rel = plan.relations ().at (0 );
387
- if (!root_rel.has_root ())
388
- {
389
- throw Exception (ErrorCodes::BAD_ARGUMENTS, " must have root rel!" );
390
- }
391
- std::list<const substrait::Rel *> rel_stack;
392
- auto query_plan = parseOp (root_rel.root ().input (), rel_stack);
393
- if (root_rel.root ().names_size ())
394
- {
395
- ActionsDAGPtr actions_dag = std::make_shared<ActionsDAG>(blockToNameAndTypeList (query_plan->getCurrentDataStream ().header ));
396
- NamesWithAliases aliases;
397
- auto cols = query_plan->getCurrentDataStream ().header .getNamesAndTypesList ();
398
- if (cols.getNames ().size () != static_cast <size_t >(root_rel.root ().names_size ()))
399
- {
400
- throw Exception (ErrorCodes::LOGICAL_ERROR, " Missmatch result columns size." );
401
- }
402
- for (int i = 0 ; i < static_cast <int >(cols.getNames ().size ()); i++)
403
- {
404
- aliases.emplace_back (NameWithAlias (cols.getNames ()[i], root_rel.root ().names (i)));
405
- }
406
- actions_dag->project (aliases);
407
- auto expression_step = std::make_unique<ExpressionStep>(query_plan->getCurrentDataStream (), actions_dag);
408
- expression_step->setStepDescription (" Rename Output" );
409
- query_plan->addStep (std::move (expression_step));
410
- }
396
+ ActionsDAGPtr actions_dag = std::make_shared<ActionsDAG>(blockToNameAndTypeList (query_plan->getCurrentDataStream ().header ));
397
+ NamesWithAliases aliases;
398
+ auto cols = query_plan->getCurrentDataStream ().header .getNamesAndTypesList ();
399
+ if (cols.getNames ().size () != static_cast <size_t >(root_rel.root ().names_size ()))
400
+ throw Exception (ErrorCodes::LOGICAL_ERROR, " Missmatch result columns size." );
401
+ for (int i = 0 ; i < static_cast <int >(cols.getNames ().size ()); i++)
402
+ aliases.emplace_back (NameWithAlias (cols.getNames ()[i], root_rel.root ().names (i)));
403
+ actions_dag->project (aliases);
404
+ auto expression_step = std::make_unique<ExpressionStep>(query_plan->getCurrentDataStream (), actions_dag);
405
+ expression_step->setStepDescription (" Rename Output" );
406
+ query_plan->addStep (std::move (expression_step));
407
+ }
411
408
412
- // fixes: issue-1874, to keep the nullability as expected.
413
- const auto & output_schema = root_rel.root ().output_schema ();
414
- if (output_schema.types_size ())
409
+ // fixes: issue-1874, to keep the nullability as expected.
410
+ const auto & output_schema = root_rel.root ().output_schema ();
411
+ if (output_schema.types_size ())
412
+ {
413
+ auto original_header = query_plan->getCurrentDataStream ().header ;
414
+ const auto & original_cols = original_header.getColumnsWithTypeAndName ();
415
+ if (static_cast <size_t >(output_schema.types_size ()) != original_cols.size ())
416
+ throw Exception (ErrorCodes::LOGICAL_ERROR, " Mismatch output schema" );
417
+ bool need_final_project = false ;
418
+ ColumnsWithTypeAndName final_cols;
419
+ for (int i = 0 ; i < output_schema.types_size (); ++i)
415
420
{
416
- auto original_header = query_plan->getCurrentDataStream ().header ;
417
- const auto & original_cols = original_header.getColumnsWithTypeAndName ();
418
- if (static_cast <size_t >(output_schema.types_size ()) != original_cols.size ())
419
- {
420
- throw Exception (ErrorCodes::LOGICAL_ERROR, " Mismatch output schema" );
421
- }
422
- bool need_final_project = false ;
423
- ColumnsWithTypeAndName final_cols;
424
- for (int i = 0 ; i < output_schema.types_size (); ++i)
421
+ const auto & col = original_cols[i];
422
+ auto type = TypeParser::parseType (output_schema.types (i));
423
+ // At present, we only check nullable mismatch.
424
+ // intermediate aggregate data is special, no check here.
425
+ if (type->isNullable () != col.type ->isNullable () && !typeid_cast<const DataTypeAggregateFunction *>(col.type .get ()))
425
426
{
426
- const auto & col = original_cols[i];
427
- auto type = TypeParser::parseType (output_schema.types (i));
428
- // At present, we only check nullable mismatch.
429
- // intermediate aggregate data is special, no check here.
430
- if (type->isNullable () != col.type ->isNullable () && !typeid_cast<const DataTypeAggregateFunction *>(col.type .get ()))
427
+ if (type->isNullable ())
431
428
{
432
- if (type->isNullable ())
433
- {
434
- auto wrapped = wrapNullableType (true , col.type );
435
- final_cols.emplace_back (type->createColumn (), wrapped, col.name );
436
- need_final_project = !wrapped->equals (*col.type );
437
- }
438
- else
439
- {
440
- final_cols.emplace_back (type->createColumn (), removeNullable (col.type ), col.name );
441
- need_final_project = true ;
442
- }
429
+ auto wrapped = wrapNullableType (true , col.type );
430
+ final_cols.emplace_back (type->createColumn (), wrapped, col.name );
431
+ need_final_project = !wrapped->equals (*col.type );
443
432
}
444
433
else
445
434
{
446
- final_cols.push_back (col);
435
+ final_cols.emplace_back (type->createColumn (), removeNullable (col.type ), col.name );
436
+ need_final_project = true ;
447
437
}
448
438
}
449
- if (need_final_project)
439
+ else
450
440
{
451
- ActionsDAGPtr final_project
452
- = ActionsDAG::makeConvertingActions (original_cols, final_cols, ActionsDAG::MatchColumnsMode::Position);
453
- QueryPlanStepPtr final_project_step = std::make_unique<ExpressionStep>(query_plan->getCurrentDataStream (), final_project);
454
- final_project_step->setStepDescription (" Project for output schema" );
455
- query_plan->addStep (std::move (final_project_step));
441
+ final_cols.push_back (col);
456
442
}
457
443
}
458
- return query_plan;
444
+ if (need_final_project)
445
+ {
446
+ ActionsDAGPtr final_project
447
+ = ActionsDAG::makeConvertingActions (original_cols, final_cols, ActionsDAG::MatchColumnsMode::Position);
448
+ QueryPlanStepPtr final_project_step = std::make_unique<ExpressionStep>(query_plan->getCurrentDataStream (), final_project);
449
+ final_project_step->setStepDescription (" Project for output schema" );
450
+ query_plan->addStep (std::move (final_project_step));
451
+ }
459
452
}
460
- else
461
- {
453
+ }
454
+
455
+ QueryPlanPtr SerializedPlanParser::parse (const substrait::Plan & plan)
456
+ {
457
+ logDebugMessage (plan, " substrait plan" );
458
+ parseExtensions (plan.extensions ());
459
+ if (plan.relations_size () != 1 )
462
460
throw Exception (ErrorCodes::BAD_ARGUMENTS, " too many relations found" );
463
- }
461
+
462
+ const substrait::PlanRel & root_rel = plan.relations ().at (0 );
463
+ if (!root_rel.has_root ())
464
+ throw Exception (ErrorCodes::BAD_ARGUMENTS, " must have root rel!" );
465
+
466
+ if (root_rel.root ().input ().has_write ())
467
+ throw Exception (ErrorCodes::BAD_ARGUMENTS, " write pipeline is not supported yet!" );
468
+
469
+ std::list<const substrait::Rel *> rel_stack;
470
+ auto query_plan = parseOp (root_rel.root ().input (), rel_stack);
471
+ adjustOutput (query_plan, root_rel);
472
+ return query_plan;
464
473
}
465
474
466
475
QueryPlanPtr SerializedPlanParser::parseOp (const substrait::Rel & rel, std::list<const substrait::Rel *> & rel_stack)
@@ -553,17 +562,6 @@ QueryPlanPtr SerializedPlanParser::parseOp(const substrait::Rel & rel, std::list
553
562
return query_plan;
554
563
}
555
564
556
- NamesAndTypesList SerializedPlanParser::blockToNameAndTypeList (const Block & header)
557
- {
558
- NamesAndTypesList types;
559
- for (const auto & name : header.getNames ())
560
- {
561
- const auto * column = header.findByName (name);
562
- types.push_back (NameAndTypePair (column->name , column->type ));
563
- }
564
- return types;
565
- }
566
-
567
565
std::optional<String> SerializedPlanParser::getFunctionSignatureName (UInt32 function_ref) const
568
566
{
569
567
auto it = function_mapping.find (std::to_string (function_ref));
@@ -1713,14 +1711,11 @@ substrait::ReadRel::LocalFiles SerializedPlanParser::parseLocalFiles(const std::
1713
1711
return local_files;
1714
1712
}
1715
1713
1716
- std::unique_ptr<LocalExecutor> SerializedPlanParser::createExecutor (DB::QueryPlanPtr query_plan)
1714
+ DB::QueryPipelineBuilderPtr SerializedPlanParser::buildQueryPipeline (DB::QueryPlan & query_plan)
1717
1715
{
1718
- Stopwatch stopwatch;
1719
- auto * logger = &Poco::Logger::get (" SerializedPlanParser" );
1720
1716
const Settings & settings = context->getSettingsRef ();
1721
-
1722
1717
QueryPriorities priorities;
1723
- auto query_status = std::make_shared<QueryStatus>(
1718
+ const auto query_status = std::make_shared<QueryStatus>(
1724
1719
context,
1725
1720
" " ,
1726
1721
context->getClientInfo (),
@@ -1729,26 +1724,35 @@ std::unique_ptr<LocalExecutor> SerializedPlanParser::createExecutor(DB::QueryPla
1729
1724
IAST::QueryKind::Select,
1730
1725
settings,
1731
1726
0 );
1732
-
1733
- QueryPlanOptimizationSettings optimization_settings{.optimize_plan = settings.query_plan_enable_optimizations };
1734
- auto pipeline_builder = query_plan->buildQueryPipeline (
1727
+ const QueryPlanOptimizationSettings optimization_settings{.optimize_plan = settings.query_plan_enable_optimizations };
1728
+ return query_plan.buildQueryPipeline (
1735
1729
optimization_settings,
1736
1730
BuildQueryPipelineSettings{
1737
1731
.actions_settings
1738
1732
= ExpressionActionsSettings{.can_compile_expressions = true , .min_count_to_compile_expression = 3 , .compile_expressions = CompileExpressions::yes},
1739
1733
.process_list_element = query_status});
1734
+ }
1735
+
1736
+ std::unique_ptr<LocalExecutor> SerializedPlanParser::createExecutor (DB::QueryPlanPtr query_plan)
1737
+ {
1738
+ Stopwatch stopwatch;
1739
+
1740
+ const Settings & settings = context->getSettingsRef ();
1741
+ auto pipeline_builder = buildQueryPipeline (*query_plan);
1742
+
1740
1743
QueryPipeline pipeline = QueryPipelineBuilder::getPipeline (std::move (*pipeline_builder));
1741
- LOG_INFO (logger, " build pipeline {} ms" , stopwatch.elapsedMicroseconds () / 1000.0 );
1742
1744
1745
+ auto * logger = &Poco::Logger::get (" SerializedPlanParser" );
1746
+ LOG_INFO (logger, " build pipeline {} ms" , stopwatch.elapsedMicroseconds () / 1000.0 );
1743
1747
LOG_DEBUG (
1744
1748
logger, " clickhouse plan [optimization={}]:\n {}" , settings.query_plan_enable_optimizations , PlanUtil::explainPlan (*query_plan));
1745
1749
LOG_DEBUG (logger, " clickhouse pipeline:\n {}" , QueryPipelineUtil::explainPipeline (pipeline));
1746
1750
1747
- return std::make_unique<LocalExecutor>(
1748
- context, std::move (query_plan), std::move (pipeline), query_plan-> getCurrentDataStream (). header . cloneEmpty () );
1751
+ bool dump_pipeline = context-> getConfigRef (). getBool ( " dump_pipeline " , false );
1752
+ return std::make_unique<LocalExecutor>( std:: move (query_plan), std::move (pipeline), dump_pipeline );
1749
1753
}
1750
1754
1751
- QueryPlanPtr SerializedPlanParser::parse (const std::string_view plan)
1755
+ QueryPlanPtr SerializedPlanParser::parse (std::string_view plan)
1752
1756
{
1753
1757
substrait::Plan s_plan;
1754
1758
// / https://stackoverflow.com/questions/52028583/getting-error-parsing-protobuf-data
@@ -1776,15 +1780,6 @@ QueryPlanPtr SerializedPlanParser::parse(const std::string_view plan)
1776
1780
return res;
1777
1781
}
1778
1782
1779
- QueryPlanPtr SerializedPlanParser::parseJson (const std::string_view & json_plan)
1780
- {
1781
- substrait::Plan plan;
1782
- auto s = google::protobuf::util::JsonStringToMessage (json_plan, &plan);
1783
- if (!s.ok ())
1784
- throw Exception (ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, " Parse substrait::Plan from json string failed: {}" , s.ToString ());
1785
- return parse (plan);
1786
- }
1787
-
1788
1783
SerializedPlanParser::SerializedPlanParser (const ContextPtr & context_) : context(context_)
1789
1784
{
1790
1785
}
@@ -2035,7 +2030,7 @@ SharedContextHolder SerializedPlanParser::shared_context;
2035
2030
2036
2031
LocalExecutor::~LocalExecutor ()
2037
2032
{
2038
- if (context-> getConfigRef (). getBool ( " dump_pipeline" , false ) )
2033
+ if (dump_pipeline)
2039
2034
LOG_INFO (&Poco::Logger::get (" LocalExecutor" ), " Dump pipeline:\n {}" , dumpPipeline ());
2040
2035
2041
2036
if (spark_buffer)
@@ -2109,11 +2104,11 @@ Block & LocalExecutor::getHeader()
2109
2104
return header;
2110
2105
}
2111
2106
2112
- LocalExecutor::LocalExecutor (const ContextPtr & context_, QueryPlanPtr query_plan, QueryPipeline && pipeline, const Block & header_ )
2107
+ LocalExecutor::LocalExecutor (QueryPlanPtr query_plan, QueryPipeline && pipeline, bool dump_pipeline_ )
2113
2108
: query_pipeline(std::move(pipeline))
2114
2109
, executor(std::make_unique<PullingPipelineExecutor>(query_pipeline))
2115
- , header(header_ )
2116
- , context(context_ )
2110
+ , header(query_plan-> getCurrentDataStream ().header.cloneEmpty() )
2111
+ , dump_pipeline(dump_pipeline_ )
2117
2112
, ch_column_to_spark_row(std::make_unique<CHColumnToSparkRow>())
2118
2113
, current_query_plan(std::move(query_plan))
2119
2114
{
0 commit comments