@@ -18,20 +18,25 @@ package org.apache.gluten.backendsapi.clickhouse
 
 import org.apache.gluten.{CH_BRANCH, CH_COMMIT, GlutenConfig}
 import org.apache.gluten.backendsapi._
+import org.apache.gluten.execution.WriteFilesExecTransformer
 import org.apache.gluten.expression.WindowFunctionsBuilder
 import org.apache.gluten.extension.ValidationResult
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat._
 
 import org.apache.spark.SparkEnv
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning}
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
+import org.apache.spark.sql.execution.datasources.FileFormat
+import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}
+import org.apache.spark.sql.types.{ArrayType, MapType, Metadata, StructField, StructType}
 
 
 import java.util.Locale
@@ -187,6 +192,73 @@ object CHBackendSettings extends BackendSettingsApi with Logging {
     }
   }
 
+  override def supportWriteFilesExec(
+      format: FileFormat,
+      fields: Array[StructField],
+      bucketSpec: Option[BucketSpec],
+      options: Map[String, String]): ValidationResult = {
+
+    def validateCompressionCodec(): Option[String] = {
+      // FIXME: verify that the compression codec is actually supported
+      val compressionCodec = WriteFilesExecTransformer.getCompressionCodec(options)
+      None
+    }
+
+    def validateFileFormat(): Option[String] = {
+      format match {
+        case _: ParquetFileFormat => None
+        case _: OrcFileFormat => None
+        case f: FileFormat => Some(s"Unsupported file format: ${f.getClass.getSimpleName}")
+      }
+    }
+
+    // Validate if all types are supported.
+    def validateDateTypes(): Option[String] = {
+      None
+    }
+
+    def validateFieldMetadata(): Option[String] = {
+      // Copied from CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY
+      val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING"
+      fields
+        .find(_.metadata != Metadata.empty)
+        .filterNot(_.metadata.contains(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY))
+        .map {
+          field =>
+            s"StructField contains metadata information: $field, metadata: ${field.metadata}"
+        }
+    }
+    def validateWriteFilesOptions(): Option[String] = {
+      val maxRecordsPerFile = options
+        .get("maxRecordsPerFile")
+        .map(_.toLong)
+        .getOrElse(SQLConf.get.maxRecordsPerFile)
+      if (maxRecordsPerFile > 0) {
+        Some("Unsupported native write: maxRecordsPerFile not supported.")
+      } else {
+        None
+      }
+    }
+
+    def validateBucketSpec(): Option[String] = {
+      if (bucketSpec.nonEmpty) {
+        Some("Unsupported native write: bucket write is not supported.")
+      } else {
+        None
+      }
+    }
+
+    validateCompressionCodec()
+      .orElse(validateFileFormat())
+      .orElse(validateFieldMetadata())
+      .orElse(validateDateTypes())
+      .orElse(validateWriteFilesOptions())
+      .orElse(validateBucketSpec()) match {
+      case Some(reason) => ValidationResult.failed(reason)
+      case _ => ValidationResult.succeeded
+    }
+  }
+
   override def supportShuffleWithProject(
       outputPartitioning: Partitioning,
       child: SparkPlan): Boolean = {
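
Each validator in the new `supportWriteFilesExec` returns `Option[String]`, where `Some(reason)` signals a failure and `None` signals success. Because Scala's `Option.orElse` takes its argument by name, the final chain evaluates the checks lazily and stops at the first failure. A minimal, self-contained sketch of the same pattern (the names and checks below are illustrative, not Gluten's API):

```scala
object ValidationChainSketch {
  // Each check returns Some(reason) on failure and None on success,
  // mirroring the Option[String] convention used in supportWriteFilesExec.
  def validateFormat(format: String): Option[String] =
    if (Set("parquet", "orc").contains(format)) None
    else Some(s"Unsupported file format: $format")

  def validateBucketSpec(numBuckets: Option[Int]): Option[String] =
    numBuckets.map(_ => "Unsupported native write: bucket write is not supported.")

  def main(args: Array[String]): Unit = {
    // orElse takes its argument by name, so validateBucketSpec only runs
    // if validateFormat returned None (i.e. the format check passed).
    val failure = validateFormat("csv").orElse(validateBucketSpec(Some(8)))
    println(failure.getOrElse("validation succeeded"))
    // prints: Unsupported file format: csv
  }
}
```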
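The `validateFieldMetadata` check tolerates exactly one kind of field metadata: the key under which Spark records the original CHAR/VARCHAR type string. Any other metadata on a `StructField` fails validation. A short illustration using Spark's public `MetadataBuilder` API (the schema here is made up for demonstration):

```scala
import org.apache.spark.sql.types._

object FieldMetadataSketch {
  def main(args: Array[String]): Unit = {
    // Spark stores the original CHAR/VARCHAR type string under this key;
    // the validator deliberately ignores it.
    val charVarcharKey = "__CHAR_VARCHAR_TYPE_STRING"

    val varcharField = StructField(
      "name",
      StringType,
      nullable = true,
      new MetadataBuilder().putString(charVarcharKey, "varchar(10)").build())

    val commentField = StructField(
      "id",
      IntegerType,
      nullable = false,
      new MetadataBuilder().putString("comment", "primary key").build())

    // varcharField carries only the tolerated key, so it would pass;
    // commentField carries other metadata, so it would be rejected.
    println(varcharField.metadata.contains(charVarcharKey)) // true
    println(commentField.metadata == Metadata.empty) // false -> would fail validation
  }
}
```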