
Commit 81c2bdd

fix: fix spell style (#151)
* bugfix: fix spell
1 parent 520fe7a commit 81c2bdd

Note: large commits have some content hidden by default, so only a subset of the 42 changed files is shown below.

42 files changed (+242 -109 lines)

java/openmldb-batch/scala_style.xml

+9 -3

@@ -60,7 +60,9 @@
 <parameter name="maxParameters"><![CDATA[8]]></parameter>
 </parameters>
 </check>
-<check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true">
+<!-- Update by 4paradigm -->
+<!-- init:<check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true"> -->
+<check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
 <parameters>
 <parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter>
 </parameters>
@@ -73,7 +75,7 @@
 <check level="warning" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"/>
 <check level="warning" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"/>
 <check level="warning" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"/>
-<check level="warning" class="org.scalastyle.file.RegexChecker" enabled="true">
+<check level="warning" class="org.scalastyle.file.RegexChecker" enabled="false">
 <parameters>
 <parameter name="regex"><![CDATA[println]]></parameter>
 </parameters>
@@ -118,6 +120,8 @@
 <check level="warning" class="org.scalastyle.scalariform.VarFieldChecker" enabled="false"/>
 <check level="warning" class="org.scalastyle.scalariform.VarLocalChecker" enabled="false"/>
 <check level="warning" class="org.scalastyle.scalariform.RedundantIfChecker" enabled="false"/>
+<!-- Update by 4paradigm -->
+<!-- init:<check level="warning" class="org.scalastyle.scalariform.TokenChecker" enabled="true"> -->
 <check level="warning" class="org.scalastyle.scalariform.TokenChecker" enabled="false">
 <parameters>
 <parameter name="regex"><![CDATA[println]]></parameter>
@@ -134,7 +138,9 @@
 <check level="warning" class="org.scalastyle.scalariform.LowercasePatternMatchChecker" enabled="true"/>
 <check level="warning" class="org.scalastyle.scalariform.MultipleStringLiteralsChecker" enabled="true">
 <parameters>
-<parameter name="allowed"><![CDATA[2]]></parameter>
+<!-- Update by 4paradigm -->
+<!-- init:<parameter name="allowed"><![CDATA[2]]></parameter> -->
+<parameter name="allowed"><![CDATA[10]]></parameter>
 <parameter name="ignoreRegex"><![CDATA[^""$]]></parameter>
 </parameters>
 </check>
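
The scalastyle changes above disable MagicNumberChecker and the println RegexChecker, and raise the MultipleStringLiteralsChecker allowance from 2 to 10, keeping the original settings in the init: comments. As a rough illustration (hypothetical Scala, not part of this commit), code like the following previously produced those warnings and now passes:

// Hypothetical snippet: with MagicNumberChecker disabled and the
// MultipleStringLiteralsChecker allowance raised to 10, neither line
// below triggers a scalastyle warning anymore.
object StyleRelaxationExample {
  val defaultQuantile = 4                 // magic number (previously warned)
  val cols = Seq("col", "col", "col")     // three identical literals (old limit: 2)
}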

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/OpenmldbBatchMain.scala

+3 -1

@@ -21,6 +21,9 @@ import com._4paradigm.openmldb.batch.utils.{DDLEngine, HDFSUtil, HybridseUtil, S
 import org.apache.spark.sql.SparkSession
 import org.slf4j.LoggerFactory
 
+import scala.collection.JavaConverters.{asScalaBufferConverter, mapAsScalaMapConverter}
+
+
 
 object OpenmldbBatchMain {
 
@@ -38,7 +41,6 @@ object OpenmldbBatchMain {
 sessionBuilder.appName(appName)
 }
 
-import scala.collection.JavaConverters._
 for (e <- config.getSparkConfig.asScala) {
 val arg: Array[String] = e.split("=")
 val k = arg(0)
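
The change above hoists a method-local wildcard import scala.collection.JavaConverters._ into a single explicit file-level import of the two converters that are actually used. A minimal sketch of the same pattern (illustrative names only, not code from this commit):

// Sketch: name the specific JavaConverters instead of importing the wildcard,
// and keep the import at the top of the file rather than inside a method.
import scala.collection.JavaConverters.{asScalaBufferConverter, mapAsScalaMapConverter}

object JavaConvertersExample {
  def main(args: Array[String]): Unit = {
    val sparkConfig = new java.util.ArrayList[String]()
    sparkConfig.add("spark.executor.memory=2g")

    // asScala on a java.util.List is provided by asScalaBufferConverter
    for (e <- sparkConfig.asScala) {
      val arg = e.split("=")
      println(arg(0) + " -> " + arg(1))
    }

    // asScala on a java.util.Map is provided by mapAsScalaMapConverter
    val props = new java.util.HashMap[String, String]()
    props.put("spark.app.name", "demo")
    props.asScala.foreach { case (k, v) => println(s"$k -> $v") }
  }
}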

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/PlanContext.scala

+1 -2

@@ -17,9 +17,8 @@
 package com._4paradigm.openmldb.batch
 
 import java.nio.ByteBuffer
-
 import com._4paradigm.hybridse.sdk.SerializableByteBuffer
-import com._4paradigm.hybridse.vm._
+import com._4paradigm.hybridse.vm.PhysicalOpNode
 import com._4paradigm.openmldb.batch.utils.NodeIndexInfo
 import org.apache.spark.sql.catalyst.QueryPlanningTracker
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/SparkPlanner.scala

+8 -4

@@ -17,11 +17,15 @@
 package com._4paradigm.openmldb.batch
 
 import com._4paradigm.hybridse.HybridSeLibrary
-import com._4paradigm.hybridse.`type`.TypeOuterClass._
+import com._4paradigm.hybridse.`type`.TypeOuterClass.Database
+import com._4paradigm.hybridse.vm.{CoreAPI, Engine, PhysicalConstProjectNode, PhysicalDataProviderNode,
+  PhysicalGroupAggrerationNode, PhysicalGroupNode, PhysicalJoinNode, PhysicalLimitNode, PhysicalOpNode,
+  PhysicalOpType, PhysicalProjectNode, PhysicalRenameNode, PhysicalSimpleProjectNode, PhysicalTableProjectNode,
+  PhysicalWindowAggrerationNode, ProjectType}
 import com._4paradigm.hybridse.sdk.{SqlEngine, UnsupportedHybridSeException}
 import com._4paradigm.hybridse.node.JoinType
-import com._4paradigm.hybridse.vm._
-import com._4paradigm.openmldb.batch.nodes.{ConstProjectPlan, DataProviderPlan, GroupByAggregationPlan, GroupByPlan, JoinPlan, LimitPlan, RenamePlan, RowProjectPlan, SimpleProjectPlan, WindowAggPlan}
+import com._4paradigm.openmldb.batch.nodes.{ConstProjectPlan, DataProviderPlan, GroupByAggregationPlan, GroupByPlan,
+  JoinPlan, LimitPlan, RenamePlan, RowProjectPlan, SimpleProjectPlan, WindowAggPlan}
 import com._4paradigm.openmldb.batch.utils.{GraphvizUtil, HybridseUtil, NodeIndexInfo, NodeIndexType}
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.spark.sql.{DataFrame, SparkSession}
@@ -74,7 +78,7 @@ class SparkPlanner(session: SparkSession, config: OpenmldbBatchConfig) {
 
 logger.info("Visit concat join node to add node index info")
 val processedConcatJoinNodeIds = mutable.HashSet[Long]()
-val indexColumnName = "__CONCATJOIN_INDEX__"+ System.currentTimeMillis()
+val indexColumnName = "__CONCATJOIN_INDEX__" + System.currentTimeMillis()
 concatJoinNodes.map(joinNode => bindNodeIndexInfo(joinNode, planCtx, processedConcatJoinNodeIds, indexColumnName))
 
 if (config.slowRunCacheDir != null) {

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/SparkRowCodec.scala

+4 -4

@@ -17,13 +17,13 @@
 package com._4paradigm.openmldb.batch
 
 import java.sql.{Date, Timestamp}
-
 import com._4paradigm.hybridse.codec.{RowBuilder, RowView, Row => NativeRow}
 import com._4paradigm.hybridse.sdk.HybridSeException
 import com._4paradigm.hybridse.vm.CoreAPI
 import com._4paradigm.openmldb.batch.utils.HybridseUtil
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.{BooleanType, DateType, DoubleType, FloatType,
+  IntegerType, LongType, ShortType, StringType, StructType, TimestampType}
 import org.slf4j.LoggerFactory
 
 import scala.collection.mutable
@@ -49,7 +49,6 @@ class SparkRowCodec(sliceSchemas: Array[StructType]) {
 
 def encode(row: Row): NativeRow = {
 var result: NativeRow = null
-
 // collect slice size and string raw bytes
 val sliceSizes = Array.fill(sliceNum)(0)
 val sliceStrings = Array.fill(sliceNum)(mutable.ArrayBuffer[Array[Byte]]())
@@ -194,7 +193,8 @@
 fieldOffset += 1
 }
 }
-
+
+
 private def inferStringFields(): Array[Array[Int]] = {
 var fieldOffset = 0
 sliceSchemas.map(schema => {

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/nodes/ConstProjectPlan.scala

+4 -3

@@ -22,10 +22,11 @@ import com._4paradigm.hybridse.vm.PhysicalConstProjectNode
 import com._4paradigm.openmldb.batch.{PlanContext, SparkInstance}
 import com._4paradigm.openmldb.batch.utils.HybridseUtil
 import org.apache.spark.sql.Column
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.types._
+import org.apache.spark.sql.functions.{lit, to_date, to_timestamp, typedLit, when}
+import org.apache.spark.sql.types.{BooleanType, DateType, DoubleType, FloatType,
+  IntegerType, LongType, ShortType, StringType, TimestampType}
 
-import scala.collection.JavaConverters._
+import scala.collection.JavaConverters.asScalaBufferConverter
 
 
 object ConstProjectPlan {

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/nodes/GroupByAggregationPlan.scala

+5 -2

@@ -106,8 +106,11 @@ object GroupByAggregationPlan {
 val inputHybridseSchema = HybridseUtil.getHybridseSchema(projectConfig.inputSchema)
 
 val outputFields =
-if (projectConfig.keepIndexColumn) projectConfig.outputSchemaSlices.map(_.size).sum + 1
-else projectConfig.outputSchemaSlices.map(_.size).sum
+if (projectConfig.keepIndexColumn) {
+  projectConfig.outputSchemaSlices.map(_.size).sum + 1
+} else {
+  projectConfig.outputSchemaSlices.map(_.size).sum
+}
 
 // Init first groupby interface
 var groupbyInterface = new GroupbyInterface(inputHybridseSchema)

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/nodes/RowProjectPlan.scala

+12 -3

@@ -60,7 +60,12 @@ object RowProjectPlan {
 }
 
 // Get Spark DataFrame and limit the number of rows
-val inputDf = if (node.GetLimitCnt > 0) inputTable.getDfConsideringIndex(ctx, node.GetNodeId()).limit(node.GetLimitCnt()) else inputTable.getDfConsideringIndex(ctx, node.GetNodeId())
+val inputDf = if (node.GetLimitCnt > 0) {
+  inputTable.getDfConsideringIndex(ctx, node.GetNodeId())
+    .limit(node.GetLimitCnt())
+} else {
+  inputTable.getDfConsideringIndex(ctx, node.GetNodeId())
+}
 
 val hybridseJsdkLibraryPath = ctx.getConf.hybridseJsdkLibraryPath
 
@@ -99,7 +104,7 @@
 
 })
 
-SparkUtil.RddInternalRowToDf(ctx.getSparkSession, outputInternalRowRdd, outputSchema)
+SparkUtil.rddInternalRowToDf(ctx.getSparkSession, outputInternalRowRdd, outputSchema)
 
 } else { // enableUnsafeRowOptimization is false
 val ouputRdd = inputDf.rdd.mapPartitions(partitionIter => {
@@ -118,7 +123,11 @@
 val fn = jit.FindFunction(projectConfig.functionName)
 val encoder = new SparkRowCodec(projectConfig.inputSchemaSlices)
 val decoder = new SparkRowCodec(projectConfig.outputSchemaSlices)
-val outputFields = if (projectConfig.keepIndexColumn) projectConfig.outputSchemaSlices.map(_.size).sum + 1 else projectConfig.outputSchemaSlices.map(_.size).sum
+val outputFields = if (projectConfig.keepIndexColumn) {
+  projectConfig.outputSchemaSlices.map(_.size).sum + 1
+} else {
+  projectConfig.outputSchemaSlices.map(_.size).sum
+}
 val outputArr = Array.fill[Any](outputFields)(null)
 
 val resultIter = partitionIter.map(row => {

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/nodes/SimpleProjectPlan.scala

+1 -1

@@ -24,7 +24,7 @@ import com._4paradigm.openmldb.batch.{PlanContext, SparkInstance}
 import org.apache.spark.sql.{Column, DataFrame}
 import org.slf4j.LoggerFactory
 
-import scala.collection.JavaConverters._
+import scala.collection.JavaConverters.asScalaBufferConverter
 import scala.collection.mutable
 
 

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/nodes/WindowAggPlan.scala

+19 -12

@@ -17,20 +17,19 @@
 package com._4paradigm.openmldb.batch.nodes
 
 import java.util
-
 import com._4paradigm.hybridse.vm.PhysicalWindowAggrerationNode
-import com._4paradigm.openmldb.batch.utils.{AutoDestructibleIterator, HybridseUtil, PhysicalNodeUtil, SkewUtils, SparkUtil}
+import com._4paradigm.openmldb.batch.utils.{AutoDestructibleIterator, HybridseUtil,
+  PhysicalNodeUtil, SkewUtils, SparkUtil}
 import com._4paradigm.openmldb.batch.window.WindowAggPlanUtil.WindowAggConfig
 import com._4paradigm.openmldb.batch.window.{WindowAggPlanUtil, WindowComputer}
-import com._4paradigm.openmldb.batch.{PlanContext, OpenmldbBatchConfig, SparkInstance}
+import com._4paradigm.openmldb.batch.{OpenmldbBatchConfig, PlanContext, SparkInstance}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.{IntegerType, LongType, StructType, TimestampType}
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.util.SerializableConfiguration
 import org.slf4j.LoggerFactory
 
-import scala.collection.JavaConverters._
-
+import scala.collection.JavaConverters.{bufferAsJavaListConverter, asScalaBufferConverter}
 
 /** The planner which implements window agg physical node.
 *
@@ -68,7 +67,11 @@ object WindowAggPlan {
 val dfWithIndex = inputTable.getDfConsideringIndex(ctx, physicalNode.GetNodeId())
 
 // Do union if physical node has union flag
-val unionTable = if (isWindowWithUnion) WindowAggPlanUtil.windowUnionTables(ctx, physicalNode, dfWithIndex) else dfWithIndex
+val unionTable = if (isWindowWithUnion) {
+  WindowAggPlanUtil.windowUnionTables(ctx, physicalNode, dfWithIndex)
+} else {
+  dfWithIndex
+}
 
 // Do groupby and sort with window skew optimization or not
 val repartitionDf = if (isWindowSkewOptimization) {
@@ -99,7 +102,7 @@
 val computer = WindowAggPlanUtil.createComputer(partitionIndex, hadoopConf, sparkFeConfig, windowAggConfig)
 unsafeWindowAggIter(computer, iter, sparkFeConfig, windowAggConfig, outputSchema)
 }
-SparkUtil.RddInternalRowToDf(ctx.getSparkSession, outputInternalRowRdd, outputSchema)
+SparkUtil.rddInternalRowToDf(ctx.getSparkSession, outputInternalRowRdd, outputSchema)
 
 } else { // isUnsafeRowOptimization is false
 val outputRdd = if (isWindowWithUnion) {
@@ -170,7 +173,8 @@
 val distributionTableName = "_DISTRIBUTION_TABLE_" + uniqueNamePostfix
 val countColumnName = "_COUNT_" + uniqueNamePostfix
 
-val distributionSqlText = SkewUtils.genPercentileSql(inputTableName, quantile.intValue(), repartitionColNames, orderbyColName, countColumnName)
+val distributionSqlText = SkewUtils
+  .genPercentileSql(inputTableName, quantile.intValue(), repartitionColNames, orderbyColName, countColumnName)
 logger.info(s"Generate distribution sql: $distributionSqlText")
 val distributionDf = ctx.sparksql(distributionSqlText)
 distributionDf.createOrReplaceTempView(distributionTableName)
@@ -179,7 +183,8 @@
 val keysMap = new util.HashMap[String, String]()
 keyScala.foreach(e => keysMap.put(e, e))
 
-val addColumnsSqlText = SkewUtils.genPercentileTagSql(inputTableName, distributionTableName, quantile.intValue(), schemas, keysMap, orderbyColName,
+val addColumnsSqlText = SkewUtils.genPercentileTagSql(inputTableName, distributionTableName,
+  quantile.intValue(), schemas, keysMap, orderbyColName,
 partColumnName, expandColumnName, countColumnName, ctx.getConf.skewCnt.longValue())
 logger.info(s"Generate add columns sql: $addColumnsSqlText")
 ctx.sparksql(addColumnsSqlText)
@@ -189,14 +194,16 @@
 
 val distributionMap = Map(distributionCollect.map(p => (p.get(0), p.get(1))):_*)
 
-val outputSchema = inputDf.schema.add("_PART_", IntegerType, false).add("_EXPAND_", IntegerType, false)
+val outputSchema = inputDf.schema.add("_PART_", IntegerType, false)
+  .add("_EXPAND_", IntegerType, false)
 
 val outputRdd = inputDf.rdd.map(row => {
 // Combine the repartition keys to one string which is equal to the first column of skew config
 val combineString = repartitionColIndexes.map(index => row.get(index)).mkString("_")
 // TODO: Support for more datatype of orderby columns
 val condition = if (orderbyColType.equals(TimestampType)) {
-row.get(orderbyColIndex).asInstanceOf[java.sql.Timestamp].compareTo(distributionMap(combineString).asInstanceOf[java.sql.Timestamp])
+row.get(orderbyColIndex).asInstanceOf[java.sql.Timestamp].compareTo(distributionMap(combineString)
+  .asInstanceOf[java.sql.Timestamp])
 } else if (orderbyColType.equals(LongType)) {
 row.get(orderbyColIndex).asInstanceOf[Long].compareTo(distributionMap(combineString).asInstanceOf[Long])
 } else {

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/udf/PercentileApprox.scala

+7 -6

@@ -18,17 +18,18 @@ package com._4paradigm.openmldb.batch.udf
 
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile
-import org.apache.spark.sql.functions._
+import org.apache.spark.sql.functions.lit
+
 
 
 object PercentileApprox {
-def percentile_approx(col: Column, percentage: Column, accuracy: Column): Column = {
+def percentileApprox(col: Column, percentage: Column, accuracy: Column): Column = {
 val expr = new ApproximatePercentile(
 col.expr, percentage.expr, accuracy.expr
 ).toAggregateExpression
 new Column(expr)
 }
-def percentile_approx(col: Column, percentage: Column): Column = percentile_approx(
+def percentileApprox(col: Column, percentage: Column): Column = percentileApprox(
 col, percentage, lit(ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY)
 )
 
@@ -39,11 +40,11 @@ object PercentileApprox {
 * @param accu
 * @return
 */
-def percentile_approx(col: Column, percentage: Column, accu: Int): Column = {
+def percentileApprox(col: Column, percentage: Column, accu: Int): Column = {
 if (accu > 0) {
-percentile_approx(col, percentage, lit(accu))
+percentileApprox(col, percentage, lit(accu))
 } else {
-percentile_approx(col, percentage, lit(ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY))
+percentileApprox(col, percentage, lit(ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY))
 }
 }
 }
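
The diff above only renames the snake_case percentile_approx helpers to camelCase percentileApprox; the wrapped ApproximatePercentile expression is unchanged. A minimal usage sketch, assuming the openmldb-batch classes are on the classpath (the data and column names below are made up for illustration):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, lit}
import com._4paradigm.openmldb.batch.udf.PercentileApprox.percentileApprox

object PercentileApproxExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("percentileApproxDemo").getOrCreate()
    import spark.implicits._

    // Toy data: (key, value) pairs
    val df = Seq(("a", 1.0), ("a", 2.0), ("a", 9.0), ("b", 4.0)).toDF("key", "value")

    // The two-argument overload falls back to ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY
    df.groupBy("key")
      .agg(percentileApprox(col("value"), lit(0.5)).as("approx_median"))
      .show()

    spark.stop()
  }
}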

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/utils/ConfigImplicits.scala

+1 -3

@@ -20,6 +20,4 @@ trait ConfigImplicits[T] {
 def parse(value: Any): T
 }
 
-object ConfigImplicits {
-
-}
+object ConfigImplicits

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/utils/ConfigReflections.scala

+1 -1

@@ -96,4 +96,4 @@ object ConfigReflections {
 }
 }
 
-class ConfigOption(name: String, doc: String = "") extends StaticAnnotation {}
+class ConfigOption(name: String, doc: String = "") extends StaticAnnotation

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/utils/GraphvizUtil.scala

+2 -2

@@ -17,9 +17,9 @@
 package com._4paradigm.openmldb.batch.utils
 
 import java.io.File
-
 import com._4paradigm.hybridse.node.JoinType
-import com._4paradigm.hybridse.vm._
+import com._4paradigm.hybridse.vm.{PhysicalDataProviderNode, PhysicalJoinNode, PhysicalOpNode,
+  PhysicalOpType, PhysicalProjectNode, PhysicalRenameNode, PhysicalWindowAggrerationNode, ProjectType}
 import guru.nidi.graphviz.engine.{Format, Graphviz}
 import guru.nidi.graphviz.model.Factory.{mutGraph, mutNode}
 import guru.nidi.graphviz.model.MutableNode
