[SPARK-30653][INFRA][SQL] EOL character enforcement for java/scala/xml/py/R files

### What changes were proposed in this pull request?

This patch converts CRLF into LF in three source files; most other files already use LF only. This patch also adds rules to enforce LF as the EOL character for all java, scala, xml, py, and R files.
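
For reference, the conversion itself can be done from the shell. A minimal sketch, assuming GNU sed (both the `\r` escape and bare `-i` are GNU extensions), over the three affected files:

```
# Strip the trailing CR from every line, rewriting the files in place (GNU sed).
sed -i 's/\r$//' \
  sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java \
  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala \
  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
```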

### Why are the changes needed?

The majority of source code files use LF, and only three files use CRLF. While an IDE normally hides the difference, an editor that does not handle line endings automatically can still produce an unnecessary diff when one of these files is modified.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

```
grep -IUrl --color "^M" . | grep "\.java\|\.scala\|\.xml\|\.py\|\.R" | grep -v "/target/" | grep -v "/build/" | grep -v "/dist/" | grep -v "dependency-reduced-pom.xml" | grep -v ".pyc"
```

(Please note you'll need to type CTRL+V -> CTRL+M in a bash shell to get `^M`, because it represents the carriage-return (CR) character, not a literal `^` followed by `M`.)
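
Alternatively, bash's ANSI-C quoting can produce the carriage return without typing it; a sketch of the same check using `$'\r'`:

```
# $'\r' expands to a literal CR in bash, so no CTRL+V -> CTRL+M is needed.
grep -IUrl $'\r' . | grep "\.java\|\.scala\|\.xml\|\.py\|\.R" | grep -v "/target/" | grep -v "/build/" | grep -v "/dist/"
```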

Before the patch, the result is:

```
./sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java
./sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala
./sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
```

and after the patch, the result is empty.
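
Another way to audit line endings is to ask Git directly; a sketch, assuming Git 2.8+ for the `--eol` option:

```
# Print the index (i/) and working-tree (w/) line-ending classification for each
# tracked file, then keep only entries that still mention CRLF.
git ls-files --eol | grep crlf
```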

Git also shows a warning message if the EOL of any source file of the given types is changed to CRLF, like below:

```
warning: CRLF will be replaced by LF in sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala.
The file will have its original line endings in your working directory.
```
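
As a quick sanity check (an illustrative sketch, not part of the patch), `git check-attr` reports which attributes resolve for a given path:

```
# Ask Git which text/eol attributes apply to one of the affected files.
git check-attr text eol -- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
# Expected after this patch (assumption): "text: set" and "eol: lf" for the path.
```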

Closes #27365 from HeartSaVioR/MINOR-remove-CRLF-in-source-codes.

Authored-by: Jungtaek Lim (HeartSaVioR) <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
HeartSaVioR authored and dongjoon-hyun committed Jan 27, 2020
1 parent 8a24cf2 commit 0436b3d
Showing 4 changed files with 582 additions and 577 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
```
@@ -1,2 +1,7 @@
 *.bat text eol=crlf
 *.cmd text eol=crlf
+*.java text eol=lf
+*.scala text eol=lf
+*.xml text eol=lf
+*.py text eol=lf
+*.R text eol=lf
```
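
One caveat worth noting: `.gitattributes` rules take effect when files are checked in or out, so files already tracked keep their stored endings until renormalized. A sketch of one way to do that, assuming Git 2.16+ for `--renormalize`:

```
# Re-run the clean/EOL filters over every tracked file and stage any resulting changes.
git add --renormalize .
# Show which files had their stored line endings rewritten.
git status
```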
128 changes: 64 additions & 64 deletions sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
@@ -1,64 +1,64 @@
```
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule

/**
 * Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions.
 */
object SimplifyExtractValueOps extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // One place where this optimization is invalid is an aggregation where the select
    // list expression is a function of a grouping expression:
    //
    //   SELECT struct(a,b).a FROM tbl GROUP BY struct(a,b)
    //
    // cannot be simplified to SELECT a FROM tbl GROUP BY struct(a,b). So just skip this
    // optimization for Aggregates (although this misses some cases where the optimization
    // can be made).
    case a: Aggregate => a
    case p => p.transformExpressionsUp {
      // Remove redundant field extraction.
      case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) =>
        createNamedStruct.valExprs(ordinal)

      // Remove redundant array indexing.
      case GetArrayStructFields(CreateArray(elems), field, ordinal, _, _) =>
        // Instead of selecting the field on the entire array, select it from each member
        // of the array. Pushing down the operation this way may open other optimizations
        // opportunities (i.e. struct(...,x,...).x)
        CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name))))

      // Remove redundant map lookup.
      case ga @ GetArrayItem(CreateArray(elems), IntegerLiteral(idx)) =>
        // Instead of creating the array and then selecting one row, remove array creation
        // altogether.
        if (idx >= 0 && idx < elems.size) {
          // valid index
          elems(idx)
        } else {
          // out of bounds, mimic the runtime behavior and return null
          Literal(null, ga.dataType)
        }
      case GetMapValue(CreateMap(elems), key) => CaseKeyWhen(key, elems)
    }
  }
}
```