Skip to content

Commit 28d3374

Browse files
DylanGuedes authored and HyukjinKwon committed
[SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark
Signed-off-by: DylanGuedes <djmgguedes@gmail.com>

## What changes were proposed in this pull request?

Addition of float, int and list hints for `pyspark.sql` Hint.

## How was this patch tested?

I did manual tests following the same principles used in the Scala version, and also added unit tests.

Closes apache#20788 from DylanGuedes/jira-21030.

Authored-by: DylanGuedes <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 6be272b commit 28d3374

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

python/pyspark/sql/dataframe.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -485,10 +485,12 @@ def hint(self, name, *parameters):
         if not isinstance(name, str):
             raise TypeError("name should be provided as str, got {0}".format(type(name)))
 
+        allowed_types = (basestring, list, float, int)
         for p in parameters:
-            if not isinstance(p, str):
+            if not isinstance(p, allowed_types):
                 raise TypeError(
-                    "all parameters should be str, got {0} of type {1}".format(p, type(p)))
+                    "all parameters should be in {0}, got {1} of type {2}".format(
+                        allowed_types, p, type(p)))
 
         jdf = self._jdf.hint(name, self._jseq(parameters))
         return DataFrame(jdf, self.sql_ctx)

python/pyspark/sql/tests/test_dataframe.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,19 @@ def test_generic_hints(self):
         plan = df1.join(df2.hint("broadcast"), "id")._jdf.queryExecution().executedPlan()
         self.assertEqual(1, plan.toString().count("BroadcastHashJoin"))
 
+    # add tests for SPARK-23647 (test more types for hint)
+    def test_extended_hint_types(self):
+        from pyspark.sql import DataFrame
+
+        df = self.spark.range(10e10).toDF("id")
+        such_a_nice_list = ["itworks1", "itworks2", "itworks3"]
+        hinted_df = df.hint("my awesome hint", 1.2345, "what", such_a_nice_list)
+        logical_plan = hinted_df._jdf.queryExecution().logical()
+
+        self.assertEqual(1, logical_plan.toString().count("1.2345"))
+        self.assertEqual(1, logical_plan.toString().count("what"))
+        self.assertEqual(3, logical_plan.toString().count("itworks"))
+
     def test_sample(self):
         self.assertRaisesRegexp(
             TypeError,

0 commit comments

Comments
 (0)