From fda9fa01037cccd9f327facf3131f2345f2e270c Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 28 Sep 2023 20:07:21 -0700 Subject: [PATCH] improve docstr of preprocessors (#1227) * improve docstr of preprocessors * Update SynapseML version * Fix test --------- Co-authored-by: Li Jiang --- flaml/automl/automl.py | 4 ++-- test/spark/test_0sparkml.py | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index d9530b957a..9a29093875 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -476,12 +476,12 @@ def save_best_config(self, filename): @property def feature_transformer(self): - """Returns AutoML Transformer""" + """Returns feature transformer which is used to preprocess data before applying training or inference.""" return getattr(self, "_transformer", None) @property def label_transformer(self): - """Returns AutoML label transformer""" + """Returns label transformer which is used to preprocess labels before scoring, and inverse transform labels after inference.""" return getattr(self, "_label_transformer", None) @property diff --git a/test/spark/test_0sparkml.py b/test/spark/test_0sparkml.py index c5da1d9d3f..1e4af808df 100644 --- a/test/spark/test_0sparkml.py +++ b/test/spark/test_0sparkml.py @@ -17,13 +17,14 @@ from pyspark.ml.feature import VectorAssembler from flaml.automl.spark.utils import to_pandas_on_spark + postfix_version = "-spark3.3," if pyspark.__version__ > "3.2" else "," spark = ( pyspark.sql.SparkSession.builder.appName("MyApp") .master("local[2]") .config( "spark.jars.packages", ( - "com.microsoft.azure:synapseml_2.12:0.10.2," + f"com.microsoft.azure:synapseml_2.12:0.11.3{postfix_version}" "org.apache.hadoop:hadoop-azure:3.3.5," "com.microsoft.azure:azure-storage:8.6.6," f"org.mlflow:mlflow-spark:{mlflow.__version__}" @@ -172,15 +173,16 @@ def test_spark_input_df(): try: model = automl.model.estimator predictions = model.transform(test_data) + 
predictions.show() - from synapse.ml.train import ComputeModelStatistics + # from synapse.ml.train import ComputeModelStatistics - metrics = ComputeModelStatistics( - evaluationMetric="classification", - labelCol="Bankrupt?", - scoredLabelsCol="prediction", - ).transform(predictions) - metrics.show() + # metrics = ComputeModelStatistics( + # evaluationMetric="classification", + # labelCol="Bankrupt?", + # scoredLabelsCol="prediction", + # ).transform(predictions) + # metrics.show() except AttributeError: print("No fitted model because of too short training time.")