diff --git a/README.md b/README.md index bcbec91..8475186 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,10 @@ sales.groupBy("year").pivot("city",Seq("Warsaw","Boston","Toronto")).agg(sum("am ## 本地测试和提交作业 参考:https://blog.csdn.net/dream_an/article/details/54915894 -- idea上测试spark作业 +### idea上测试spark作业 + +1. 引入依赖 -引入依赖 ``` @@ -118,13 +119,29 @@ sales.groupBy("year").pivot("city",Seq("Warsaw","Boston","Toronto")).agg(sum("am ``` -- 提交作业到本机的spark环境 +2. 直接在idea上运行相应的main方法。并在spark上下文设置master为local即可。 + +``` + SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkFlatMapJava"); +``` + + + +### 提交作业到本机的spark环境 + +1. 启动spark +``` +~/opt/spark-2.4.0-bin-hadoop2.7 » sbin/start-all.sh +``` +可以观察到起来了一个master和worker进程。 +![](https://raw.githubusercontent.com/huzekang/picbed/master/20190626112610.png) -将使用`mvn clean package`打包好的作业提交到本地安装好的spark上跑 +2. 使用`mvn clean package`打包好的作业,并提交到本地安装好的spark上跑 ``` ~/opt/spark-2.4.0-bin-hadoop2.7 » bin/spark-submit --class "com.wugui.sparkstarter.SimpleApp" /Users/huzekang/study/spark-starter/target/spark-starter-1.0-SNAPSHOT.jar ``` -![](https://raw.githubusercontent.com/huzekang/picbed/master/20190620155332.png) +3. 打开spark server界面,可以看到已经完成的spark作业。 +![](https://raw.githubusercontent.com/huzekang/picbed/master/20190626112849.png) -- 提交作业到yarn \ No newline at end of file +### 提交作业到yarn \ No newline at end of file diff --git a/src/main/java/com/wugui/sparkstarter/SparkHiveNewVersion.java b/src/main/java/com/wugui/sparkstarter/SparkHiveNewVersion.java index 67d9070..0fdf924 100644 --- a/src/main/java/com/wugui/sparkstarter/SparkHiveNewVersion.java +++ b/src/main/java/com/wugui/sparkstarter/SparkHiveNewVersion.java @@ -15,8 +15,11 @@ public static void main(String[] args) { // 定义上下文 SparkSession spark = SparkSession .builder() + // 如果需要提交到remote spark则使用spark://host:port +// .master("spark://10.0.0.50:7077") + // 如果需要提交到remote spark则使用local .master("local") - .appName("Java Spark SQL basic example") + .appName("Java Spark SQL Starter !!") .enableHiveSupport() .config("spark.some.config.option", "some-value") .getOrCreate(); @@ -48,7 +51,6 @@ public static void main(String[] args) { + "ON si.name=ss.name " + "WHERE ss.score>=80"); - spark.sql("DROP TABLE IF EXISTS good_student_infos"); // 根据DataFrame创建临时表 goodStudentsDF.registerTempTable("goodstudent_temp"); Dataset result = spark.sql("select * from goodstudent_temp"); @@ -57,9 +59,10 @@ public static void main(String[] args) { /** * 将临时视图保存到hive表 good_student_infos */ - goodStudentsDF.write().mode(SaveMode.Overwrite).saveAsTable("good_student_infos2"); + spark.sql("DROP TABLE IF EXISTS good_student_infos"); + goodStudentsDF.write().mode(SaveMode.Overwrite).saveAsTable("good_student_infos"); - spark.table("good_student_infos2").foreach(row -> { + spark.table("good_student_infos").foreach(row -> { // 两种方式获取每行的数据 System.out.println(row.get(2)); System.out.println(row.getInt(row.fieldIndex("score")));