diff --git a/README.md b/README.md index 0dbbc7c..4083732 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,12 @@ key-valuelist 可以参考:https://zhuanlan.zhihu.com/p/45729547 * 不使用hive元数据: - * SparkSession spark = SparkSession.builder().getOrCreate() + * SparkSession spark = SparkSession.builder().getOrCreate() * 使用hive元数据 - * SparkSession spark = SparkSession.builder().enableHiveSupport().getOrCreate() + * SparkSession spark = SparkSession.builder().enableHiveSupport().getOrCreate() + +- 查询远程的hive: -查询远程的hive 1. 项目中resource目录加入文件hive-site.xml,指明hive的thrift连接 ``` diff --git a/src/main/java/com/wugui/sparkstarter/SparkStarter.java b/src/main/java/com/wugui/sparkstarter/SparkStarter.java index 6e0bebb..815618f 100644 --- a/src/main/java/com/wugui/sparkstarter/SparkStarter.java +++ b/src/main/java/com/wugui/sparkstarter/SparkStarter.java @@ -6,8 +6,10 @@ /** * 两个简单使用的例子 - * 1. 读取文件打印每行 - * 2. wordcount + * 1. 读取文件打印每行 (读取hdfs文件) + * => 地址要和hdfs目录下的core-site.xml一样。 + * => 参考:![](https://raw.githubusercontent.com/huzekang/picbed/master/20190625000822.png) + * 2. wordcount(读取本地文件) **/ public class SparkStarter { @@ -17,7 +19,8 @@ public static void main(String[] args) { .setAppName("SparkStarter"); //之后你用的是Rdd JavaSparkContext sc = new JavaSparkContext(sparkConf); - JavaRDD stringJavaRDD = sc.textFile("/Users/huzekang/study/spark-starter/src/main/resources/students.txt"); + // Should be some file on remote hdfs + JavaRDD stringJavaRDD = sc.textFile("hdfs://cdh01:8020/tmp/spark_starter/app_log.txt"); stringJavaRDD.foreach(o -> System.out.println(o)); // Should be some file on your system