【Spark】02.Spark-IDEA配置
1.基于jdk1.8创建一个maven项目
2.增加scala-sdk
Project Structure - Platform Settings - Global Libraries
因为本文学习中使用的 Spark 3.0.0 版本需要适配 Scala 2.12.x 版本（本文使用的是 scala_2.12.11）
3.增加Spark的Maven依赖
<dependencies>
    <!-- Spark core, built against Scala 2.12 -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>3.0.0</version>
    </dependency>
</dependencies>
4.WordCount试运行
package com.atguigu.bigdta.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Minimal Spark WordCount used to verify the IDEA/Maven/Scala setup.
 *
 * Reads every file under the local "datas" directory, splits lines on
 * spaces, counts occurrences of each word, and prints the (word, count)
 * pairs on the driver.
 */
object Test01 {

  def main(args: Array[String]): Unit = {
    // Run on a single local thread; the app name appears in the Spark UI.
    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(sparkConf)

    // One RDD element per line of input read from the "datas" directory.
    val lines: RDD[String] = sc.textFile("datas")

    // Split each line on single spaces into individual words.
    val words: RDD[String] = lines.flatMap(_.split(" "))

    // Group identical words: word -> all occurrences of that word.
    val wordGroup: RDD[(String, Iterable[String])] = words.groupBy(word => word)

    // Count occurrences per word by taking each group's size.
    val wordToCount: RDD[(String, Int)] = wordGroup.map {
      case (word, list) => (word, list.size)
    }

    // Collect results to the driver and print them.
    val array: Array[(String, Int)] = wordToCount.collect()
    array.foreach(println)

    // TODO 关闭连接
    // Fix: SparkContext has no close() method — the original sc.close()
    // only compiled via the implicit conversion imported from
    // JavaSparkContext.fromSparkContext. Use the canonical stop() instead
    // (that import is no longer needed and has been removed).
    sc.stop()
  }
}