下载地址:
测试代码
# coding=UTF-8
import os
# JAVA_HOME is set here, before findspark.init() and the pyspark import below.
os.environ['JAVA_HOME'] = r'D:\software\spark\java8' # Replace this path with your own Java installation directory
import findspark
# Runs before the pyspark import on the next line; presumably this is what
# makes pyspark importable -- confirm against the findspark documentation.
findspark.init()
from pyspark import SparkContext
def show(x):
    """Print *x* to standard output.

    Used by this script as the per-element action when displaying the
    word-count result.
    """
    print(x)
# Word count over ./test/word.txt using a local Spark context.
sc = SparkContext("local", "First App")
try:
    # Cache the input lines; the path is relative to the working directory.
    lines = sc.textFile("./test/word.txt").cache()
    # Split every line on single spaces. The second positional argument of
    # flatMap/map is preservesPartitioning.
    words = lines.flatMap(lambda line: line.split(" "), True)
    pairWords = words.map(lambda word: (word, 1), True)
    # Sum the counts per word; 3 is the number of partitions of the result.
    result = pairWords.reduceByKey(lambda v1, v2: v1 + v2, 3)
    # Print each (word, count) pair.
    result.foreach(show)
    # To write the counts to disk instead:
    # result.saveAsTextFile("./wc-result2")
finally:
    # Stop the context even when a step above fails, so it is not leaked.
    sc.stop()