Download and install Scala 2.11.8
Configure the Scala environment variables
---------------------------------------------------------------------------------------
sudo gedit ~/.bashrc
#scala
export SCALA_HOME=/opt/scala-2.11.8
export PATH=$PATH:$SCALA_HOME/bin
source ~/.bashrc
---------------------------------------------------------------------------------------
Test the Scala installation
[hadoop@master01 lib]$ scala
Welcome to Scala 2.11.8 (OpenJDK 64-Bit Server VM, Java 1.8.0_91).
Type in expressions for evaluation. Or try :help.
scala> 1+1
res0: Int = 2
---------------------------------------------------------------------------------------
Download and install Spark 1.6.0 on Hadoop 2.6
Configure the Spark environment variables
---------------------------------------------------------------------------------------
sudo gedit ~/.bashrc
#Spark
export SPARK_HOME=/opt/spark-1.6.0
export PATH=$PATH:$SPARK_HOME/bin
source ~/.bashrc
---------------------------------------------------------------------------------------
cp spark-env.sh.template spark-env.sh
sudo gedit spark-env.sh
export SCALA_HOME=/opt/scala-2.11.8
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export SPARK_MASTER_IP=master01
export SPARK_WORKER_MEMORY=1024m
Spark properties (these belong in conf/spark-defaults.conf, not in spark-env.sh):
spark.master spark://master01:7077
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///user/spark/eventlog
ps aux | grep spark
hadoop 969 0.0 0.0 112644 952 pts/0 R+ 21:21 0:00 grep --color=auto spark
---------------------------------------------------------------------------------------
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
val sc = new SparkContext(new SparkConf().setAppName("Spark Count"))
val count = sc.parallelize(1 to NUM_SAMPLES).map{i =>
val x = Math.random()
val y = Math.random()
if (x*x + y*y < 1) 1 else 0
}.reduce(_ + _)
---------------------------------------------------------------------------------------
Word count example
scala> val textFile = sc.textFile("hdfs://master01:9000/opt/hadoop-2.7.1/input/text34mb.txt")
textFile: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[9] at textFile at <console>:27
scala> val wordCounts = textFile.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey((a, b) => a + b)
wordCounts: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[12] at reduceByKey at <console>:29
scala> wordCounts.collect()
res0: Array[(String, Int)] = Array(('lopin',1), (Ah!,99), (houres,,36), (Committee,),1), (bone,40), (fleein',1), (�Head.�,1), (delinquents.,2), (Malwa,1), (routing*,2), ('farthest,1), (Dollours,2), (Feldkirch,,3), ((1754-1831),,1), (nothin,1), (untruthfulness.,1), (signal.,6), (langwidge,3), (drad;*,1), (meets,,3), (Lost.,3), (Papists,,6), (accompts,,2), (Goodbye!,1), (Galliard,4), ((1563-1631),1), (Anthonio,,40), (God-forsaken,4), (rightly-,1), (fowl,30), (coat;,3), (husky,5), (Carpenter,4), (precious*,1), (ampullaria,1), (afterward,64), (armes*,,2), (entend*,1), (provisioned,,1), (wicked?,3), (Francaise,1), (Herefords,2), (Souls.",1), (/Loci,2), (speak:,9), (half-crowns,1), (Thunder.,18), (Halkar;,2), (HISTORIES.,1), (feats;,1), (robin,1), (fixed-I,1), (undeterred,2), (fastenings,4), ...
沒有留言:
張貼留言