CentOS 6에서 Spark Streaming 프로그래밍

2017. 8. 9. 19:11 서버 프로그래밍


<SparkStreaming>


$ cd ~/Documents/workspace

$ mkdir sparkstreaming-topurl

$ cd sparkstreaming-topurl

$ vi build.sbt


name := "LogParser"

version := "1.0"

scalaVersion := "2.11.8"

libraryDependencies ++= Seq(

"org.apache.spark"%"spark-core_2.11"%"2.1.0"%"provided",

"org.apache.spark"%"spark-streaming_2.11"%"2.1.0"%"provided"

)


$ sbt eclipse


Import the project into Eclipse

Add a new source folder: src/main/scala

Add a new Scala object: LogParser

Write the code


$ sbt package


$ wget http://www.monitorware.com/en/logsamples/download/apache-samples.rar

$ wget http://www.rarlab.com/rar/rarlinux-x64-5.2.1.tar.gz

$ tar zxvf rarlinux-x64-5.2.1.tar.gz

$ sudo cp rar/unrar /usr/local/bin

$ rm -rf rar

$ unrar x apache-samples.rar

$ unrar x apache-access_log.rar

$ cd access_log

$ nc -lk 9999 < access_log


$ ~/spark/bin/spark-submit --master spark://localhost:7077 --class LogParser target/scala-2.11/logparser_2.11-1.0.jar


sparkstreaming-topurl.zip


-----------------------------------


1. zookeeper 실행

bin/zookeeper-server-start.sh config/zookeeper.properties


2. Kafka 실행

bin/kafka-server-start.sh config/server.properties


3. Kafka 수신 콘솔 실행

bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic room301 --from-beginning


4. Kafka 송신 콘솔 실행

bin/kafka-console-producer.sh --broker-list localhost:9092 --topic room301



$ mkdir sparkstreaming-kafkawordcount

$ cd sparkstreaming-kafkawordcount

$ vi build.sbt


name := "KafkaWordCount"

version := "1.0"

scalaVersion := "2.11.8"

libraryDependencies ++= Seq(

"org.apache.spark"%"spark-core_2.11"%"2.1.0"%"provided",

"org.apache.spark"%"spark-streaming_2.11"%"2.1.0"%"provided",

"org.apache.spark"%"spark-streaming-kafka-0-8_2.11"%"2.1.0"%"provided"

)


$ sbt eclipse


Import the project into Eclipse

Add a new source folder: src/main/scala

Add a new Scala object: KafkaWordCount

Write the code


$ sbt package


$ mkdir lib

$ cp ~/.ivy2/jars/* ./lib


$ ~/spark/bin/spark-submit --jars ./lib/org.apache.spark_spark-streaming-kafka-0-8_2.11-2.0.2.jar,./lib/org.apache.kafka_kafka_2.11-0.8.2.1.jar,./lib/com.101tec_zkclient-0.3.jar,./lib/org.apache.kafka_kafka-clients-0.8.2.1.jar,./lib/com.yammer.metrics_metrics-core-2.2.0.jar --class KafkaWordCount target/scala-2.11/kafkawordcount_2.11-1.0.jar localhost:2181 wordcount room301 1


sparkstreaming-kafkawordcount.zip