标签:style blog http ar io color os sp for
如何做集成其实特别简单,网上就有现成的教程。


package com.hark
import java.io.File
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.flume.FlumeUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._
/**
* Created by Administrator on 2014-12-16.
*/
/**
 * Minimal Spark Streaming + Flume integration example.
 *
 * Expects a Flume agent with an Avro sink pointed at `hostname:port`
 * (see the flume.conf below); every 20-second batch prints how many
 * Flume events were received.
 *
 * Created by Administrator on 2014-12-16.
 */
object SparkStreamingFlumeTest {
  def main(args: Array[String]): Unit = {
    // Windows dev-machine workaround: the Hadoop client checks for
    // %HADOOP_HOME%\bin\winutils.exe at startup. Point hadoop.home.dir at
    // the working directory and create an empty placeholder file so the
    // check passes. createNewFile() returning false (file already exists)
    // is fine here, so its result is deliberately ignored.
    val workDir = new File(".").getCanonicalPath
    System.setProperty("hadoop.home.dir", workDir) // setProperty, not getProperties().put
    new File("./bin").mkdirs()
    new File("./bin/winutils.exe").createNewFile()

    // No setMaster here on purpose: the master is supplied by spark-submit
    // (--master yarn, or local[2] when debugging locally).
    val sparkConf = new SparkConf().setAppName("HdfsWordCount")

    // 20-second micro-batch interval.
    val ssc = new StreamingContext(sparkConf, Seconds(20))

    val hostname = "localhost"
    val port = 2345
    val storageLevel = StorageLevel.MEMORY_ONLY

    // Push-based receiver: the Flume avro sink must target exactly this
    // host/port, and the receiver must be reachable from the Flume agent.
    val flumeStream = FlumeUtils.createStream(ssc, hostname, port, storageLevel)
    flumeStream.count().map(cnt => s"Received $cnt flume events.").print()

    ssc.start()
    ssc.awaitTermination()
  }
}
# Please paste flume.conf here. Example:
# Sources, channels, and sinks are defined per
# agent name, in this case 'tier1'.
tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1

# For each source, channel, and sink, set
# standard properties.
tier1.sources.source1.type = exec
tier1.sources.source1.command = tail -F /opt/data/test3/123
tier1.sources.source1.channels = channel1
tier1.channels.channel1.type = memory
#tier1.sinks.sink1.type = logger
tier1.sinks.sink1.type = avro
tier1.sinks.sink1.hostname = localhost
tier1.sinks.sink1.port = 2345
tier1.sinks.sink1.channel = channel1

# Other properties are specific to each type of
# source, channel, or sink. In this case, we
# specify the capacity of the memory channel.
# (Flume agent host in the original setup: yhx.hadoop.dn01)
tier1.channels.channel1.capacity = 100
spark-submit --driver-memory 512m --executor-memory 512m --executor-cores 1 --num-executors 3 --class com.hark.SparkStreamingFlumeTest --deploy-mode cluster --master yarn /opt/spark/SparkTest.jar
flume-ng agent --conf /opt/cloudera-manager/run/cloudera-scm-agent/process/585-flume-AGENT --conf-file /opt/cloudera-manager/run/cloudera-scm-agent/process/585-flume-AGENT/flume.conf --name tier1 -Dflume.root.logger=INFO,console
cdh环境下,spark streaming与flume的集成问题总结
标签:style blog http ar io color os sp for
原文地址:http://www.cnblogs.com/hark0623/p/4170156.html