dgadiraju
12/4/2017 - 12:23 AM

sdc.conf

# sdc.conf: A Flume agent that fans out one source to two sinks
# Source: web server log file (tailed)
# Sink 1: Unprocessed data to HDFS
# Sink 2: Spark Streaming (pull-based SparkSink)

# Name the components on this agent
sdc.sources = ws
sdc.sinks = hd spark
sdc.channels = hdmem sparkmem

# Describe/configure the source
sdc.sources.ws.type = exec
sdc.sources.ws.command = tail -F /opt/gen_logs/logs/access.log

# Describe the HDFS sink
sdc.sinks.hd.type = hdfs
sdc.sinks.hd.hdfs.path = hdfs://nn01.itversity.com:8020/user/dgadiraju/flume_demo

sdc.sinks.hd.hdfs.filePrefix = FlumeDemo
sdc.sinks.hd.hdfs.fileSuffix = .txt
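# Roll to a new HDFS file every 120 seconds, at 1 MB, or after 100 events, whichever comes first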
sdc.sinks.hd.hdfs.rollInterval = 120
sdc.sinks.hd.hdfs.rollSize = 1048576
sdc.sinks.hd.hdfs.rollCount = 100
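# DataStream writes the raw event body as text instead of the default SequenceFile format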
sdc.sinks.hd.hdfs.fileType = DataStream

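# Describe the Spark sink (pull-based; a Spark Streaming job polls this host/port)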
sdc.sinks.spark.type = org.apache.spark.streaming.flume.sink.SparkSink
sdc.sinks.spark.hostname = gw01.itversity.com
sdc.sinks.spark.port = 8123

# Use channels which buffer events in memory
sdc.channels.hdmem.type = memory
sdc.channels.hdmem.capacity = 1000
sdc.channels.hdmem.transactionCapacity = 100

sdc.channels.sparkmem.type = memory
sdc.channels.sparkmem.capacity = 1000
sdc.channels.sparkmem.transactionCapacity = 100


# Bind the source and sinks to their channels
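# (the default replicating channel selector delivers every event to both channels)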
sdc.sources.ws.channels = hdmem sparkmem
sdc.sinks.hd.channel = hdmem
sdc.sinks.spark.channel = sparkmem
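
The SparkSink above is the pull-based Flume/Spark integration: the sink exposes gw01.itversity.com:8123 and a Spark Streaming job pulls events from it. Below is a minimal consumer sketch in Scala, assuming the spark-streaming-flume dependency matching the cluster's Spark and Scala versions is available; the app name FlumeStreamingDemo, the 30-second batch interval and the count-per-batch output are illustrative, not part of the original setup.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.flume.FlumeUtils

object FlumeStreamingDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("FlumeStreamingDemo")
    val ssc = new StreamingContext(conf, Seconds(30))

    // Pull events from the SparkSink declared in sdc.conf (host/port must match)
    val flumeStream = FlumeUtils.createPollingStream(ssc, "gw01.itversity.com", 8123)

    // Each record is a SparkFlumeEvent; the log line is the Avro event body
    val lines = flumeStream.map(e => new String(e.event.getBody.array(), "UTF-8"))
    lines.count().print()   // number of access.log lines received per 30-second batch

    ssc.start()
    ssc.awaitTermination()
  }
}

The agent itself is typically started with something like flume-ng agent -n sdc -f sdc.conf --conf <flume conf dir> (paths depend on the environment), and the Spark job submitted with the Flume integration on its classpath, e.g. spark-submit --packages org.apache.spark:spark-streaming-flume_<scala version>:<spark version>. Note that the spark-streaming-flume-sink jar (plus its Scala and commons-lang3 dependencies) also has to be on Flume's classpath for the SparkSink class to load.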