# hivefans
# 9/21/2015 - 9:16 AM

# flume.conf

# Components of the "webrequest" agent, listed in data-flow order.
# Each name declared here is configured further down in this file.
webrequest.sources = udp2log
webrequest.channels = file-channel
webrequest.sinks = hdfs-sink

# file-channel: buffers events on local disk between the source and the sink.
webrequest.channels.file-channel.type = file
# On-disk locations for the channel's event data and its checkpoint.
webrequest.channels.file-channel.dataDirs = /var/lib/hadoop/data/e/flume/file-channel/data
webrequest.channels.file-channel.checkpointDir = /var/lib/hadoop/data/e/flume/file-channel/checkpoint
# Time between checkpoints; Flume's file channel reads this value as milliseconds.
webrequest.channels.file-channel.checkpointInterval = 1000


# UDPSource Multicast (custom)
# See: https://issues.apache.org/jira/browse/FLUME-1838
# Listens on 233.58.59.1:8420 with multicast enabled and feeds file-channel.
webrequest.sources.udp2log.type = org.apache.flume.source.udp.UDPSource
webrequest.sources.udp2log.channels = file-channel
webrequest.sources.udp2log.host = 233.58.59.1
webrequest.sources.udp2log.port = 8420
webrequest.sources.udp2log.multicast = true

# Interceptors are applied in the order listed:
#   1. request-timestamp  - takes the event time from the request line itself.
#   2. fallback-timestamp - stamps the current time ONLY when step 1 set nothing.
# The fallback matters because the HDFS sink path uses time escapes (%Y, %m, ...)
# which require a "timestamp" header on every event. Without it, a single line
# that fails the regex cannot be bucketed and the sink keeps retrying it.
webrequest.sources.udp2log.interceptors = request-timestamp fallback-timestamp

# request-timestamp: capture the first "yyyy-MM-ddTHH:mm:ss" field that follows
# "<anything> <digits> " at the start of the line. Backslashes are doubled
# because the properties loader collapses "\\" to "\" before the regex is compiled.
webrequest.sources.udp2log.interceptors.request-timestamp.type = regex_extractor
webrequest.sources.udp2log.interceptors.request-timestamp.regex = ^.+\\s\\d+\\s(\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d)
# Serializer s1 parses the captured text with the given date pattern and stores
# it, as epoch milliseconds, in the "timestamp" header.
webrequest.sources.udp2log.interceptors.request-timestamp.serializers = s1
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.name = timestamp
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.pattern = yyyy-MM-dd'T'HH:mm:ss

# fallback-timestamp: stock Flume timestamp interceptor. preserveExisting = true
# keeps the value extracted above and only fills in events that have none.
webrequest.sources.udp2log.interceptors.fallback-timestamp.type = timestamp
webrequest.sources.udp2log.interceptors.fallback-timestamp.preserveExisting = true


# hdfs-sink: drains file-channel into HDFS as plain, uncompressed stream files
# (fileType DataStream) whose names start with "webrequest".
webrequest.sinks.hdfs-sink.type = hdfs
webrequest.sinks.hdfs-sink.channel = file-channel
webrequest.sinks.hdfs-sink.hdfs.fileType = DataStream
webrequest.sinks.hdfs-sink.hdfs.filePrefix = webrequest

# Output directory is derived from each event's timestamp, rounded down to a
# 15-minute boundary before the %-escapes are expanded.
webrequest.sinks.hdfs-sink.hdfs.path = /user/otto/tmp/flume/%Y-%m-%d/%H.%M.%S
webrequest.sinks.hdfs-sink.hdfs.round = true
webrequest.sinks.hdfs-sink.hdfs.roundUnit = minute
webrequest.sinks.hdfs-sink.hdfs.roundValue = 15

# Roll files on time only: every 60 seconds. A value of 0 turns off rolling
# by event count and by file size.
webrequest.sinks.hdfs-sink.hdfs.rollInterval = 60
webrequest.sinks.hdfs-sink.hdfs.rollCount = 0
webrequest.sinks.hdfs-sink.hdfs.rollSize = 0