vsouza
10/5/2016 - 2:34 AM

spark_context.py

spark_context = SparkContext(appName=kinesis_app_name)
spark_streaming_context = StreamingContext(spark_context, spark_batch_interval)
sql_context = SQLContext(spark_context)

kinesis_stream = KinesisUtils.createStream(
    spark_streaming_context, kinesis_app_name, kinesis_stream, kinesis_endpoint,
    aws_region, kinesis_initial_position, kinesis_checkpoint_interval)

py_rdd = kinesis_stream.map(lambda x: json.loads(x))