# Author: dgadiraju
# Date:   11/4/2017 - 7:43 PM
#
# spark-sql-saving-dataframe.py
# Compute daily revenue per product with Spark SQL and save it into a Hive ORC table.

# Create the target database and an ORC-backed Hive table for the aggregated
# daily revenue per product. One statement per sql() call -- Spark SQL does not
# accept multiple semicolon-separated statements in a single string, so the
# two DDL statements are issued separately.
# IF NOT EXISTS makes the script safe to re-run (the original raised if the
# database/table already existed).
sqlContext.sql("CREATE DATABASE IF NOT EXISTS dgadiraju_daily_revenue")
sqlContext.sql(
    "CREATE TABLE IF NOT EXISTS dgadiraju_daily_revenue.daily_revenue ("
    "order_date string, "
    "product_name string, "
    "daily_revenue_per_product float"
    ") STORED AS orc"
)

# Aggregate revenue per (order_date, product_name) over completed/closed orders,
# joining orders -> order_items -> products.
#
# BUG FIX: the original packed the query onto one line while keeping the "\"
# characters that had been Python line continuations; inside a single-line
# string literal those backslashes are sent to Spark SQL verbatim and break
# query parsing. A triple-quoted multiline string needs no continuations.
daily_revenue_per_product_df = sqlContext.sql("""
    SELECT o.order_date, p.product_name,
           sum(oi.order_item_subtotal) daily_revenue_per_product
    FROM orders o
    JOIN order_items oi
      ON o.order_id = oi.order_item_order_id
    JOIN products p
      ON p.product_id = oi.order_item_product_id
    WHERE o.order_status IN ('COMPLETE', 'CLOSED')
    GROUP BY o.order_date, p.product_name
    ORDER BY o.order_date, daily_revenue_per_product DESC
""")

# Append the aggregated rows into the Hive-managed ORC table.
# DataFrame.insertInto was deprecated in Spark 1.4; the DataFrameWriter form
# (df.write.insertInto) is the supported path with the same append semantics.
daily_revenue_per_product_df.write.insertInto("dgadiraju_daily_revenue.daily_revenue")