LasithaMD
5/5/2016 - 2:01 AM

td_intern rossmann

td_intern rossmann

WITH test_cv1 as(
select
  * 
from
  train_cv where gid=1
) INSERT OVERWRITE TABLE cv1
SELECT 
  t2.rowid,
  t3.sales,
  EXP(predicted)-1 as predicted
FROM(
  SELECT
     rowid,
     avg(predicted) AS predicted
  FROM(
    SELECT
      t.rowid,
      tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
    FROM
      model_cv1 p
      LEFT OUTER JOIN test_cv1 t
  ) t1
  group by
    rowid
) t2
JOIN test_cv1 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
  SELECT *
  FROM
   train_cv where gid!=1
) INSERT
  OVERWRITE TABLE
    model_cv1 SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited
    UNION ALL
    SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited;
			
WITH test_cv2 as(
select
  * 
from
  train_cv where gid=2
) INSERT OVERWRITE TABLE cv2
SELECT 
  t2.rowid,
  t3.sales,
  EXP(predicted)-1 as predicted
FROM(
  SELECT
     rowid,
     avg(predicted) AS predicted
  FROM(
    SELECT
      t.rowid,
      tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
    FROM
      model_cv2 p
      LEFT OUTER JOIN test_cv2 t
  ) t1
  group by
    rowid
) t2
JOIN test_cv2 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
  SELECT *
  FROM
   train_cv where gid!=2 
) INSERT
  OVERWRITE TABLE
    model_cv2 SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited
    UNION ALL
    SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited;
WITH test_cv3 as(
select
  * 
from
  train_cv where gid=3
) INSERT OVERWRITE TABLE cv3
SELECT 
  t2.rowid,
  t3.sales,
  EXP(predicted)-1 as predicted
FROM(
  SELECT
     rowid,
     avg(predicted) AS predicted
  FROM(
    SELECT
      t.rowid,
      tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
    FROM
      model_cv3 p
      LEFT OUTER JOIN test_cv3 t
  ) t1
  group by
    rowid
) t2
JOIN test_cv3 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
  SELECT *
  FROM
   train_cv where gid!=3 
) INSERT
  OVERWRITE TABLE
    model_cv3 SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited
    UNION ALL
    SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited;
      
WITH test_cv4 as(
select
  * 
from
  train_cv where gid=4
) INSERT OVERWRITE TABLE cv4
SELECT 
  t2.rowid,
  t3.sales,
  EXP(predicted)-1 as predicted
FROM(
  SELECT
     rowid,
     avg(predicted) AS predicted
  FROM(
    SELECT
      t.rowid,
      tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
    FROM
      model_cv4 p
      LEFT OUTER JOIN test_cv4 t
  ) t1
  group by
    rowid
) t2
JOIN test_cv4 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
  SELECT *
  FROM
   train_cv where gid!=4 
) INSERT
  OVERWRITE TABLE
    model_cv4 SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited
    UNION ALL
    SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited;
      
WITH test_cv5 as(
select
  * 
from
  train_cv where gid=0
) INSERT OVERWRITE TABLE cv5
SELECT 
  t2.rowid,
  t3.sales,
  EXP(predicted)-1 as predicted
FROM(
  SELECT
     rowid,
     avg(predicted) AS predicted
  FROM(
    SELECT
      t.rowid,
      tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
    FROM
      model_cv5 p
      LEFT OUTER JOIN test_cv5 t
  ) t1
  group by
    rowid
) t2
JOIN test_cv5 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
  SELECT *
  FROM
   train_cv where gid!=0 LIMIT 10000
) INSERT
  OVERWRITE TABLE
    model_cv5 SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited
    UNION ALL
    SELECT 
      train_randomforest_regr(features, label, '-trees 20')
    FROM
      train_limited;
      
INSERT OVERWRITE TABLE train_cv
select
  rand_gid2(5, 2016) gid,
  sales, rowid, features, label
from
  training3
cluster by gid, rand(2016);
WITH RMSPE1 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE1
FROM cv1
), RMSPE2 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE2
FROM cv2
), RMSPE3 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE3
FROM cv3
), RMSPE4 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE4
FROM cv4
), RMSPE5 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE5 
FROM cv5
) SELECT
 (RMSPE1+RMSPE2+RMSPE3+RMSPE4+RMSPE5)/5
 FROM RMSPE1, RMSPE2, RMSPE3, RMSPE4, RMSPE5;