td_intern rossmann
WITH test_cv1 as(
select
*
from
train_cv where gid=1
) INSERT OVERWRITE TABLE cv1
SELECT
t2.rowid,
t3.sales,
EXP(predicted)-1 as predicted
FROM(
SELECT
rowid,
avg(predicted) AS predicted
FROM(
SELECT
t.rowid,
tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
FROM
model_cv1 p
LEFT OUTER JOIN test_cv1 t
) t1
group by
rowid
) t2
JOIN test_cv1 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
SELECT *
FROM
train_cv where gid!=1
) INSERT
OVERWRITE TABLE
model_cv1 SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited;
WITH test_cv2 as(
select
*
from
train_cv where gid=2
) INSERT OVERWRITE TABLE cv2
SELECT
t2.rowid,
t3.sales,
EXP(predicted)-1 as predicted
FROM(
SELECT
rowid,
avg(predicted) AS predicted
FROM(
SELECT
t.rowid,
tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
FROM
model_cv2 p
LEFT OUTER JOIN test_cv2 t
) t1
group by
rowid
) t2
JOIN test_cv2 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
SELECT *
FROM
train_cv where gid!=2
) INSERT
OVERWRITE TABLE
model_cv2 SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited;
WITH test_cv3 as(
select
*
from
train_cv where gid=3
) INSERT OVERWRITE TABLE cv3
SELECT
t2.rowid,
t3.sales,
EXP(predicted)-1 as predicted
FROM(
SELECT
rowid,
avg(predicted) AS predicted
FROM(
SELECT
t.rowid,
tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
FROM
model_cv3 p
LEFT OUTER JOIN test_cv3 t
) t1
group by
rowid
) t2
JOIN test_cv3 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
SELECT *
FROM
train_cv where gid!=3
) INSERT
OVERWRITE TABLE
model_cv3 SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited;
WITH test_cv4 as(
select
*
from
train_cv where gid=4
) INSERT OVERWRITE TABLE cv4
SELECT
t2.rowid,
t3.sales,
EXP(predicted)-1 as predicted
FROM(
SELECT
rowid,
avg(predicted) AS predicted
FROM(
SELECT
t.rowid,
tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
FROM
model_cv4 p
LEFT OUTER JOIN test_cv4 t
) t1
group by
rowid
) t2
JOIN test_cv4 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
SELECT *
FROM
train_cv where gid!=4
) INSERT
OVERWRITE TABLE
model_cv4 SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited;
WITH test_cv5 as(
select
*
from
train_cv where gid=0
) INSERT OVERWRITE TABLE cv5
SELECT
t2.rowid,
t3.sales,
EXP(predicted)-1 as predicted
FROM(
SELECT
rowid,
avg(predicted) AS predicted
FROM(
SELECT
t.rowid,
tree_predict(p.model_id, p.model_type, p.pred_model, t.features, false) as predicted
FROM
model_cv5 p
LEFT OUTER JOIN test_cv5 t
) t1
group by
rowid
) t2
JOIN test_cv5 t3 ON (t2.rowid=t3.rowid);
WITH train_limited AS(
SELECT *
FROM
train_cv where gid!=0 LIMIT 10000
) INSERT
OVERWRITE TABLE
model_cv5 SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited
UNION ALL
SELECT
train_randomforest_regr(features, label, '-trees 20')
FROM
train_limited;
INSERT OVERWRITE TABLE train_cv
select
rand_gid2(5, 2016) gid,
sales, rowid, features, label
from
training3
cluster by gid, rand(2016);
WITH RMSPE1 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE1
FROM cv1
), RMSPE2 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE2
FROM cv2
), RMSPE3 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE3
FROM cv3
), RMSPE4 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE4
FROM cv4
), RMSPE5 as (
SELECT
sqrt( AVG( POW((predicted-sales)/sales, 2) ) ) as RMSPE5
FROM cv5
) SELECT
(RMSPE1+RMSPE2+RMSPE3+RMSPE4+RMSPE5)/5
FROM RMSPE1, RMSPE2, RMSPE3, RMSPE4, RMSPE5;