naissa12
2/10/2018 - 12:51 AM

query

DROP TABLE dwo_analysis.exp_show;
CREATE TABLE dwo_analysis.exp_show AS
SELECT

MIN(sc.date_time) as first_shown_time,
SUBSTR(sc.post_evar12,1,24) as guid,
sc.post_evar238 as experiment_name,
sc.post_evar239 as variant_name

FROM sourcedata.sc_visitor_click_history_jun_2015 sc

WHERE report_suite='adbemmarvelweb.prod'
AND click_date BETWEEN DATE_SUB(CURRENT_DATE,90) AND CURRENT_DATE
AND post_prop5 = 'experiment:standard:authenticated:shown'
AND post_evar238 NOT LIKE 'control%'
AND post_evar238 <> ''
AND post_evar239 <> ''

GROUP BY SUBSTR(sc.post_evar12,1,24), post_evar238, post_evar239;



SELECT

experiment_name,
variant_name,

COUNT(DISTINCT CASE WHEN click_date >= first_date THEN guid ELSE NULL END) AS numUsers,
COUNT(DISTINCT CASE WHEN create=1 AND click_date >= first_date THEN guid ELSE NULL END) AS numCreate,
COUNT(DISTINCT CASE WHEN publish=1 AND click_date >= first_date THEN guid ELSE NULL END) AS numPublish,

-- first week

COUNT(DISTINCT CASE WHEN click_date >= first_date AND click_date < DATE_ADD(first_date, 8) THEN guid ELSE NULL END) AS numUsersFirstWeek,
COUNT(DISTINCT CASE WHEN click_date >= first_date AND click_date < DATE_ADD(first_date, 8) THEN CONCAT(guid, click_date) ELSE NULL END) AS numDaysActiveFirstWeek,
COUNT(DISTINCT CASE WHEN publish=1 AND click_date >= first_date AND click_date < DATE_ADD(first_date, 8) THEN CONCAT(guid, click_date) ELSE NULL END) AS numDaysPublishFirstWeek,

SUM(CASE WHEN click_date >= first_date AND click_date < DATE_ADD(first_date, 8) THEN numCreate ELSE 0 END) AS sumCreateFirstWeek,
SUM(CASE WHEN click_date >= first_date AND click_date < DATE_ADD(first_date, 8) THEN numPublish ELSE 0 END) AS sumPublishFirstWeek,

-- 7 day
COUNT(DISTINCT CASE WHEN click_date >= DATE_ADD(first_date, 1) AND click_date < DATE_ADD(first_date, 8) THEN guid ELSE NULL END) AS numUsers7day,
COUNT(DISTINCT CASE WHEN create=1 AND click_date >= DATE_ADD(first_date, 1) AND click_date < DATE_ADD(first_date, 8) THEN guid ELSE NULL END) AS numCreate7day,
COUNT(DISTINCT CASE WHEN publish=1 AND click_date >= DATE_ADD(first_date, 1) AND click_date < DATE_ADD(first_date, 8) THEN guid ELSE NULL END) AS numPublish7day,

-- 1 to 28 day
COUNT(DISTINCT CASE WHEN click_date >= DATE_ADD(first_date, 1) AND click_date < DATE_ADD(first_date, 28) THEN guid ELSE NULL END) AS numUsers128day,
COUNT(DISTINCT CASE WHEN create=1 AND click_date >= DATE_ADD(first_date, 1) AND click_date < DATE_ADD(first_date, 28) THEN guid ELSE NULL END) AS numCreate128day,
COUNT(DISTINCT CASE WHEN publish=1 AND click_date >= DATE_ADD(first_date, 1) AND click_date < DATE_ADD(first_date, 28) THEN guid ELSE NULL END) AS numPublish128day,

-- 28 day
COUNT(DISTINCT CASE WHEN click_date >= DATE_ADD(first_date, 28) THEN guid ELSE NULL END) AS numUsers28day,
COUNT(DISTINCT CASE WHEN create=1 AND click_date >= DATE_ADD(first_date, 28) THEN guid ELSE NULL END) AS numCreate28day,
COUNT(DISTINCT CASE WHEN publish=1 AND click_date >= DATE_ADD(first_date, 28) THEN guid ELSE NULL END) AS numPublish28day
FROM
(

    SELECT

    a.first_shown_time,
    DATE(a.first_shown_time) AS first_date,
    b.*


    FROM dwo_analysis.exp_show a

    LEFT JOIN (
        SELECT

        SUBSTR(post_evar12,1,24) AS guid,
        click_date,
        MAX(CASE WHEN post_prop5='project:createClicked' THEN 1 ELSE 0 END) AS create,
        MAX(CASE WHEN post_prop5='project:exportCompleted' OR post_prop5='project:reExportCompleted' THEN 1 ELSE 0 END) AS publish,

        SUM(CASE WHEN post_prop5='project:createClicked' THEN 1 ELSE 0 END) AS numCreate,
        SUM(CASE WHEN post_prop5='project:exportCompleted' OR post_prop5='project:reExportCompleted' THEN 1 ELSE 0 END) AS numPublish,
        post_evar238 as experiment_name,
        post_evar239 as variant_name

        FROM sourcedata.sc_visitor_click_history_jun_2015 sc

        INNER JOIN dwo_analysis.exp_show b
        ON SUBSTR(sc.post_evar12,1,24) = b.guid
        AND sc.post_evar238 = b.experiment_name
        AND sc.post_evar239 = b.variant_name

        WHERE report_suite='adbemmarvelweb.prod'
        AND click_date BETWEEN DATE_SUB(CURRENT_DATE,90) AND CURRENT_DATE
        AND sc.date_time >= b.first_shown_time

        GROUP BY SUBSTR(post_evar12,1,24), click_date, post_evar238, post_evar239
    ) b

	ON a.guid = b.guid
	AND a.experiment_name = b.experiment_name
	AND a.variant_name = b.variant_name
) c

GROUP BY experiment_name, variant_name;