cybernetics
1/2/2016 - 10:46 PM

These queries let you define find user sessions against event data logged to Segment SQL, Snowplow, or Google BigQuery.

These queries let you define find user sessions against event data logged to Segment SQL, Snowplow, or Google BigQuery.

-- These queries let you define find user sessions against event data
-- logged to Segment SQL, Snowplow, or Google BigQuery.
-- For more details, see the full post:
-- LINK


--- SEGMENT SQL
-- Finding the start of every session
SELECT *
  FROM (
       SELECT *
              LAG(sent_at,1) OVER (PARTITION BY user_id ORDER BY sent_at) AS last_event
        FROM "your_project".tracks
      ) last
WHERE EXTRACT('EPOCH' FROM sent_at) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10)
   OR last_event IS NULL

-- Mapping every event to its session
SELECT *,
       SUM(is_new_session) OVER (ORDER BY user_id, sent_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS global_session_id,
       SUM(is_new_session) OVER (PARTITION BY user_id ORDER BY sent_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS user_session_id
  FROM (
       SELECT *,
              CASE WHEN EXTRACT('EPOCH' FROM sent_at) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10) 
                     OR last_event IS NULL 
                   THEN 1 ELSE 0 END AS is_new_session
         FROM (
              SELECT *,
                     LAG(sent_at,1) OVER (PARTITION BY user_id ORDER BY sent_at) AS last_event
                FROM "your_project".tracks
              ) last
       ) final


--- SNOWPLOW
-- Finding the start of every session
SELECT *
  FROM (
       SELECT *
              LAG(collector_tstamp,1) OVER (PARTITION BY domain_userid ORDER BY collector_tstamp) AS last_event
        FROM "your_project".events
      ) last
WHERE EXTRACT('EPOCH' FROM collector_tstamp) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10)
   OR last_event IS NULL

-- Mapping every event to its session
SELECT *,
       SUM(is_new_session) OVER (ORDER BY domain_userid, collector_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS global_session_id,
       SUM(is_new_session) OVER (PARTITION BY domain_userid ORDER BY collector_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS user_session_id
  FROM (
       SELECT *,
              CASE WHEN EXTRACT('EPOCH' FROM collector_tstamp) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10) 
                     OR last_event IS NULL 
                   THEN 1 ELSE 0 END AS is_new_session
         FROM (
              SELECT *,
                     LAG(collector_tstamp,1) OVER (PARTITION BY domain_userid ORDER BY collector_tstamp) AS last_event
                FROM "your_project".events
              ) last
       ) final


--- GOOGLE BIGQUERY
-- Finding the start of every session
SELECT * 
  FROM (
       SELECT *,
              LAG(occurred_at,1) OVER (PARTITION BY fullVisitorId ORDER BY occurred_at) AS last_event
         FROM (
              SELECT fullVisitorId,
                     visitStartTime + hits.time/1000 AS occurred_at
                FROM "your_tables"
              ) pre
       ) last
 WHERE occurred_at - last_event >= (60 * 10)
    OR last_event IS NULL

-- Mapping every event to its session
SELECT *,
       SUM(is_new_session) OVER (ORDER BY fullVisitorId, occurred_at) AS global_session_id
  FROM (
       SELECT *,
              CASE WHEN occurred_at - last_event >= (60 * 10) OR last_event IS NULL THEN 1 ELSE 0 END AS is_new_session
         FROM (
              SELECT *,
                     LAG(occurred_at,1) OVER (PARTITION BY fullVisitorId ORDER BY occurred_at) AS last_event
                FROM (
                     SELECT fullVisitorId,
                            visitStartTime + hits.time/1000 AS occurred_at
                       FROM "your_tables"
                     ) pre
              ) last
       ) final
 ORDER BY 1,2