aaron-maass of Web Analytics
3/25/2019 - 8:40 PM

EchoShip Customer Attributes & Conversion Rates

/*
The purpose of this query is to do the following:
	-Identify attributes about EchoShip customers
	-Identify quotes generated on EchoShip so we can measure losses
		-Differentiate between quotes generated through the Book Shipment path & the Quote & Book Shipment path
			-In the Book Shipment (BS) path, users enter in all details about the items and route before viewing a list of quotes.
			-In the Quote & Book Shipment (QBS) path, users are able to view a list of quotes before adding the specific details about the route and items
	-Calculate conversion rate on an account level
	-Segment clients based on volume, gp, and industry for comparative analyses
*/


--BLOCK 1: CUSTOMER ATTRIBUTES


--The table below pulls summary information about every Echo customer

if object_id('tempdb.db.#cust_attributes1', 'U') is not null drop table #cust_attributes1
select		c.customerid
			,c.customername
			,c.CustomerGuid
			,pc.customerid parentaccountid
			,c.segment
			,count(*) vol		--loads in past year
			,sum(trm.reportrevenue - l.cogs) gp		--gp in past year
into		#cust_attributes1
from		EchoOptimizer.dbo.tblCustomer c
left join	echooptimizer.dbo.tblCustomerLoads cl on c.CustomerGuid = cl.CustomerGuiD 
left join   (select * from echooptimizer.dbo.tblloads where cast(echooptimizer.dbo.tblloads.createddate as date) between dateadd(d,-366,cast(getdate() as date)) 
															and dateadd(d,-1,cast(getdate() as date))) l
			on cl.loadguid = l.loadguid			--identifies all loads created in past 366 days
left join	echooptimizer.dbo.tblreportmoney trm on l.loadguid = trm.loadguid
left join	echooptimizer.dbo.tblcustomer pc on c.parentaccountid = pc.customerguid
group by	c.customerid
			,c.customername
			,pc.customerid
			,pc.customername
			,c.segment
			,c.CustomerGuid
;

--The table below identifies information about each company's industry. Colin Johnston is most knowledgable about the data in the NaicsCodeDesc table

if object_id('tempdb.db.#industry', 'U') is not null drop table #industry
select acct.E_Number__c
		,acct.NaicsCode as IndustryCodeFull		--industry code
		,left(acct.NaicsCode,2) as IndustryCodeTwoDigit			--twodigit industry code (for summarizing)
		,n.[Description]		--industry description
into	#industry
from SalesAnalytics.Salesforce.Account acct
left join (select NaicsCode, [Description] from SalesAnalytics.dbo.NaicsCodeDesc where digits = 2) n on left(acct.NaicsCode,2) = n.NaicsCode
where acct.E_Number__c is not null
;

--The table below appends industry information to our customer attributes table

if object_id('tempdb.db.#cust_attributes2', 'U') is not null drop table #cust_attributes2
select ca.CustomerGuid
		,ca.CustomerName
		,ca.CustomerId
		,ca.Parentaccountid
		,ca.vol
		,ca.gp
		,ca.Segment
		,i.[Description]
		,i.IndustryCodeTwoDigit
		,i.IndustryCodeFull
into	#cust_attributes2
from	#cust_attributes1 ca
left join #industry i on ca.customerid = i.E_Number__c
;


--BLOCK 2A: BOOK SHIPMENT PATH


--The table below identifies all pageviews where a quote was generated through the Book Shipment path

if object_id('tempdb.db.#bs_quotes_source', 'U') is not null drop table #bs_quotes_source
select firstpartycookies_accountguid		--This is the same as CustomerGuid. Tealium recognizes it as a first party cookie.
		,c.CustomerName
		,EventId		--Every pageview is an 'event'. Tealium generates a unique identifier for each event
		,pageurl_domain		--This is the page url. The Tealium source data collects pageviews from EchoTrak/EchoShip/EchoDrive.
		,udo_page_name
		,udo_page_type
		,DATEADD(hour, -6, DATEADD(ss, CAST (udo_tealium_timestamp_epoch AS int), '19700101')) as dt		--The Tealium source data is formatted as epoch time; this converts to standard datetime
into	#bs_quotes_source
from	InformaticaTest.web.TealiumStaging ts
left join EchoOptimizer.dbo.tblCustomer c on ts.firstpartycookies_accountguid = c.CustomerGuid
where udo_page_name in ('carrier')		--This is the page_name of the view quotes page along the book shipment path. EchoShip is a single-page application (all pages have the same url), so this is necessary to differentiate between page views.
		and udo_page_type in ('new-shipment')			--This is the page_type of the view quotes page along the book shipment path
		and pageurl_domain not in ('shipperportal-www-dev1.echo.com', 'shipperportal-www-qa1.echo.com', 'shipperportal-www-dev2.echo.com', 'dev1-eswww01.dev.echogl.net', 'dev2-eswww01.dev.echogl.net')		--This removes pageviews done by our devs
		and DATEADD(hour, -6, DATEADD(ss, CAST (udo_tealium_timestamp_epoch AS int), '19700101')) > '09-30-2018'		--I need to run all events prior to 9/30/2018 through my ETL. Once this is done I will remove this qualifier
;

/*
The table below does two important things.
1: It associates pageviews with quoteids. Under our current configuration (as of 3/25/19) Tealium does not capture the quoteid that is generated on the page. To account for this we do the following:
	a. Calculate the datediff(ss) between the quote createddate and the pageview

	b. Determine whether the quote createddate was before or after the pageview. 

		Most of the time, the pageview should be before the createddate. The Tealium tags fire asynchroniously, meaning that they fire in a manner that does not disrupt the page load. 
		In most (70-80%) cases, the Tealium tag fires before the API pings RateIQ and RateIQ is able to generate a quote.
		However, since this is not always the case, and since some pageviews may not be real pageviews, this data is not 100% reliable (i.e. someone hit back button too many times, thus triggering the tags to fire but the page doesn't load).
		Geoffrey set the time difference to be no more than an hour in either direction. I am currently experimenting with tighter timeframes.

 	c. As there may be many quotes generated within the timeframe referenced above, we order each row based on timediff between the pageview datetime and the createddate. The lowest timediff receives a value of 1, which we use for determining match.

2: It identifies the origin and destination zip for each quote. This is necessary for calculating cross-channel conversions.
	
*/
if object_id('tempdb.db.#bs_quotes_matched', 'U') is not null drop table #bs_quotes_matched
select bsqs.firstpartycookies_accountguid
		,bsqs.CustomerName
		,bsqs.eventid
		,bsqs.pageurl_domain
		,bsqs.udo_page_type
		,bsqs.udo_page_name
		,hq.QuoteID
		,ql.QuoteLoadID
		,hq.CreatedDate
		,bsqs.dt
		,GLO.Name as Origin_Zip		
		,GLO.CountryCode as Origin_Country
		,GLD.Name as Destination_Zip
		,GLD.CountryCode as Destination_Country
		,concat(GLO.Name, GLD.Name) as quote_org_dest		--This can be used for identifying cross-channel conversions
		,abs(datediff(s, bsqs.dt, hq.CreatedDate)) as TimeDiff		--Calculates timediff between pageview and when quote was created in RateIQ
		,sign(datediff(s, bsqs.dt, hq.CreatedDate)) as HorizonBeforeEchoShip		--Determines whether pageview happened before or after when quote was generated in RateIQ
		,row_number() over(partition by bsqs.EventId order by abs(datediff(s, bsqs.dt, hq.CreatedDate))) as r
into	#bs_quotes_matched
from	#bs_quotes_source as bsqs
left join EchoQuote.horizon.quote hq on try_convert(uniqueidentifier, bsqs.firstpartycookies_accountguid) = hq.CustomerGuid 
		and hq.CreatedDate between dateadd(hour, -1, bsqs.dt) and dateadd(hour, 1, bsqs.dt)		--Timeframe for determining whether the quote should be attributed to the pageview. This may want to be tightened to a 10 minute window.
		and hq.OriginApplicationName is not null
left join EchoQuote.Horizon.QuoteLoad ql on hq.QuoteID = ql.QuoteID 
left join EchoQuote.Horizon.Location LO ON HQ.QuoteID = LO.QuoteID and LO.SourceTypeID = 1
left join EchoQuote.Horizon.Location LD ON HQ.QuoteID = LD.QuoteID and LD.SourceTypeID = 2
left join SearchRepository.Geo.Location GLO ON LO.LocationSearchID = GLO.ID
left join SearchRepository.Geo.Location GLD ON LD.LocationSearchID = GLD.ID
;

--The table below gives a count of quotes and loads that were generated through the Book Shipment path.

if object_id('tempdb.db.#bs_quotes_final', 'U') is not null drop table #bs_quotes_final
select firstpartycookies_accountguid
		,count(quoteid) as bs_quotes
		,count(quoteloadid) as bs_loads
		,min(bsqm.dt) as first_bs_quote_dt		--Identifies first time a user generates a quote on the book shipment path. Useful for understanding when a user became active on EchoShip.
into	#bs_quotes_final
from	#bs_quotes_matched bsqm
where r = 1
group by firstpartycookies_accountguid
;


--BLOCK 2B: QUOTE & BOOK SHIPMENT PATH

--The tables in this block are the same as block 2a, except these look at quotes generated on the quote & book shipment path.

if object_id('tempdb.db.#qbs_quotes_source', 'U') is not null drop table #qbs_quotes_source
select firstpartycookies_accountguid
		,c.CustomerName
		,EventId
		,pageurl_domain
		,udo_page_name
		,udo_page_type
		,DATEADD(hour, -6, DATEADD(ss, CAST (udo_tealium_timestamp_epoch AS int), '19700101')) as dt
into	#qbs_quotes_source
from	InformaticaTest.web.TealiumStaging ts
left join EchoOptimizer.dbo.tblCustomer c on ts.firstpartycookies_accountguid = c.CustomerGuid
where	udo_page_name in ('carrier-quote')
		and udo_page_type in ('new-quote')
		and pageurl_domain not in ('shipperportal-www-dev1.echo.com', 'shipperportal-www-qa1.echo.com', 'shipperportal-www-dev2.echo.com', 'dev1-eswww01.dev.echogl.net', 'dev2-eswww01.dev.echogl.net')
		and DATEADD(hour, -6, DATEADD(ss, CAST (udo_tealium_timestamp_epoch AS int), '19700101')) > '09-30-2018'
;

if object_id('tempdb.db.#qbs_quotes_matched', 'U') is not null drop table #qbs_quotes_matched
select qbsqs.firstpartycookies_accountguid
		,qbsqs.CustomerName
		,qbsqs.eventid
		,qbsqs.pageurl_domain
		,qbsqs.udo_page_type
		,qbsqs.udo_page_name
		,hq.QuoteID
		,ql.QuoteLoadID
		,hq.CreatedDate
		,qbsqs.dt
		,GLO.Name as Origin_Zip
		,GLO.CountryCode as Origin_Country
		,GLD.Name as Destination_Zip
		,GLD.CountryCode as Destination_Country
		,concat(GLO.Name, GLD.Name) as quote_org_dest
		,abs(datediff(s, qbsqs.dt, hq.CreatedDate)) as TimeDiff
		,sign(datediff(s, qbsqs.dt, hq.CreatedDate)) as HorizonBeforeEchoShip
		,row_number() over(partition by qbsqs.EventId order by abs(datediff(s, qbsqs.dt, hq.CreatedDate))) as r
into	#qbs_quotes_matched
from	#qbs_quotes_source as qbsqs
left join EchoQuote.horizon.quote as hq on try_convert(uniqueidentifier, qbsqs.firstpartycookies_accountguid) = hq.CustomerGuid 
	and hq.CreatedDate between dateadd(hour, -1, qbsqs.dt) and dateadd(hour, 1, qbsqs.dt)
	and hq.OriginApplicationName is not null
left join EchoQuote.Horizon.QuoteLoad ql on hq.QuoteID = ql.QuoteID
inner join EchoQuote.Horizon.Location LO ON HQ.QuoteID = LO.QuoteID and LO.SourceTypeID = 1
inner join EchoQuote.Horizon.Location LD ON HQ.QuoteID = LD.QuoteID and LD.SourceTypeID = 2
inner join SearchRepository.Geo.Location GLO ON LO.LocationSearchID = GLO.ID
inner join SearchRepository.Geo.Location GLD ON LD.LocationSearchID = GLD.ID
;

if object_id('tempdb.db.#qbs_quotes_final', 'U') is not null drop table #qbs_quotes_final
select firstpartycookies_accountguid
		,count(quoteid) as qbs_quotes
		,count(QuoteLoadID) as qbs_loads
		,min(qbsqm.dt) as first_qbs_quote_dt
into	#qbs_quotes_final
from	#qbs_quotes_matched qbsqm
where r = 1
group by firstpartycookies_accountguid
;


--BLOCK 3: UNIONING GUIDS ACROSS BOOK SHIPMENT & QUOTE AND BOOK SHIPMENT PATHS


--The table below grabs all the CustomerGuids from the final tables for each path. Necessary because some clients may only use one of the two paths to book a load.

if object_id('tempdb.db.#guids', 'U') is not null drop table #guids
select firstpartycookies_accountguid.*
into   #guids
from   (select firstpartycookies_accountguid from #qbs_quotes_final
		union
		select firstpartycookies_accountguid from #bs_quotes_final) firstpartycookies_accountguid
where firstpartycookies_accountguid is not null
;


--BLOCK 4: CONVERSION RATES


--The table below calculates the conversion rate for each path, and also provides a percentile value for each customer's volume and GP

if object_id('tempdb.db.#conversion_source', 'U') is not null drop table #conversion_source
select g.firstpartycookies_accountguid
		,ca.CustomerName
		,ca.CustomerId
		,ca.Parentaccountid
		,ca.vol
		,case when ca.vol is not null then ntile(100) over (order by vol asc) else null end as vol_ntile
		,ca.gp
		,case when ca.gp is not null then ntile(100) over (order by gp asc) else null end as gp_ntile
		,ca.[description]
		,ca.IndustryCodeTwoDigit
		,ca.IndustryCodeFull
		,bs.bs_quotes
		,bs.bs_loads
		,cast(bs_loads as decimal)/nullif(bs_quotes,0) as bs_conv		--Conversion rate for book shipment path
		,bs.bs_quotes - bs.bs_loads as bs_quote_diff		--Count of how many more quotes than loads a client has on the Book Shipment path
		,qbs.qbs_quotes
		,qbs.qbs_loads
		,cast(qbs_loads as decimal)/nullif(qbs_quotes,0) as qbs_conv		--Conversion rate for quote and book shipment path
		,qbs.qbs_quotes - qbs.qbs_loads as qbs_quote_diff		--Count of how many more quotes than loads a client has on the Quote & Book Shipment path
		,isnull(isnull(bs_quotes,0) + isnull(qbs_quotes,0),0) as total_quotes		--total quotes across both paths
		,isnull(isnull(bs_loads,0) + isnull(qbs_loads,0),0) as total_loads		--total loads across both paths
		,case when first_bs_quote_dt <= first_qbs_quote_dt 
			  then first_bs_quote_dt else first_qbs_quote_dt end as first_quote_dt
		,case when (isnull(isnull(bs_quotes,0) + isnull(qbs_quotes,0),0)) = 0 then null
			  else ((isnull(cast(qbs_loads as decimal),0) + isnull(cast(bs_loads as decimal),0)) / (isnull(isnull(bs_quotes,0) + isnull(qbs_quotes,0),0)))
			  end as total_conv
into	#conversion_source
from	#guids g
left join #bs_quotes_final bs on g.firstpartycookies_accountguid = bs.firstpartycookies_accountguid
left join #qbs_quotes_final qbs on g.firstpartycookies_accountguid = qbs.firstpartycookies_accountguid
left join #cust_attributes2 ca on g.firstpartycookies_accountguid = ca.CustomerGuid
;

--The table below categorizes volume and gp percentiles into low/medium/high

if object_id('tempdb.db.#conversion_percentiles', 'U') is not null drop table #conversion_percentiles
select cs.firstpartycookies_accountguid
		,cs.CustomerName
		,cs.[description]
		,cs.industryCodeFull
		,cs.vol
		,cs.vol_ntile
		,case when vol_ntile is null then 'low'
				when vol_ntile between 0 and 32 then 'low'
				when vol_ntile between 33 and 65 then 'medium'
				when vol_ntile between 66 and 100 then 'high'
				else null end as vol_level
		,cs.gp
		,cs.gp_ntile
		,case when gp_ntile is null then 'low'
				when gp_ntile between 0 and 32 then 'low'
				when gp_ntile between 33 and 65 then 'medium'
				when gp_ntile between 66 and 100 then 'high'
				else null end as gp_level
		,total_quotes
		,total_loads
		,cast(total_conv as decimal(4,3)) as total_conv
		,cast(bs_conv as decimal(4,3)) as bs_conv
		,cast(qbs_conv as decimal(4,3)) as qbs_conv
into	#conversion_percentiles
from	#conversion_source cs
;