Monday, January 16, 2012

SQL to Merge Imps and Convs


select count(*) from lh_imps;
select * from lh_imps limit 10;
--drop table lh_imps;
create table lh_imps as
select a.userhashkey, a.datekey,  a.revenue,
       c.adid, d.sectionid,
       to_timestamp(a.datekey * 1000000.0 + a.timekey - 1000000, 'YYYYMMDDHHMISS') as impdatetime
from databasefactevent a
join databasedimcampaign b using(campaignkey)
join databasedimad c using(adkey)
join databasedimsection d using(sectionkey)
where a.eventtypeid = 1
and b.campaignid = 57464
and a.datekey between 20120901 and 20120930
order by a.userhashkey, impdatetime;


select count(*) from lh_imps2;
select * from lh_imps2 limit 10;
--drop table lh_imps2;
create table lh_imps2 as
select a.*,
       row_number() over(partition by userhashkey order by userhashkey, impdatetime, adid, sectionid) frequency,
       sum(revenue) over(partition by userhashkey order by userhashkey, impdatetime, adid, sectionid) cum_revenue
from lh_imps a;

select count(*) from lh_conv;
select * from lh_conv limit 10;
--drop table lh_conv;
create table lh_conv as
select a.eventkey, a.datekey as convdate, a.userhashkey, c.adid, d.sectionid,
       to_timestamp(a.datekey * 1000000.0 + a.timekey - 1000000, 'YYYYMMDDHHMISS') as convdatetime
from databasefactconversion a
join databasedimcampaign b using(campaignkey)
join databasedimad c using(adkey)
join databasedimsection d using(sectionkey)
where b.campaignid = 57464
and a.datekey between 20120901 and 20120930;

select count(*) from lh_conv2;
select * from lh_conv2 limit 10;
--drop table lh_conv2;
create table lh_conv2 as
select b.eventkey, b.userhashkey, b.adid, b.sectionid, b.convdate,  b.convdatetime,
       max(a.impdatetime) as lastImpTime,
       max(a.datekey) as lastImpdatekey,
       max(a.frequency) as lastImpfrequency
from lh_imps2 a
join lh_conv b
on a.userhashkey = b.userhashkey and a.adid = b.adid and a.sectionid = b.sectionid
where a.impdatetime < b.convdatetime
group by 1, 2, 3, 4, 5, 6;

select count(*) from lh_conv3;
select count(*) from lh_conv3 where converted = 1;
select * from lh_conv3 where converted = 1 limit 10;
--drop table lh_conv3;
create table lh_conv3 as
select a.*,
       (case when b.userhashkey is not null then 1 else 0 end ) converted,
       (case when b.userhashkey is not null then b.convdate - a.datekey  else -1 end ) days2conv
from lh_imps2 a
left join lh_conv2 b
on a.userhashkey = b.userhashkey and a.adid = b.adid and a.sectionid = b.sectionid and a.impdatetime = b.lastImpTime;

select userhashkey, adid, sectionid, impdatetime, count(*)
from lh_conv3
where converted = 1
group by 1,2,3,4
having count(*)>2;

select count(*) from lh_conv4;
select count(*) from lh_conv4 where converted = 1;
select * from lh_conv4 where converted = 1 limit 10;
select * from lh_conv4 where converted = 0 limit 10;
--drop table lh_conv4;
create table lh_conv4 as
select a.*,
       (case when b.userhashkey is not null then 1 else 0 end ) converted,
       (case when b.userhashkey is not null then b.convdate - a.datekey  else -1 end ) days2conv
from lh_imps2 a
left join (
select userhashkey, adid, sectionid, max(convdate) as convdate,
       max(convdatetime) as convdatetime,
       max(lastImpTime) as lastImpTime,
       max(lastImpdatekey) as lastImpdatekey
from lh_conv2
group by 1,2,3
) b
on a.userhashkey = b.userhashkey and a.adid = b.adid and a.sectionid = b.sectionid and a.impdatetime = b.lastImpTime;

No comments:

Post a Comment