Monday, November 2, 2015

Part 2: Frequent Item Sets Using SQL

-- total items n 
-- 265,588; 
select count(distinct ITEM_ID) 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product; 

-- basket size t 
-- 73; 
select max(aa.ITEM_CNT),min(aa.ITEM_CNT),avg(aa.ITEM_CNT) from 
( 
select trans_id, count(ITEM_ID) ITEM_CNT 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product 
group by 1 
) aa; 

-- 42,640,950; 
select count(distinct TRANS_ID) 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product; 

-- Finding frequent itemsets; 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1; 
-- First Pass Singleton Set; 
-- 265,588 rows affected; 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 as 
select ITEM_ID, item_sku_desc, count(*) ITEM_CNT 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product 
group by 1,2 
order by 3 desc; 

-- Count the Items; 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2; 
-- 265,588 rows affected 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2 as 
select a.*, NTILE(100) OVER(ORDER BY a.ITEM_CNT DESC NULLS LAST) as PERCENTILE 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 a; 

select PERCENTILE, count(*), avg(ITEM_CNT), min(ITEM_CNT), max(ITEM_CNT) 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2 
group by 1 
order by 1; 

-- 209137; 
select count(*) from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 where ITEM_CNT>1; 
select ITEM_CNT, count(*) from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 group by 1 order by 1 desc, 2 desc; 

-- Second Pass Doubleton Set; 
-- Filter: support > 120 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3; 
-- 55,966,448 rows affected; 
-- ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 as 
select a.TRANS_ID, a.HH_ADDRESS_ID, a.ITEM_ID, 
row_number() over(order by a.HH_ADDRESS_ID, a.ITEM_ID) id 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product a 
inner join 
( 
select distinct aa.TRANS_ID 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product aa 
inner join 
(select distinct ITEM_ID from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2 where ITEM_CNT >= 120) bb 
on aa.ITEM_ID = bb.ITEM_ID 
) b 
on a.TRANS_ID = b.TRANS_ID 
; 

-- Construct 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product4; 
-- 14,161,756 rows affected; 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product4 as 
select distinct aa.id, aa.TRANS_ID, aa.HH_ADDRESS_ID, aa.ITEM_ID, 
bb.ITEM_ID as ITEM_ID2 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 aa 
inner join ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 bb 
on aa.HH_ADDRESS_ID = bb.HH_ADDRESS_ID 
and aa.TRANS_ID = bb.TRANS_ID 
where aa.id < bb.id 
and aa.ITEM_ID < bb.ITEM_ID 
; 

-- Counting 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product5; 
-- 9,517,664 rows affected; 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product5 as 
select ITEM_ID, ITEM_ID2, count(*) PAIR_CNT 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product4 
group by 1,2 
order by 3 desc; 

-- Counting 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6; 
-- 9,517,664 rows affected; 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 as 
select a.*, NTILE(100) OVER(ORDER BY a.PAIR_CNT DESC NULLS LAST) as PERCENTILE 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product5 a; 

select PERCENTILE, count(*), avg(PAIR_CNT), min(PAIR_CNT), max(PAIR_CNT) 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 
group by 1 
order by 1; 

select * from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 
where item_id = 1846960 and item_id2 = 1871478 
; 

select * from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 
where item_id = 884680  and item_id2 =  961474 
; 

-- Third Pass Tripleton Set; 
-- Filter: support >= 7 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7; 
-- 47,509,423 rows affected; 
-- ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 as 
select a.TRANS_ID, a.HH_ADDRESS_ID, a.ITEM_ID, row_number() over(order by a.HH_ADDRESS_ID, a.ITEM_ID) id 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product a 
inner join 
( 
select distinct aa.TRANS_ID 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product aa 
inner join 
(select distinct ITEM_ID from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 where PAIR_CNT >= 7) bb 
on aa.ITEM_ID = bb.ITEM_ID 
) b 
on a.TRANS_ID = b.TRANS_ID 
; 

-- Coonstruct; 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product8; 
-- 4,513,794 rows affected; 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product8 as 
select distinct aa.id, aa.TRANS_ID, aa.HH_ADDRESS_ID, aa.ITEM_ID, 
bb.ITEM_ID as ITEM_ID2, 
cc.ITEM_ID as ITEM_ID3 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 aa 
inner join ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 bb 
on aa.HH_ADDRESS_ID = bb.HH_ADDRESS_ID 
and aa.TRANS_ID = bb.TRANS_ID 
inner join ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 cc 
on bb.HH_ADDRESS_ID = cc.HH_ADDRESS_ID 
and bb.TRANS_ID = cc.TRANS_ID 
where aa.id < bb.id 
and aa.ITEM_ID < bb.ITEM_ID 
and bb.id < cc.id 
and bb.ITEM_ID < cc.ITEM_ID 
; 

-- Counting; 
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product9; 
-- 4,218,992 rows affected; 
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product9 as 
select a.ITEM_ID, a.ITEM_ID2, a.ITEM_ID3, count(*) TRIPLE 
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product8 a 
group by 1,2,3 
order by 4 desc;


No comments:

Post a Comment

Blog Archive