-- total items n
-- 265,588;
select count(distinct ITEM_ID)
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product;
-- basket size t
-- 73;
select max(aa.ITEM_CNT),min(aa.ITEM_CNT),avg(aa.ITEM_CNT) from
(
select trans_id, count(ITEM_ID) ITEM_CNT
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product
group by 1
) aa;
-- 42,640,950;
select count(distinct TRANS_ID)
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product;
-- Finding frequent itemsets;
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1;
-- First Pass Singleton Set;
-- 265,588 rows affected;
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 as
select ITEM_ID, item_sku_desc, count(*) ITEM_CNT
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product
group by 1,2
order by 3 desc;
-- Count the Items;
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2;
-- 265,588 rows affected
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2 as
select a.*, NTILE(100) OVER(ORDER BY a.ITEM_CNT DESC NULLS LAST) as PERCENTILE
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 a;
select PERCENTILE, count(*), avg(ITEM_CNT), min(ITEM_CNT), max(ITEM_CNT)
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2
group by 1
order by 1;
-- 209137;
select count(*) from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 where ITEM_CNT>1;
select ITEM_CNT, count(*) from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product1 group by 1 order by 1 desc, 2 desc;
-- Second Pass Doubleton Set;
-- Filter: support > 120
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3;
-- 55,966,448 rows affected;
-- ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 as
select a.TRANS_ID, a.HH_ADDRESS_ID, a.ITEM_ID,
row_number() over(order by a.HH_ADDRESS_ID, a.ITEM_ID) id
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product a
inner join
(
select distinct aa.TRANS_ID
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product aa
inner join
(select distinct ITEM_ID from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product2 where ITEM_CNT >= 120) bb
on aa.ITEM_ID = bb.ITEM_ID
) b
on a.TRANS_ID = b.TRANS_ID
;
-- Construct
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product4;
-- 14,161,756 rows affected;
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product4 as
select distinct aa.id, aa.TRANS_ID, aa.HH_ADDRESS_ID, aa.ITEM_ID,
bb.ITEM_ID as ITEM_ID2
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 aa
inner join ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3 bb
on aa.HH_ADDRESS_ID = bb.HH_ADDRESS_ID
and aa.TRANS_ID = bb.TRANS_ID
and aa.ITEM_ID < bb.ITEM_ID
;
-- Counting
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product5;
-- 9,517,664 rows affected;
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product5 as
select ITEM_ID, ITEM_ID2, count(*) PAIR_CNT
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product4
group by 1,2
order by 3 desc;
-- Counting
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6;
-- 9,517,664 rows affected;
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 as
select a.*, NTILE(100) OVER(ORDER BY a.PAIR_CNT DESC NULLS LAST) as PERCENTILE
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product5 a;
select PERCENTILE, count(*), avg(PAIR_CNT), min(PAIR_CNT), max(PAIR_CNT)
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6
group by 1
order by 1;
select * from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6
where item_id = 1846960 and item_id2 = 1871478
;
select * from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6
where item_id = 884680 and item_id2 = 961474
;
-- Third Pass Tripleton Set;
-- Filter: support >= 7
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7;
-- 47,509,423 rows affected;
-- ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product3
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 as
select a.TRANS_ID, a.HH_ADDRESS_ID, a.ITEM_ID, row_number() over(order by a.HH_ADDRESS_ID, a.ITEM_ID) id
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product a
inner join
(
select distinct aa.TRANS_ID
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product aa
inner join
(select distinct ITEM_ID from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product6 where PAIR_CNT >= 7) bb
on aa.ITEM_ID = bb.ITEM_ID
) b
on a.TRANS_ID = b.TRANS_ID
;
-- Coonstruct;
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product8;
-- 4,513,794 rows affected;
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product8 as
select distinct aa.id, aa.TRANS_ID, aa.HH_ADDRESS_ID, aa.ITEM_ID,
bb.ITEM_ID as ITEM_ID2,
cc.ITEM_ID as ITEM_ID3
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 aa
inner join ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 bb
on aa.HH_ADDRESS_ID = bb.HH_ADDRESS_ID
and aa.TRANS_ID = bb.TRANS_ID
inner join ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product7 cc
on bb.HH_ADDRESS_ID = cc.HH_ADDRESS_ID
and bb.TRANS_ID = cc.TRANS_ID
and aa.ITEM_ID < bb.ITEM_ID
and bb.ITEM_ID < cc.ITEM_ID
;
-- Counting;
drop table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product9;
-- 4,218,992 rows affected;
create table ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product9 as
select a.ITEM_ID, a.ITEM_ID2, a.ITEM_ID3, count(*) TRIPLE
from ANALYTICS_STG..LH_DM_COUPON_7_sales_6_product8 a
group by 1,2,3
order by 4 desc;
No comments:
Post a Comment