Wednesday, November 25, 2015

Similarity Calculation 3 - Gini/Efficiency Similarity Using SQL

-- Calculate population counts/prob;
-- 4,856,590 33,853,878 38,710,468;
select sum(response), count(1)-sum(response), count(1)
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1;

-- 1 Calculate positive marginal counts given x=1 and y=1;
drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_pos;
-- 1 rows affected;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_pos as
select
sum(UNIV_BBBUS)UNIV_BBBUS,
sum(UNIV_BABYUS)UNIV_BABYUS,
sum(UNIV_CTSUS)UNIV_CTSUS,
sum(UNIV_HRMUS)UNIV_HRMUS,
sum(UNIV_BBBCA)UNIV_BBBCA,
sum(UNIV_BBBMX)UNIV_BBBMX,
...
sum(FST_STR_SHOP_NBR_651)FST_STR_SHOP_NBR_651
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1
where response = 1
;
-- 2 Calculate Gini Similarity;
drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM3_pos;
-- 1 rows affected
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM3_pos as
select 
a.UNIV_BBBUS/38710468.0 UNIV_BBBUS,
a.UNIV_BABYUS/38710468.0 UNIV_BABYUS,
a.UNIV_CTSUS/38710468.0 UNIV_CTSUS,
a.UNIV_HRMUS/38710468.0 UNIV_HRMUS,
a.UNIV_BBBCA/38710468.0 UNIV_BBBCA,
a.UNIV_BBBMX/38710468.0 UNIV_BBBMX,
...
a.FST_STR_SHOP_NBR_651/38710468.0 FST_STR_SHOP_NBR_651
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_pos a;


drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM4_pos;
-- 38,710,468 rows affected;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM4_pos as
select a.COUPON_BARCODE, a.RESPONSE,
a.UNIV_BBBUS*b.UNIV_BBBUS+
a.UNIV_BABYUS*b.UNIV_BABYUS+
a.UNIV_CTSUS*b.UNIV_CTSUS+
a.UNIV_HRMUS*b.UNIV_HRMUS+
a.UNIV_BBBCA*b.UNIV_BBBCA+
a.UNIV_BBBMX*b.UNIV_BBBMX+
a.UNIV_UNIV_BABYCA*b.UNIV_UNIV_BABYCA+
...
a.FST_STR_SHOP_NBR_651*b.FST_STR_SHOP_NBR_651  score
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1 a
cross join ANALYTICS_STG..LH_CAMPAIGN_DM_ADM3_pos b
;

select a.DECILE, count(*), sum(response)
from
(
select coupon_barcode, response, score,
NTILE(10) OVER(ORDER BY score DESC NULLS LAST) as DECILE
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM4_pos
) a
group by 1
order by 1 
;

No comments:

Post a Comment

Blog Archive