-- Calculate population counts/prob;
-- 4,856,590 33,853,878 38,710,468;
select sum(response), count(1)-sum(response), count(1)
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1;
-- Calculate counts/prob for positive;
drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_pos;
-- 1 rows affected;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_pos as
select
sum(UNIV_BBBUS)UNIV_BBBUS,
sum(UNIV_BABYUS)UNIV_BABYUS,
sum(UNIV_CTSUS)UNIV_CTSUS,
sum(UNIV_HRMUS)UNIV_HRMUS,
sum(UNIV_BBBCA)UNIV_BBBCA,
sum(UNIV_BBBMX)UNIV_BBBMX,
sum(UNIV_UNIV_BABYCA)UNIV_UNIV_BABYCA,
sum(DM_RECENTOPT_OPT_BABYUS)DM_RECENTOPT_OPT_BABYUS,
sum(DM_RECENTOPT_OPT_BBBUS)DM_RECENTOPT_OPT_BBBUS,
sum(DM_RECENTOPT_OPT_CTSUS)DM_RECENTOPT_OPT_CTSUS,
...
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1
where response = 1;
-- Caculate counts/prob for negative;
drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_neg;
-- 1 rows affected;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_neg as
select
sum(UNIV_BBBUS)UNIV_BBBUS,
sum(UNIV_BABYUS)UNIV_BABYUS,
sum(UNIV_CTSUS)UNIV_CTSUS,
sum(UNIV_HRMUS)UNIV_HRMUS,
sum(UNIV_BBBCA)UNIV_BBBCA,
sum(UNIV_BBBMX)UNIV_BBBMX,
sum(UNIV_UNIV_BABYCA)UNIV_UNIV_BABYCA,
sum(DM_RECENTOPT_OPT_BABYUS)DM_RECENTOPT_OPT_BABYUS,
sum(DM_RECENTOPT_OPT_BBBUS)DM_RECENTOPT_OPT_BBBUS,
sum(DM_RECENTOPT_OPT_CTSUS)DM_RECENTOPT_OPT_CTSUS,
...
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1
where response = 0;
-- Drop a probability of zero which causes divide by zero;
proc sql;
create table pos as
select * from connection to NETEZZA
(
select * from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_pos
;
)
;
quit;
proc means data=pos noprint;
output out=meanout(drop=_type_ _freq_ where=(_stat_ in ('MIN')));
run;
proc transpose data=meanout out=meanout;
run;
* varlist;
proc sql noprint;
select _name_ into :varlist separated by ' '
from meanout where col1>0.0;
quit;
%put &varlist;
/*Calculate probabilities for negative*/
proc sql;
create table neg as
select * from connection to NETEZZA
(
select * from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_neg
;
)
;
quit;
proc means data=neg noprint;
var &varlist;
output out=meanout(drop=_type_ _freq_ where=(_stat_ in ('MIN')));
run;
proc transpose data=meanout out=meanout;
run;
* varlist;
proc sql;
select _name_
from meanout where col1>0.0;
quit;
-- Caculate conditional prob ratio for positive;
drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_pos;
-- 1 rows affected;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_pos as
select
(a.UNIV_BBBUS/4856590.0)/(b.UNIV_BBBUS/33853878.0) UNIV_BBBUS,
(a.UNIV_BABYUS/4856590.0)/(b.UNIV_BABYUS/33853878.0) UNIV_BABYUS,
(a.UNIV_CTSUS/4856590.0)/(b.UNIV_CTSUS/33853878.0) UNIV_CTSUS,
(a.UNIV_HRMUS/4856590.0)/(b.UNIV_HRMUS/33853878.0) UNIV_HRMUS,
(a.UNIV_BBBCA/4856590.0)/(b.UNIV_BBBCA/33853878.0) UNIV_BBBCA,
(a.UNIV_BBBMX/4856590.0)/(b.UNIV_BBBMX/33853878.0) UNIV_BBBMX,
(a.UNIV_UNIV_BABYCA/4856590.0)/(b.UNIV_UNIV_BABYCA/33853878.0) UNIV_UNIV_BABYCA,
(a.DM_RECENTOPT_OPT_BABYUS/4856590.0)/(b.DM_RECENTOPT_OPT_BABYUS/33853878.0) DM_RECENTOPT_OPT_BABYUS,
...
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_pos a
cross join ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_neg b;
drop table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_neg;
-- Caculate conditional prob ratio for negative;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_neg as
select
(1-a.UNIV_BBBUS/4856590.0)/(1-b.UNIV_BBBUS/33853878.0) UNIV_BBBUS,
(1-a.UNIV_BABYUS/4856590.0)/(1-b.UNIV_BABYUS/33853878.0) UNIV_BABYUS,
(1-a.UNIV_CTSUS/4856590.0)/(1-b.UNIV_CTSUS/33853878.0) UNIV_CTSUS,
(1-a.UNIV_HRMUS/4856590.0)/(1-b.UNIV_HRMUS/33853878.0) UNIV_HRMUS,
(1-a.UNIV_BBBCA/4856590.0)/(1-b.UNIV_BBBCA/33853878.0) UNIV_BBBCA,
(1-a.UNIV_BBBMX/4856590.0)/(1-b.UNIV_BBBMX/33853878.0) UNIV_BBBMX,
(1-a.UNIV_UNIV_BABYCA/4856590.0)/(1-b.UNIV_UNIV_BABYCA/33853878.0) UNIV_UNIV_BABYCA,
(1-a.DM_RECENTOPT_OPT_BABYUS/4856590.0)/(1-b.DM_RECENTOPT_OPT_BABYUS/33853878.0) DM_RECENTOPT_OPT_BABYUS,
...
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_pos a
cross join ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1_neg b;
create table ANALYTICS_STG..LH_CAMPAIGN_DM_ADM_NB as
select a.*,
a.UNIV_BBBUS*b.UNIV_BBBUS+(1-a.UNIV_BBBUS)*c.UNIV_BBBUS+
a.UNIV_BABYUS*b.UNIV_BABYUS+(1-a.UNIV_BABYUS)*c.UNIV_BABYUS+
a.UNIV_CTSUS*b.UNIV_CTSUS+(1-a.UNIV_CTSUS)*c.UNIV_CTSUS+
a.UNIV_HRMUS*b.UNIV_HRMUS+(1-a.UNIV_HRMUS)*c.UNIV_HRMUS+
a.UNIV_BBBCA*b.UNIV_BBBCA+(1-a.UNIV_BBBCA)*c.UNIV_BBBCA+
a.UNIV_BBBMX*b.UNIV_BBBMX+(1-a.UNIV_BBBMX)*c.UNIV_BBBMX+
a.UNIV_UNIV_BABYCA*b.UNIV_UNIV_BABYCA+(1-a.UNIV_UNIV_BABYCA)*c.UNIV_UNIV_BABYCA+
...
a.FST_STR_SHOP_NBR_651*b.FST_STR_SHOP_NBR_651+(1-a.FST_STR_SHOP_NBR_651)*c.FST_STR_SHOP_NBR_651 score
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM1 a
cross join ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_pos b
cross join ANALYTICS_STG..LH_CAMPAIGN_DM_ADM2_neg c
;
select a.DECILE, count(*), sum(response)
from
(
select coupon_barcode, response, score,
NTILE(10) OVER(ORDER BY score DESC NULLS LAST) as DECILE
from ANALYTICS_STG..LH_CAMPAIGN_DM_ADM_NB
) a
group by 1
order by 1
;
Subscribe to:
Post Comments (Atom)
Blog Archive
-
▼
2015
(43)
-
▼
November
(18)
- Similarity Calculation 5 - Mutual Information Usin...
- Similarity Calculation 4 - Naive Bayes Using SQL a...
- Similarity Calculation 3 - Gini/Efficiency Similar...
- Similarity Calculation 2 - Cosine Similarity Using...
- Similarity Calculation 1 - Jaccard Similarity Usin...
- R Basics 12 - Enviromnetnts, Frames and the Call S...
- R Basics 11 - OOP and R5
- R Basics 10 - Avoiding For-Loops
- R Basics 9 - Writing Functions
- R Basics 8 - Tips and Traps
- R Basics 7 - Factors
- R Basics 6 - Matrices and Arrays
- R Basics 5 - Data Frames
- R Basics 4 - Lists
- R Basics 3 - Atomic Vectors
- R Basics 2 - Basic List of Useful Functions in R
- R Basics 1 - Brief Introduction to Language Elemen...
- Part 2: Frequent Item Sets Using SQL
-
▼
November
(18)
No comments:
Post a Comment