# Weekly share of events per category.
# Subquery `a` counts rows per (week, fact_cat); subquery `b` counts rows per
# week; `perc` is a's count divided by b's count for the same week.
# count(*) is multiplied by 1.0 so the division is not integer division.
sql <- " select a.week, a.fact_cat, a.fact_cnts/b.fact_cnts as perc from (
select week, fact_cat, count(*)*1.0 as fact_cnts from lh_userfact_weekhist
group by 1,2) a,
(select week, count(*)*1.0 as fact_cnts from lh_userfact_weekhist group by 1 ) b
where a.week=b.week
order by 1,3 desc;"
# `con` is an open database connection created outside this section.
# NOTE(review): presumably RODBC::sqlQuery -- confirm.
data <- sqlQuery(con, sql, as.is = TRUE, max = 0)
# On failure the query call can hand back something other than a data frame
# (RODBC returns a character vector of error messages); fail with that message
# instead of a confusing subsetting error on the next line.
if (!is.data.frame(data)) {
  stop("Query failed: ", paste(data, collapse = "\n"), call. = FALSE)
}
# Column 3 is `perc`; force it to numeric because as.is = TRUE may leave it
# as character.
data[, 3] <- as.numeric(data[, 3])
data2 <- data
# NOTE(review): `levels` is not defined in this section. It must be a vector of
# category names created earlier; otherwise it resolves to base::levels (a
# function) and factor() fails -- confirm.
data2$name <- factor(data2$fact_cat, levels = levels)
# Percentage label shown inside each bubble, e.g. "12.34%".
data2$nperc <- paste0(round(data2$perc * 100, 2), "%")
# Bubble chart: one bubble per (week, category), sized by the category's share
# of that week's events and labelled with the share as a percentage.
ggplot(data2, aes(y = name, x = week, size = perc, label = nperc)) +
  geom_point(color = "red", shape = 16) +
  scale_size_continuous(range = c(1, 50)) +
  # Text layers take `fontface`; `face` is only an element_text() argument and
  # is not recognised by geom_text().
  geom_text(fontface = "bold", size = 6, color = "black") +
  ggtitle("Distribution of Events by Category") +
  xlab("Week") +
  ylab("Category") +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.title.y = element_text(face = "bold", size = 16, angle = 90),
    legend.position = "top",
    legend.key = element_rect(colour = NA),
    legend.title = element_blank()
  )
# Build the visit-category matrix for each user's first 20 visits.
# (author's recorded figure: 156712 -- presumably a row count; not verifiable
# from this section)
levels(result2$category)
result2$user_cumcnt <- as.numeric(result2$user_cumcnt)
# Keep rows up to the 20th visit and columns 1, 4 and 15 of result2.
# NOTE(review): the formula below implies these include userid and
# user_cumcnt; the third column is presumably the visit category -- confirm.
sample <- data.frame(
  result2[result2$user_cumcnt <= 20, c(1, 4, 15)]
)
# Cast to wide form: one row per userid, one column per visit number.
# (author's recorded figure: 37427 21 -- presumably the dim() output below)
visit_cmatrix <- dcast(data = sample, formula = userid ~ user_cumcnt)
dim(visit_cmatrix)
# Dot plot: the first 20 visits for a random sample of up to 50 users, one row
# of points per user, coloured and shaped by visit category.

# Draw the sample, keeping row indices in ascending order. sample.int() cannot
# be confused with a data frame named `sample`, and the size is capped so the
# draw does not fail when fewer than 50 users are available.
n_users <- min(50L, nrow(visit_cmatrix))
sample50 <- visit_cmatrix[sort(sample.int(nrow(visit_cmatrix), n_users)), ]

# Number of non-missing visit columns per user (column 1 is userid).
cnts <- rowSums(!is.na(sample50[, -1, drop = FALSE]))
sample50 <- cbind(sample50, cnts)

# Order users by visit count, then number them; the number is the y position.
sample50 <- sample50[order(sample50$cnts), ]
sample50$user_seq_no <- seq_len(nrow(sample50))

# Long form: one row per (user, visit), dropping visits that did not happen.
plot_sample <- melt(
  sample50,
  id.vars = c("userid", "cnts", "user_seq_no"),
  variable.name = "visits",
  value.name = "category",
  na.rm = TRUE
)

ggplot(plot_sample, aes(x = visits, y = user_seq_no)) +
  geom_point(aes(color = category), size = 4.5) +
  geom_point(aes(shape = category)) +
  # One shape code per distinct category (the argument is `values`).
  scale_shape_manual(values = seq_along(unique(plot_sample$category))) +
  ggtitle(sprintf("First 20 Visits for %d Users in the Sample", n_users)) +
  xlab("Visits") +
  ylab("User Seq No.") +
  # theme()/element_text() replace the removed opts()/theme_text() API.
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(family = "sans", face = "bold", size = 12),
    axis.text.y = element_text(family = "sans", face = "bold", size = 12)
  )
Subscribe to:
Post Comments (Atom)
Blog Archive
-
▼
2015
(43)
-
▼
December
(14)
- Master R 4 - Restruct Data
- Master R 3 - Filter and Summarize Data
- Master R 2 - Fetch Web Data
- Master R 1 - Readin Data
- Visual 10 - Bipartite Network Plot in R
- Visual 9 - Heat Map in R
- Visual 8 - Dot Plot and Bubble Chart
- Visual 7 - Network Graph
- Visual 6 - Density
- Visual 5 - Bar Chart and Stacked Bar Chart
- Visual 4 - Boxplot and Donut Chart
- Visual 3 - Line Plot
- Visual 2 - Histogram
- Visual 1 - Correlation
-
▼
December
(14)
No comments:
Post a Comment