Wednesday, December 2, 2015

Visual 8 - Dot Plot and Bubble Chart

# Weekly share of events per category: for each (week, fact_cat) pair, the
# count of rows in lh_userfact_weekhist divided by that week's total count.
# The query text is a single literal, so no paste() is needed.
sql <- " select a.week, a.fact_cat, a.fact_cnts/b.fact_cnts as perc from (
        select week, fact_cat, count(*)*1.0 as fact_cnts from   lh_userfact_weekhist
        group by 1,2) a,
        (select week, count(*)*1.0 as fact_cnts from lh_userfact_weekhist group by 1 ) b
        where a.week=b.week
        order by 1,3 desc;"

# as.is = TRUE requests the columns without type conversion, so the share
# (third column, `perc`) is converted to numeric explicitly afterwards.
# max = 0 is passed through unchanged from the original call.
data <- sqlQuery(con, sql, as.is = TRUE, max = 0)
data[, 3] <- as.numeric(data[, 3])

# Plotting copy of the query result with two derived columns:
#   name  - fact_cat as a factor whose level order comes from `levels`
#   nperc - the share formatted as a percentage label, e.g. "12.34%"
# NOTE(review): `levels` is not defined in the visible code; presumably it is
# a vector of category names set earlier in the session -- confirm, otherwise
# this resolves to the base function `levels` and factor() will fail.
data2 <- data
data2$name <- factor(data2$fact_cat, levels = levels)
data2$nperc <- paste0(round(data2$perc * 100, 2), "%")

# Bubble chart: one red bubble per (week, category) cell, sized by `perc`
# and labelled with the formatted percentage `nperc`.
ggplot(data2, aes(x = week, y = name, size = perc, label = nperc)) +
  geom_point(color = "red", shape = 16) +
  scale_size_continuous(range = c(1, 50)) +
  # geom_text() takes `fontface`; `face` is an element_text() argument and
  # is not a geom_text() parameter, so the labels were never drawn bold.
  geom_text(fontface = "bold", size = 6, color = "black") +
  ggtitle("Distribution of Events by Category") +
  xlab("Week") +
  ylab("Category") +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.title.y = element_text(face = "bold", size = 16, angle = 90),
    legend.position = "top",
    legend.key = element_rect(colour = NA),
    legend.title = element_blank()
  )

# Visit/category matrix for each user's first 20 visits.
# Original author's note here: "156712" (meaning not shown in the code;
# possibly a row count -- unverified).
levels(result2$category)

# Make the cumulative visit counter numeric so it can be compared with 20.
result2$user_cumcnt <- as.numeric(result2$user_cumcnt)

# Keep rows for visits 1..20 and columns 1, 4 and 15 of result2.
# NOTE(review): the formula below implies these columns include `userid` and
# `user_cumcnt`; the third is presumably the category -- confirm.
sample <- data.frame(
  result2[result2$user_cumcnt <= 20, c(1, 4, 15)]
)

# Original author's note here: "37427  21" (possibly the dimensions of the
# cast result -- unverified).
# Cast to wide form: one row per userid, one column per user_cumcnt value.
# No value column is named, so dcast() chooses one itself.
visit_cmatrix <- dcast(data = sample, formula = userid ~ user_cumcnt)
dim(visit_cmatrix)

# Dot-plot input: draw 50 random users from the visit matrix, order them by
# how many visit columns are filled, and reshape to one row per
# (user, visit) with the category as the value.
sample50 <- visit_cmatrix[sort(sample(seq_len(nrow(visit_cmatrix)), 50)), ]

# Non-missing visit columns per user; the first column is excluded because it
# is the id column (`userid`), not a visit. rowSums() replaces the row-wise
# apply(), which also left a stray global `fun` behind.
cnts <- rowSums(!is.na(sample50[, -1, drop = FALSE]))
sample50$cnts <- cnts
sample50 <- sample50[order(sample50$cnts), ]

# Position of each user after sorting by visit count. Naming the column
# directly avoids the auto-generated "1:50" column name and the hard-coded
# length.
sample50$user_seq_no <- seq_len(nrow(sample50))

plot_sample <- melt(
  sample50,
  id.vars = c("userid", "cnts", "user_seq_no"),
  na.rm = TRUE
)

names(plot_sample) <- c("userid", "cnts", "user_seq_no", "visits", "category")

# Dot plot: one row of points per sampled user (y) across visit numbers (x),
# with the visit category encoded by both colour and shape.
# Uses theme()/element_text()/ggtitle() like the bubble chart above;
# opts() and theme_text() are no longer part of ggplot2.
ggplot(plot_sample, aes(x = visits, y = user_seq_no)) +
  geom_point(aes(color = category), size = 4.5) +
  geom_point(aes(shape = category)) +
  # The argument is `values`; supply one shape code per distinct category
  # rather than one per data row.
  scale_shape_manual(
    values = seq_len(nlevels(factor(plot_sample$category)))
  ) +
  ggtitle("First 20 Visits for 50 Users in the Sample") +
  xlab("Visits") +
  ylab("User Seq No.") +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(family = "sans", face = "bold", size = 12),
    axis.text.y = element_text(family = "sans", face = "bold", size = 12)
  )

No comments:

Post a Comment

Blog Archive