Tuesday, December 1, 2015

Visual 4 - Bloxplot and Donut Chart

# Use rect and points to plot boxplot
boxplot.ej <- function(y, xloc = 1, width.box = 0.25, lwd.box = 2, width.hor = 0.25,
                       lwd.hor = 2, range.wisk = 1.5, lwd.wisk = 2, pch.box = 16, cex.boxpoint = 2,
                       plot.outliers = FALSE, pch.out = 1, cex.out = 1, color = "black") {

  # makes boxplot with dot as median and solid whisker Interquartile range =
  # (.75 quantile) - (.25 quantile).  Note: Wiskers are not always symmetrical;
  # top wisker extends up to max(y) constrained by y <= (.75 quantile) +
  # range.wisk*Interquartile range bottom whisker is determined by min(y)
  # constrained by y >= (.25 quantile) - range.wisk*Interquartile range

  Q <- quantile(y, c(0.25, 0.5, 0.75))
  names(Q) <- NULL  # gets rid of percentages
  IQ.range <- Q[3] - Q[1]
  low <- Q[1] - range.wisk * IQ.range
  high <- Q[3] + range.wisk * IQ.range
  index <- which((y >= low) & (y <= high))
  wisk.low <- min(y[index])
  wisk.high <- max(y[index])
  outliers <- y[which((y < low) | (y > high))]

  # plot median:
  points(xloc, Q[2], pch = pch.box, cex = cex.boxpoint, col = color)

  # plot box:
  xleft <- xloc - width.box/2
  xright <- xloc + width.box/2
  ybottom <- Q[1]
  ytop <- Q[3]
  rect(xleft, ybottom, xright, ytop, lwd = lwd.box, border = color)

  # plot whiskers:
  segments(xloc, wisk.low, xloc, Q[1], lwd = lwd.wisk, col = color)
  segments(xloc, Q[3], xloc, wisk.high, lwd = lwd.wisk, col = color)

  # plot horizontal segments:
  x0 <- xloc - width.hor/2
  x1 <- xloc + width.hor/2
  segments(x0, wisk.low, x1, wisk.low, lwd = lwd.hor, col = color)
  segments(x0, wisk.high, x1, wisk.high, lwd = lwd.hor, col = color)

  # plot outliers:
  if (plot.outliers == TRUE) {
    xloc.p <- rep(xloc, length(outliers))
    points(xloc.p, outliers, pch = pch.out, cex = cex.out, col = color)
  }
}

RT.hf.sp <- rnorm(1000, mean = 0.41, sd = 0.008)
RT.lf.sp <- rnorm(1000, mean = 0.43, sd = 0.01)
RT.vlf.sp <- rnorm(1000, mean = 0.425, sd = 0.012)
RT.hf.ac <- rnorm(1000, mean = 0.46, sd = 0.008)
RT.lf.ac <- rnorm(1000, mean = 0.51, sd = 0.01)
RT.vlf.ac <- rnorm(1000, mean = 0.52, sd = 0.012)

ps <- 1  # size of boxpoint
par(cex.main = 1.5, mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.5,
    font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)
x <- c(1, 2, 3, 4)
plot(x, c(-10, -10, -10, -10), type = "p", ylab = " ", xlab = " ", cex = 1.5,
     ylim = c(0.3, 0.6), xlim = c(1, 4), lwd = 2, pch = 5, axes = FALSE, main = " ")
axis(1, at = c(1.5, 2.5, 3.5), labels = c("HF", "LF", "VLF"))
mtext("Word Frequency", side = 1, line = 3, cex = 1.5, font = 2)
axis(2, pos = 1.1)
par(las = 0)
mtext("Group Mean M", side = 2, line = 2.9, cex = 1.5, font = 2)

x <- c(1.5, 2.5, 3.5)
boxplot.ej(RT.hf.sp, xloc = 1.5, cex.boxpoint = ps)
boxplot.ej(RT.hf.ac, xloc = 1.5, cex.boxpoint = ps, color = "grey")
boxplot.ej(RT.lf.sp, xloc = 2.5, cex.boxpoint = ps)
boxplot.ej(RT.lf.ac, xloc = 2.5, cex.boxpoint = ps, color = "grey")
boxplot.ej(RT.vlf.sp, xloc = 3.5, cex.boxpoint = ps)
boxplot.ej(RT.vlf.ac, xloc = 3.5, cex.boxpoint = ps, color = "grey")

text(2.5, 0.35, "Speed", cex = 1.4, font = 1, adj = 0.5)
text(2.5, 0.57, "Accuracy", cex = 1.4, font = 1, col = "grey", adj = 0.5)

# Use qplot to create bloxplot
users <- ddply(result3, .(category, user_cumcnt), function(x) data.frame(users = length(unique(x$userid))))
convs <- ddply(result3, .(category, user_cumcnt), function(x) data.frame(convs = sum(x$convs)))
tmp <- merge(users, convs, by=c("category", "user_cumcnt"))
imps <- rep(0, length(tmp[,1]))
cum.convs <- rep(0, length(tmp[,1]))
tmp <- data.frame(tmp, cbind(imps, cum.convs))
tmp <- tmp[order(tmp$category, tmp$user_cumcnt), ]

cat.visit <- tmp
web.level <- levels(cat.visit[,1])
for (i in 1: length(web.level)){
cat1 <- cat.visit[cat.visit[,1] == web.level[i],3]  
cat.visit[cat.visit[,1] == web.level[i],5] <- cumsum(cat1)

cat2 <- cat.visit[cat.visit[,1] == web.level[i],4]  
cat.visit[cat.visit[,1] == web.level[i],6] <- cumsum(cat2)
}

write.csv(cat.visit, file= "cat_visit.csv")

cat.visit1 <- ddply(cat.visit, .(category, user_cumcnt), transform, resp.rate = convs/users, cum.conv = cum.convs/imps )
qplot(category, cum.conv, data=cat.visit1, geom=c("boxplot", "jitter"), color =category) +
    geom_point(aes(color = category), size = 2) +
    opts(title = "Conversion Rates by Website Category") +
    opts(plot.title = theme_text(face = "bold", size=14)) + 
    xlab("Category") + ylab("Conversion Rates") +
    opts(axis.text.x = theme_text(family = "sans", face = "bold", size = 8)) +
    opts(axis.text.y = theme_text(family = "sans", face = "bold", size = 12))


## Donut Charts
list.of.data.frames = list(dat1, dat2, dat3, dat4, dat5, dat6, dat7)
data=merged.data.frame = Reduce(function(...) merge(..., all=T), list.of.data.frames)

# ggplot(data, aes(y=data[,2], x="", fill=data$age_grp)) +
#   geom_bar(width = 1, stat = "identity") +
#   coord_polar("y", start=0) +
#   scale_fill_brewer(palette="Dark2") +
#   ggtitle("Total Customers by Age Group") +
#   ylab("Total Customers") +
#   xlab("") +
#   theme(plot.title = element_text(face = "bold", size = 20)) +
#   theme(axis.text.x = element_text(face = "bold", size = 14)) +
#   theme(axis.text.y = element_text(face = "bold", size = 14)) +
#   theme(axis.title.x = element_text(face = "bold", size = 16)) +
#   theme(strip.text.x = element_text(face = "bold", size = 16)) +
#   theme(axis.title.y = element_text(face = "bold", size = 16, angle=90))

data2=colPerc(data[,-1])
data2=as.data.frame(data2[-nrow(data2),])
data2$age_grp=data$age_grp
data2$ymax=cumsum(data2[,1])
data2$ymin = c(0, head(data2$ymax, n = -1))

blank_theme <- theme_minimal()+
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid=element_blank(),
    axis.ticks = element_blank(),
    plot.title=element_text(size=14, face="bold")
  )

ggplot(data2, aes(fill = age_grp, ymax = ymax, ymin = ymin, xmax = 100, xmin = 80)) +
  geom_rect(colour = "black") +
  coord_polar(theta = "y") +
  xlim(c(0, 100)) +
  geom_label(aes(label=paste(data2[,1],"%"),x=100,y=(ymin+ymax)/2),inherit.aes = TRUE, show.legend = FALSE) +
  scale_fill_brewer("Age Group", palette = "Dark2") + blank_theme +
  theme(axis.text.x=element_blank()) + theme(legend.position=c(.5, .5)) +
  ggtitle("Total Customers") +
  theme(panel.grid=element_blank()) +
  theme(axis.text=element_blank()) +
  theme(axis.ticks=element_blank()) +
  theme(legend.title = element_text(size=16, face="bold")) +

  theme(legend.text = element_text(size = 14, face = "bold"))



No comments:

Post a Comment

Blog Archive