# Transposing matrices
# NOTE: the former `rm(list = ls())` call was dropped on purpose. When the
# script is sourced it silently deletes every object in the caller's global
# environment, yet it leaves attached packages and options untouched, so it
# does not provide a clean session either. Restart R if a clean slate is needed.
m <- matrix(1:9, 3)  # 3 x 3 integer matrix, filled column by column
t(m)                 # transpose: rows of `m` become columns
# Filtering data by string matching
library(plyr)
library(dplyr)
library(hflights)

# Columns whose name ends in "delay".
hflights %>%
  select(ends_with("delay")) %>%
  str()

# Columns whose name contains an upper-case "T" (case-sensitive match).
hflights %>%
  select(contains("T", ignore.case = FALSE)) %>%
  str()

# Columns whose name consists of exactly 5 or 6 letters.
hflights %>%
  select(matches("^[[:alpha:]]{5,6}$")) %>%
  str()

# Distribution of column-name lengths, to put the regex matches in context.
table(nchar(colnames(hflights)))

# Everything except the columns whose name is exactly 7 or 8 letters long.
hflights %>%
  select(-matches("^[[:alpha:]]{7,8}$")) %>%
  str()
# Rearranging data
# Nested-call form: order the flights by elapsed time and inspect the result.
str(arrange(hflights, ActualElapsedTime))

# Install magrittr only when it is missing, so that re-running the script does
# not reinstall the package (and does not fail offline) on every execution.
if (!requireNamespace("magrittr", quietly = TRUE)) {
  install.packages("magrittr")
}
library(magrittr)

# The same operation written as a left-to-right pipeline.
hflights %>%
  arrange(ActualElapsedTime) %>%
  str()

# Longer pipeline: order by elapsed time, keep two columns, drop flights whose
# destination is AUS, and show the structure of the first rows.
hflights %>%
  arrange(ActualElapsedTime) %>%
  select(ActualElapsedTime, Dest) %>%
  subset(Dest != "AUS") %>%
  head() %>%
  str()
library(data.table)

# data.table version of the pipeline: key (sort) by elapsed time, drop flights
# to AUS and keep two columns. With `with = FALSE`, `j` is treated as a
# character vector of column names.
str(head(
  data.table(hflights, key = "ActualElapsedTime")[
    Dest != "AUS",
    c("ActualElapsedTime", "Dest"),
    with = FALSE
  ]
))

# Same query after dropping rows with missing values. Inside `list()` the
# columns must be referenced as unquoted symbols: quoted names would be
# evaluated as plain character constants and the result would hold the two
# literal strings instead of the column data.
str(head(
  na.omit(data.table(hflights, key = "ActualElapsedTime"))[
    Dest != "AUS",
    list(ActualElapsedTime, Dest)
  ]
))
# dplyr vs data.table
# Computing new variables
# Build a data.table from the flight data to experiment on.
hflights_dt <- data.table(hflights)
# data.table way: `:=` adds the DistanceKMs column inside the `[` call.
# Distance is divided by 0.62137 (miles per kilometre; assumes Distance is
# recorded in miles).
hflights_dt[, DistanceKMs := Distance / 0.62137]
# Traditional `$<-` assignment computing the very same column, shown for
# comparison with the `:=` form above.
hflights_dt$DistanceKMs <- hflights_dt$Distance / 0.62137
# Memory profiling
# Install pryr only when it is missing, so that re-running the script does not
# reinstall the package (and does not fail offline) on every execution.
if (!requireNamespace("pryr", quietly = TRUE)) {
  install.packages("pryr")
}
library(pryr)

# Experiment 1: `$<-` assignment. Print the object's address before and after
# the assignment; a different address afterwards means the table was copied.
hflights_dt <- data.table(hflights)
address(hflights_dt)
hflights_dt$DistanceKMs <- hflights_dt$Distance / 0.62137
address(hflights_dt)  # previously missing, so the copy could not be observed

# Experiment 2: `:=` assignment on a fresh table. Compare the two addresses
# printed here with the pair printed above.
hflights_dt <- data.table(hflights)
address(hflights_dt)
hflights_dt[, DistanceKMs := Distance / 0.62137]
address(hflights_dt)

# For reference, time the base-R `within()` way of computing the same column.
system.time(within(hflights_dt, DistanceKMs <- Distance / 0.62137))
# Creating multiple variables at a time
# Add both unit conversions of Distance in a single `:=` call
# (functional form: one named argument per new column).
hflights_dt[, `:=`(
  DistanceKMs = Distance / 0.62137,
  DistanceFeets = Distance * 5280
)]

# One 0/1 indicator column per carrier, named carrier_<code>.
carriers <- unique(hflights_dt[["UniqueCarrier"]])
hflights_dt[
  ,
  paste("carrier", carriers, sep = "_") := lapply(
    carriers,
    function(carrier_code) as.numeric(UniqueCarrier == carrier_code)
  )
]

# Inspect only the freshly created indicator columns, then the whole table.
str(hflights_dt[, grep("^carrier", names(hflights_dt)), with = FALSE])
str(hflights_dt)
# Computing new variables with dplyr
# Pipe form: feed hflights into mutate() and store the result back, adding the
# DistanceKMs column (Distance divided by 0.62137).
hflights <- hflights %>% mutate(DistanceKMs = Distance / 0.62137)
# Direct-call form of the same computation; it simply recomputes and
# overwrites the DistanceKMs column created on the previous line.
hflights <- mutate(hflights, DistanceKMs = Distance/0.62137)
# Merging datasets
# Lookup table translating the numeric DayOfWeek code into a day name.
# The flight data follows the US BTS coding, 1 = Monday ... 7 = Sunday
# (e.g. flights on 2011-01-01, a Saturday, carry DayOfWeek 6), so the labels
# must start with Monday; starting with Sunday shifts every label by one day.
wdays <- data.frame(
  DayOfWeek = 1:7,
  DayOfWeekString = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
# Time the join of the day-name lookup onto the flights: first with the plain
# data frame (joined on the columns the two tables share), then with the
# data.table copy (joined explicitly on DayOfWeek).
system.time(merge(x = hflights, y = wdays))
system.time(merge(x = hflights_dt, y = wdays, by = "DayOfWeek"))

# Alternative without any merge: build the date from its parts and let
# weekdays() derive the day name.
hflights$wdays <- weekdays(as.Date(
  paste(hflights$Year, hflights$Month, hflights$DayofMonth, sep = "-")
))
# One of the most often used functions along with the base commands rbind and cbind is do.call, which can execute the rbind or cbind call on all elements of a list, thus enabling us to join a list of data frames.
# Similarly, rbindlist can be called to merge a list of data.table objects in a much faster way.
# Reshaping data in a flexible way
# Converting wide tables to the long table format
library(reshape2)

# With neither id nor measure variables given, melt() picks them itself.
head(melt(hflights))

# Stack the two duration columns into variable/value pairs, with no id columns.
hflights_melted <- melt(
  hflights,
  id.vars = 0,
  measure.vars = c("ActualElapsedTime", "AirTime")
)
str(hflights_melted)

# Side-by-side box plots of the two melted duration variables.
library(ggplot2)
ggplot(hflights_melted, aes(x = variable, y = value)) +
  geom_boxplot()
# Converting long tables to the wide table format
# Melt the two duration columns, keeping Month as the identifier.
hflights_melted <- melt(
  hflights,
  id.vars = "Month",
  measure.vars = c("ActualElapsedTime", "AirTime")
)

# Cast back to one row per month, one column per variable, holding the mean
# of each variable with missing values ignored.
df <- dcast(
  hflights_melted,
  Month ~ variable,
  fun.aggregate = mean,
  na.rm = TRUE
)

# Plot the monthly means as one line per variable.
ggplot(melt(df, id.vars = "Month")) +
  geom_line(aes(x = Month, y = value, color = variable)) +
  scale_x_continuous(breaks = 1:12) +
  theme_bw() +
  theme(legend.position = "top")

# Repeat the aggregation after add_margins() has been applied to the Month
# column (presumably adding an overall margin group; see the reshape2 docs).
hflights_melted <- melt(
  add_margins(hflights, "Month"),
  id.vars = "Month",
  measure.vars = c("ActualElapsedTime", "AirTime")
)
df <- dcast(
  hflights_melted,
  Month ~ variable,
  fun.aggregate = mean,
  na.rm = TRUE
)
Thursday, December 17, 2015
Subscribe to:
Post Comments (Atom)
Blog Archive
-
▼
2015
(43)
-
▼
December
(14)
- Master R 4 - Restruct Data
- Master R 3 - Filter and Summarize Data
- Master R 2 - Fetch Web Data
- Master R 1 - Readin Data
- Visual 10 - Bipartite Network Plot in R
- Visual 9 - Heat Map in R
- Visual 8 - Dot Plot and Bubble Chart
- Visual 7 - Network Graph
- Visual 6 - Density
- Visual 5 - Bar Chart and Stacked Bar Chart
- Visual 4 - Bloxplot and Donut Chart
- Visual 3 - Line Plot
- Visual 2 - Histogram
- Visual 1 - Corrlation
-
▼
December
(14)
No comments:
Post a Comment