Master R 10 - Classification and Clustering

Cluster analysis
# Hierarchical clustering
d <- dist(mtcars)
h <- hclust(d)
h
plot(h)
rect.hclust(h, k=3, border = "red")
cn <- cutree(h, k=3)
table(cn)
round(aggregate(mtcars, FUN = mean, by = list(cn)), 1)
round(aggregate(mtcars, FUN = sd, by = list(cn)), 1)
round(sapply(mtcars, sd), 1)
round(apply(aggregate(mtcars, FUN = mean, by = list(cn)), 2, sd), 1)
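# The raw variables are on very different scales (see the standard
# deviations above), so disp and hp dominate the Euclidean distances.
# A minimal sketch, assuming equal weighting is wanted: standardize first.
plot(hclust(dist(scale(mtcars))))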
# Determining the ideal number of clusters
install.packages('NbClust')
library(NbClust)
NbClust(mtcars, method = 'complete', index = 'dindex')
NbClust(mtcars, method = 'complete', index = 'hartigan')$Best.nc
NbClust(mtcars, method = 'complete', index = 'kl')$Best.nc
NbClust(iris[, -5], method = 'complete', index = 'all')$Best.nc[1,]
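# A minimal sketch extending the call above: the first row of Best.nc holds
# each index's proposed number of clusters, so a tally shows the majority vote.
nb <- NbClust(iris[, -5], method = 'complete', index = 'all')
table(nb$Best.nc[1, ])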
# K-means clustering
(k <- kmeans(mtcars, 3))
all(cn == k$cluster)
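# Cluster labels are assigned arbitrarily, so all(cn == k$cluster) can be
# FALSE even when the two partitions agree; a cross-tabulation shows the
# actual correspondence between the hierarchical and k-means memberships.
table(cn, k$cluster)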
# Visualizing clusters
library(cluster)
clusplot(mtcars, k$cluster, color = TRUE, shade = TRUE, labels = 2)
# Latent class models
factors <- c('cyl', 'vs', 'am', 'carb', 'gear')
mtcars[, factors] <- lapply(mtcars[, factors], factor)
# Latent Class Analysis
install.packages('poLCA')
library(poLCA)
p <- poLCA(cbind(cyl, vs, am, carb, gear) ~ 1, data = mtcars, graphs = TRUE, nclass = 3)
p$P
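# A minimal sketch: p$predclass holds each car's most likely latent class,
# which can be tabulated like the cutree memberships earlier.
table(p$predclass)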
# Latent class regression
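# The header above has no accompanying code; a minimal, hypothetical sketch:
# poLCA fits a latent class regression when covariates appear on the
# right-hand side of the formula (using mpg here is an assumption).
pr <- poLCA(cbind(cyl, vs, am, carb, gear) ~ mpg, data = mtcars, nclass = 3)
pr$coeff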
# Discriminant analysis
# drop the modified copy so mtcars reverts to the built-in dataset
rm(mtcars)
mtcars$gear <- factor(mtcars$gear)
library(MASS)
d <- lda(gear ~ ., data = mtcars, CV = TRUE)
(tab <- table(mtcars$gear, d$class))
sum(diag(tab)) / sum(tab)
round(d$posterior, 4)
d <- lda(gear ~ ., data = mtcars)
plot(d)
plot(d, dimen = 1, type = "both" )
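# A minimal sketch: the discriminant scores behind these plots can be
# extracted directly; the columns of predict(d)$x are the LD1 and LD2 coordinates.
head(predict(d)$x)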
# Logistic regression
lr <- glm(am ~ hp + wt, data = mtcars, family = binomial)
summary(lr)
table(mtcars$am, round(predict(lr, type = 'response')))
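# A minimal sketch with made-up predictor values: score the probability of
# a manual transmission (am = 1) for a hypothetical 120 hp, 2800 lb car.
predict(lr, newdata = data.frame(hp = 120, wt = 2.8), type = 'response')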
install.packages('nnet')
library(nnet)
(mlr <- multinom(factor(gear) ~ ., data = mtcars))
table(mtcars$gear, predict(mlr))
# reset mtcars to the built-in dataset again before the machine learning examples
rm(mtcars)
# Machine learning algorithms
# The K-Nearest Neighbors algorithm
# K-Nearest Neighbors is a supervised classification algorithm, mostly used in pattern recognition and business analytics. A big advantage of k-NN is that it is relatively insensitive to outliers, and its usage is extremely straightforward.
set.seed(42)
n <- nrow(mtcars)
train <- mtcars[sample(n, n / 2), ]
library(dplyr)
# the same random split with dplyr (this overwrites the base R version above)
train <- sample_n(mtcars, n / 2)
test <- mtcars[setdiff(row.names(mtcars), row.names(train)), ]
library(class)
(cm <- knn(
    train = subset(train, select = -gear),
    test = subset(test, select = -gear),
    cl = train$gear,
    k = 5))
cor(test$gear, as.numeric(as.character(cm)))
table(train$gear)
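# The k = 5 above was picked ad hoc; a quick sketch comparing test-set
# accuracy over a range of k values (assumes the train/test split above).
sapply(1:10, function(k) {
    pred <- knn(train = subset(train, select = -gear),
        test = subset(test, select = -gear),
        cl = train$gear, k = k)
    mean(as.numeric(as.character(pred)) == test$gear)
})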
# Classification trees
library(rpart)
ct <- rpart(factor(gear) ~ ., data = train, minsplit = 3)
summary(ct)
plot(ct)
text(ct)
table(test$gear, predict(ct, newdata = test, type = 'class'))
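# A minimal sketch: rpart's complexity table reports the cross-validated
# error of each subtree, a common guide for pruning.
printcp(ct)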
install.packages('party')
library(party)
ct <- ctree(factor(gear) ~ drat, data = train, controls = ctree_control(minsplit = 3))
plot(ct, main = "Conditional Inference Tree")
# type = 'node' returns terminal node IDs rather than class labels
table(test$gear, predict(ct, newdata = test, type = 'node'))
install.packages('randomForest')
library(randomForest)
(rf <- randomForest(factor(gear) ~ ., data = train, ntree = 250))
table(test$gear, predict(rf, test))
plot(rf)
legend('topright', legend = colnames(rf$err.rate), col = 1:4, fill = 1:4, bty = 'n')
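# A minimal sketch: the forest also records variable importance (mean
# decrease in Gini impurity for this classification model).
varImpPlot(rf)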
# Other algorithms
install.packages(c('caret', 'C50'))
library(caret)
library(C50)
C50 <- train(factor(gear) ~ ., data = train, method = 'C5.0')
summary(C50)
table(test$gear, predict(C50, test))
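# A minimal sketch using caret's own helper: confusionMatrix augments the
# raw contingency table above with accuracy and per-class statistics.
confusionMatrix(predict(C50, test), factor(test$gear, levels = c(3, 4, 5)))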