memory.limit()
memory.size(max = TRUE)
rm(list=ls(all=T))
require(RODBC)
require(ggplot2)
require(plyr)
require(e1071)
require(LiblineaR)
data(iris)
attach(iris)
x=iris[,1:4]
y=factor(iris[,5])
train=sample(1:dim(iris)[1],100)
xTrain=x[train,]
xTest=x[-train,]
yTrain=y[train]
yTest=y[-train]
# Center and scale data
s=scale(xTrain,center=TRUE,scale=TRUE)
# Sparse Logistic Regression
t=6
# Tune the cost parameter of a logistic regression according to the Joachim’s heuristics
co=heuristicC(s)
m=LiblineaR(data=s,labels=yTrain,type=t,cost=co,bias=TRUE,verbose=FALSE)
# Scale the test data
s2=scale(xTest,attr(s,"scaled:center"),attr(s,"scaled:scale"))
# Make prediction
p=predict(m,s2)
# Display confusion matrix
res=table(p$predictions,yTest)
print(res)
# Compute Balanced Classification Rate
BCR=mean(c(res[1,1]/sum(res[,1]),res[2,2]/sum(res[,2]),res[3,3]/sum(res[,3])))
print(BCR)
# Logistic Regression
t=0
# Find the best model with the best cost parameter via 10-fold cross-validations
# 0 – L2-regularized logistic regression
# 1 – L2-regularized L2-loss support vector classification (dual)
# 2 – L2-regularized L2-loss support vector classification (primal)
# 3 – L2-regularized L1-loss support vector classification (dual)
# 4 – multi-class support vector classification by Crammer and Singer
# 5 – L1-regularized L2-loss support vector classification
# 6 – L1-regularized logistic regression
# 7 – L2-regularized logistic regression (dual)
tryTypes=c(0:7)
tryCosts=c(1000,100,10,1,0.1,0.01,0.001)
bestCost=NA
bestAcc=0
bestType=NA
for(ty in tryTypes){
for(co in tryCosts){
acc=LiblineaR(data=s,labels=yTrain,type=ty,cost=co,bias=TRUE,cross=10,verbose=FALSE)
cat("Results for C=",co," : ",acc," accuracy.\n",sep="")
if(acc>bestAcc){
bestCost=co
bestAcc=acc
bestType=ty
}
}
}
cat("Best model type is:",bestType,"\n")
cat("Best cost is:",bestCost,"\n")
cat("Best accuracy is:",bestAcc,"\n")
# Re-train best model with best cost value.
m=LiblineaR(data=s,labels=yTrain,type=bestType,cost=bestCost,bias=TRUE,verbose=FALSE)
# Scale the test data
s2=scale(xTest,attr(s,"scaled:center"),attr(s,"scaled:scale"))
# Make prediction
pr=FALSE
if(bestType==0 | bestType==7) pr=TRUE
p=predict(m,s2,proba=pr,decisionValues=TRUE)
# Display confusion matrix
res=table(p$predictions,yTest)
print(res)
# Compute Balanced Classification Rate
BCR=mean(c(res[1,1]/sum(res[,1]),res[2,2]/sum(res[,2]),res[3,3]/sum(res[,3])))
print(BCR)
#coding to use loops;
cost <- c(100, 10, 1, .1, .01);
ty <- 3
kk <- 1
s <- scale(x[,-1],center=TRUE,scale=TRUE)
#L1-regularized L2-loss support vector classification
fit <- LiblineaR(data=s,labels=x[,1],type=ty,cost=cost[kk], cross=10,bias=TRUE,verbose=FALSE)
pred <- predict(fit, s)
result <- table(pred$predictions, x$conv);
cat("\n");
cat(kk, cost[kk], result[1:4], "\n");
err <- sum(result[2:3]);
co <- cost[kk];
for(kk in 2 : length(cost)){
fit <- LiblineaR(data=s,labels=x[,1],type=ty,cost=cost[kk],bias=TRUE,verbose=FALSE)
pred <- predict(fit, s)
result <- table(pred$predictions, x$conv);
cat(kk, cost[kk], result[1:4], "\n")
if (sum(result[2:3]) <= err){
co <- cost[kk]
err <- sum(result[2:3])
}
}
# Try binary
iris.part = subset(iris, Species != 'setosa')
iris.part$Species = factor(iris.part$Species)
iris.part = iris.part[, c(1,2,5)]
head(iris.part)
xTrain = iris.part[, 1:2]
yTrain = iris.part[, 3]
s=scale(xTrain,center=TRUE,scale=TRUE)
# Sparse Logistic Regression
t=3
# Tune the cost parameter of a logistic regression according to the Joachim’s heuristics
co=heuristicC(s)
m=LiblineaR(data=s,labels=yTrain,type=t,cost=co,bias=TRUE,verbose=FALSE)
# Scale the test data
s2=scale(xTrain,attr(s,"scaled:center"),attr(s,"scaled:scale"))
# Make prediction
p=predict(m,s2, decisionValues = T)
# Display confusion matrix
res=table(p$predictions,yTrain)
print(res)
# Calculate parameters manually;
w <- m$W
iris.scaled = as.matrix(data.frame(s2, rep(1, length(s2[,1]))))
rho <-iris.scaled %*% t(w)
# should equal
rho - p$decisionValues[,1]