Thursday, December 13, 2012

SVM2 - Using LIBLINEAR SVM in R

memory.limit()
memory.size(max = TRUE)
rm(list=ls(all=T))

require(RODBC)
require(ggplot2)
require(plyr)
require(e1071)
require(LiblineaR)

data(iris)
attach(iris)
x=iris[,1:4]
y=factor(iris[,5])
train=sample(1:dim(iris)[1],100)
xTrain=x[train,]
xTest=x[-train,]
yTrain=y[train]
yTest=y[-train]

# Center and scale data
s=scale(xTrain,center=TRUE,scale=TRUE)
# Sparse Logistic Regression
t=6
# Tune the cost parameter of a logistic regression according to the Joachim’s heuristics
co=heuristicC(s)
m=LiblineaR(data=s,labels=yTrain,type=t,cost=co,bias=TRUE,verbose=FALSE)
# Scale the test data
s2=scale(xTest,attr(s,"scaled:center"),attr(s,"scaled:scale"))
# Make prediction
p=predict(m,s2)
# Display confusion matrix
res=table(p$predictions,yTest)
print(res)

# Compute Balanced Classification Rate
BCR=mean(c(res[1,1]/sum(res[,1]),res[2,2]/sum(res[,2]),res[3,3]/sum(res[,3])))
print(BCR)

# Logistic Regression
t=0
# Find the best model with the best cost parameter via 10-fold cross-validations
# 0 – L2-regularized logistic regression
# 1 – L2-regularized L2-loss support vector classification (dual)
# 2 – L2-regularized L2-loss support vector classification (primal)
# 3 – L2-regularized L1-loss support vector classification (dual)
# 4 – multi-class support vector classification by Crammer and Singer
# 5 – L1-regularized L2-loss support vector classification
# 6 – L1-regularized logistic regression
# 7 – L2-regularized logistic regression (dual)
tryTypes=c(0:7)
tryCosts=c(1000,100,10,1,0.1,0.01,0.001)
bestCost=NA
bestAcc=0
bestType=NA
for(ty in tryTypes){
  for(co in tryCosts){
    acc=LiblineaR(data=s,labels=yTrain,type=ty,cost=co,bias=TRUE,cross=10,verbose=FALSE)
    cat("Results for C=",co," : ",acc," accuracy.\n",sep="")
    if(acc>bestAcc){
      bestCost=co
      bestAcc=acc
      bestType=ty
    }
  }
}
cat("Best model type is:",bestType,"\n")
cat("Best cost is:",bestCost,"\n")
cat("Best accuracy is:",bestAcc,"\n")

# Re-train best model with best cost value.
m=LiblineaR(data=s,labels=yTrain,type=bestType,cost=bestCost,bias=TRUE,verbose=FALSE)

# Scale the test data
s2=scale(xTest,attr(s,"scaled:center"),attr(s,"scaled:scale"))
# Make prediction
pr=FALSE
if(bestType==0 | bestType==7) pr=TRUE
p=predict(m,s2,proba=pr,decisionValues=TRUE)
# Display confusion matrix
res=table(p$predictions,yTest)
print(res)
# Compute Balanced Classification Rate
BCR=mean(c(res[1,1]/sum(res[,1]),res[2,2]/sum(res[,2]),res[3,3]/sum(res[,3])))
print(BCR)

#coding to use loops;
cost <- c(100, 10, 1, .1, .01);
ty <- 3
kk <- 1
s <- scale(x[,-1],center=TRUE,scale=TRUE)
#L1-regularized L2-loss support vector classification
fit <- LiblineaR(data=s,labels=x[,1],type=ty,cost=cost[kk], cross=10,bias=TRUE,verbose=FALSE)
pred <- predict(fit, s)
result <- table(pred$predictions, x$conv);
cat("\n");
cat(kk, cost[kk], result[1:4], "\n");
err <- sum(result[2:3]);
co <- cost[kk];

for(kk in 2 : length(cost)){
  fit <- LiblineaR(data=s,labels=x[,1],type=ty,cost=cost[kk],bias=TRUE,verbose=FALSE)
  pred <- predict(fit, s)
  result <- table(pred$predictions, x$conv);
  cat(kk, cost[kk], result[1:4], "\n")
  if (sum(result[2:3]) <= err){
    co <- cost[kk]
    err <- sum(result[2:3])
  }
}

# Try binary
iris.part = subset(iris, Species != 'setosa')
iris.part$Species = factor(iris.part$Species)
iris.part = iris.part[, c(1,2,5)]
head(iris.part)

xTrain = iris.part[, 1:2]
yTrain = iris.part[, 3]
s=scale(xTrain,center=TRUE,scale=TRUE)
# Sparse Logistic Regression
t=3
# Tune the cost parameter of a logistic regression according to the Joachim’s heuristics
co=heuristicC(s)
m=LiblineaR(data=s,labels=yTrain,type=t,cost=co,bias=TRUE,verbose=FALSE)
# Scale the test data
s2=scale(xTrain,attr(s,"scaled:center"),attr(s,"scaled:scale"))
# Make prediction
p=predict(m,s2, decisionValues = T)
# Display confusion matrix
res=table(p$predictions,yTrain)
print(res)

# Calculate parameters manually;
w <- m$W
iris.scaled = as.matrix(data.frame(s2, rep(1, length(s2[,1]))))
rho <-iris.scaled %*% t(w)
# should equal
rho - p$decisionValues[,1]