Linear regression/Logistic regression/Poisson regression
The Motivation behind Multivariate Models
A confounder is a third variable that biases (increases or decreases) the association we are interested in. The confounder is always associated with both the response and the predictor.
Linear Regression with Continuous Predictors
install.packages("gamlss.data")
library(gamlss.data)
data(usair)
model.0 <- lm(y~x3, data=usair)
summary(model.0)
plot(y~x3, data=usair, cex.lab=1.5)
abline(model.0, col='red', lwd=2.5)
legend('bottomright', legend='y=x3', lty=1, col='red', lwd=2.5, title='Regression Line')
usair$prediction <- predict(model.0)
usair$residule <- resid(model.0)
plot(y~x3, data=usair, cex.lab=1.5)
abline(model.0, col='red', lwd=2.5)
segments(usair$x3, usair$y, usair$x3, usair$prediction, col='blue', lty=2)
legend('bottomright', legend=c('y~x3', 'residule'), lty=c(1,2), col=c('red', 'blue'),lwd=2.5, title='Regression Line')
library(gamlss.data)
data(usair)
model.0 <- lm(y~x3, data=usair)
summary(model.0)
plot(y~x3, data=usair, cex.lab=1.5)
abline(model.0, col='red', lwd=2.5)
legend('bottomright', legend='y=x3', lty=1, col='red', lwd=2.5, title='Regression Line')
usair$prediction <- predict(model.0)
usair$residule <- resid(model.0)
plot(y~x3, data=usair, cex.lab=1.5)
abline(model.0, col='red', lwd=2.5)
segments(usair$x3, usair$y, usair$x3, usair$prediction, col='blue', lty=2)
legend('bottomright', legend=c('y~x3', 'residule'), lty=c(1,2), col=c('red', 'blue'),lwd=2.5, title='Regression Line')
Multiple predictor
model.1 <- update(model.0, .~.+x2)
summary(model.1)
as.numeric(predict(model.1, data.frame(x2=150, x3=400)))
install.packages('scatterplot3d')
library(scatterplot3d)
plot3d <- scatterplot3d(usair$x3, usair$x2, usair$y, pch=19, type='h', hightlight.3d=TRUE, main='3_D Scatterplot')
plot3d$planed3d(model.1, lty='solid', col = 'red')
par(mfrow=c(1,2))
plot(y~x3, data=usair, main='2D projection for x3')
abline(model.1, col='red', lwd=2.5)
plot(y~x2, data=usair, main='2D projection for x2')
abline(lm(y~x2+x3, data=usair), col='red', lwd=2.5)
Model Assumptions
install.packages(c('Hmisc','gridExtra'))
library(Hmisc)
library(ggplot2)
library(gridExtra)
set.seed(7)
x <- sort(rnorm(1000, 10, 100))[26:975]
y <- x * 500 + rnorm(950, 5000, 20000)
df <- data.frame(x = x, y = y, cuts = factor(cut2(x, g = 5)),resid = resid(lm(y ~ x)))
scatterPl <- ggplot(df, aes(x = x, y = y)) + geom_point(aes(colour = cuts, fill = cuts), shape = 1,show_guide = FALSE) + geom_smooth(method = lm, level = 0.99)
plot_left <- ggplot(df, aes(x = y, fill = cuts)) + geom_density(alpha = .5) + coord_flip() + scale_y_reverse()
plot_right <- ggplot(data = df, aes(x = resid, fill = cuts)) + geom_density(alpha = .5) + coord_flip()
grid.arrange(plot_left, scatterPl, plot_right, ncol=3, nrow=1, widths=c(1, 3, 1))
summary(model.1)
as.numeric(predict(model.1, data.frame(x2=150, x3=400)))
install.packages('scatterplot3d')
library(scatterplot3d)
plot3d <- scatterplot3d(usair$x3, usair$x2, usair$y, pch=19, type='h', hightlight.3d=TRUE, main='3_D Scatterplot')
plot3d$planed3d(model.1, lty='solid', col = 'red')
par(mfrow=c(1,2))
plot(y~x3, data=usair, main='2D projection for x3')
abline(model.1, col='red', lwd=2.5)
plot(y~x2, data=usair, main='2D projection for x2')
abline(lm(y~x2+x3, data=usair), col='red', lwd=2.5)
Model Assumptions
install.packages(c('Hmisc','gridExtra'))
library(Hmisc)
library(ggplot2)
library(gridExtra)
set.seed(7)
x <- sort(rnorm(1000, 10, 100))[26:975]
y <- x * 500 + rnorm(950, 5000, 20000)
df <- data.frame(x = x, y = y, cuts = factor(cut2(x, g = 5)),resid = resid(lm(y ~ x)))
scatterPl <- ggplot(df, aes(x = x, y = y)) + geom_point(aes(colour = cuts, fill = cuts), shape = 1,show_guide = FALSE) + geom_smooth(method = lm, level = 0.99)
plot_left <- ggplot(df, aes(x = y, fill = cuts)) + geom_density(alpha = .5) + coord_flip() + scale_y_reverse()
plot_right <- ggplot(data = df, aes(x = resid, fill = cuts)) + geom_density(alpha = .5) + coord_flip()
grid.arrange(plot_left, scatterPl, plot_right, ncol=3, nrow=1, widths=c(1, 3, 1))
Outlier
install.packages('gvlma')
library(gvlma)
gvlma(model.1)
model.2 <- update(model.1, data=usair[-31,])
gvlma(model.2)
library(gvlma)
gvlma(model.1)
model.2 <- update(model.1, data=usair[-31,])
gvlma(model.2)
How well does the line fit in the data
model.0 <- update(model.0, data = usair[-31, ])
summary(model.0)[c('r.squared', 'adj.r.squared')]
summary(model.2)[c('r.squared', 'adj.r.squared')]
#Akaike Information Criterion(AIC)
summary(model.3 <- update(model.2, .~. -x2 + x1))$coefficients
summary(model.4 <- update(model.2, .~. -x3 + x1))$coefficients
AIC(model.3, model.4)
summary(model.0)[c('r.squared', 'adj.r.squared')]
summary(model.2)[c('r.squared', 'adj.r.squared')]
#Akaike Information Criterion(AIC)
summary(model.3 <- update(model.2, .~. -x2 + x1))$coefficients
summary(model.4 <- update(model.2, .~. -x3 + x1))$coefficients
AIC(model.3, model.4)
Discrete preditors
par(mfrow=c(1,1))
plot(y ~ x5, data = usair, cex.lab = 1.5)
abline(lm(y ~ x5, data = usair), col = 'red', lwd = 2.5, lty = 1)
abline(lm(y ~ x5, data = usair[usair$x5<=45,]), col = 'red', lwd = 2.5, lty = 3)
abline(lm(y ~ x5, data = usair[usair$x5 >=30, ]), col = 'red', lwd = 2.5, lty = 2)
abline(v = c(30, 45), col = 'blue', lwd = 2.5)
legend('topleft', lty = c(1, 3, 2, 1), lwd = rep(2.5, 4),
legend = c('y ~ x5', 'y ~ x5 | x5<=45','y ~ x5 | x5>=30','Critical zone'), col = c('red', 'red', 'red', 'blue'))
plot(y ~ x5, data = usair, cex.lab = 1.5)
abline(lm(y ~ x5, data = usair), col = 'red', lwd = 2.5, lty = 1)
abline(lm(y ~ x5, data = usair[usair$x5<=45,]), col = 'red', lwd = 2.5, lty = 3)
abline(lm(y ~ x5, data = usair[usair$x5 >=30, ]), col = 'red', lwd = 2.5, lty = 2)
abline(v = c(30, 45), col = 'blue', lwd = 2.5)
legend('topleft', lty = c(1, 3, 2, 1), lwd = rep(2.5, 4),
legend = c('y ~ x5', 'y ~ x5 | x5<=45','y ~ x5 | x5>=30','Critical zone'), col = c('red', 'red', 'red', 'blue'))
install.packages(c('partykit','rpart'))
library(partykit)
library(rpart)
plot(as.party(rpart(y ~ x5, data = usair)))
usair$x5_3 <- cut2(usair$x5, c(30,45))
plot(y ~ as.numeric(x5_3), data = usair, cex.lab = 1.5, xlab = 'Categorized annual rainfall(x5)', xaxt = 'n')
axis(1, at = 1:3, labels = levels(usair$x5_3))
lines(tapply(usair$y, usair$x5_3, mean), col='red', lwd=2.5, lty=1)
legend('topright', legend = 'Linear prediction', col = 'red')
summary(glmmodel.1 <- glm(y ~ x2+x3+x5_3, data=usair[-31,]))
library(partykit)
library(rpart)
plot(as.party(rpart(y ~ x5, data = usair)))
usair$x5_3 <- cut2(usair$x5, c(30,45))
plot(y ~ as.numeric(x5_3), data = usair, cex.lab = 1.5, xlab = 'Categorized annual rainfall(x5)', xaxt = 'n')
axis(1, at = 1:3, labels = levels(usair$x5_3))
lines(tapply(usair$y, usair$x5_3, mean), col='red', lwd=2.5, lty=1)
legend('topright', legend = 'Linear prediction', col = 'red')
summary(glmmodel.1 <- glm(y ~ x2+x3+x5_3, data=usair[-31,]))
No comments:
Post a Comment