Content Weekly Trend
========================================================
### Read-in content raw data - weekly aggregated data
```{r echo=FALSE, results='hide',message=FALSE}
memory.limit()
memory.size(max = TRUE)
rm(list=ls(all=T))
require(data.table)
require(ggplot2)
require(plyr)
data <- read.table("content_data.csv", head=F, sep=',');
setnames(data, c("pv","mon","wk", "week"));
head(data)
```
```{r echo=FALSE, results='hide',message=FALSE, fig.width=20, fig.height=8}
ggplot(data, aes(x=wk, y=pv)) +
geom_bar(stat = "identity", fill = "blue")+
geom_abline(intercept = mean(data$pv), color = 'red', size = 1, lty = 3) +
ggtitle("Weekly Pageview Counts") +
ylab("Page Views") + xlab("Week") +
theme(plot.title = element_text(face = "bold", size = 20)) +
theme(axis.text.x = element_text(face = "bold", size = 16, angle=-90)) +
theme(axis.text.y = element_text(face = "bold", size = 16)) +
theme(axis.title.x = element_text(face = "bold", size = 16)) +
theme(axis.title.y = element_text(face = "bold", size = 16, angle = 90)) +
theme(legend.position = "top") +
theme(legend.key = element_rect(colour = NA)) +
theme(legend.title = element_blank()) +
theme(legend.position = "none")
```
Drop first 11 month pageviews.
```{r}
data <- data[14:77,]
head(data)
```
### OLS
#### Stata code
```
import excel "mytest.xlsx",
sheet("Sheet1") firstrow tsset mycount
generate mylog = log(wvolume)
generate cc = mycount*mycount
regress mylog mycount i.mymonth
```
### Regress on continuous month
```{r}
data$mylog = log(data$pv)
data$mycount = (1:length(data$pv))
data$cc = data$mycount^2
ols1 <- lm(data$mylog ~ data$mycount + data$mon, data = data)
summary(ols1)
exp(ols1$coefficients[3])
AIC(ols1)
BIC(ols1)
```
### Regress on discrete month
```{r}
data$mon = as.factor(data$mon)
ols2 <- lm(data$mylog ~ data$mycount + data$cc + data$mon, data = data)
summary(ols2)
AIC(ols2)
BIC(ols2)
par(mfrow=c(2, 2))
plot(ols2)
anova(ols1, ols2)
```
### trending factor
```{r}
exp(ols2$coefficients[2]+ols2$coefficients[3])
```
### monthly adjustment factor
```{r}
exp(ols2$coefficients[4:14])
```
### Aggregated weekly data to generate monthly data
```{r}
data2 <- data.table(data)
data3<-data2[,list(pv=sum(pv),mon=min(as.integer(mon))), by=week][order(week)]
```
```{r echo=FALSE, results='hide',message=FALSE, fig.width=20, fig.height=8}
ggplot(data3, aes(x=week, y=pv)) +
geom_bar(stat = "identity", fill = "darkblue")+
geom_abline(intercept = mean(data3$pv), color = 'red', size = 1, lty = 3) +
ggtitle("Monthly Pageview Counts") +
ylab("PageViews") + xlab("Month") +
theme(plot.title = element_text(face = "bold", size = 20)) +
theme(axis.text.x = element_text(face = "bold", size = 16, angle=-90)) +
theme(axis.text.y = element_text(face = "bold", size = 16)) +
theme(axis.title.x = element_text(face = "bold", size = 16)) +
theme(axis.title.y = element_text(face = "bold", size = 16, angle = 90)) +
theme(legend.position = "top") +
theme(legend.key = element_rect(colour = NA)) +
theme(legend.title = element_blank()) +
theme(legend.position = "none")
```
```{r}
data3$mylog = log(data3$pv)
data3$mycount = (1:length(data3$pv))
data3$cc = data3$mycount^2
data3$mon = as.factor(data3$mon)
ols3 <- lm(data3$mylog ~ data3$mycount + data3$cc + data3$mon, data = data3)
summary(ols3)
AIC(ols3)
BIC(ols3)
par(mfrow=c(2, 2))
plot(ols3)
```
### trending factor
```{r}
exp(ols3$coefficients[2]+ols3$coefficients[3])
```
### monthly adjustment factor
```{r}
exp(ols3$coefficients[4:14])
```