1 Use Python to Download
from pytrends import pyGTrends
# Log in to Google Trends through pytrends' pyGTrends client.
# NOTE(review): the account e-mail and password are hard-coded in source;
# they should be loaded from the environment or a config file, and rotated.
server = pyGTrends.pyGTrends("js970008@gmail.com", "durham19")
# Search terms to download, one report per term.
# NOTE(review): the `...` entry looks like a placeholder for the elided rest
# of the list; as written it would be passed to request_report() as a value.
styles=[
"Bomber jacket",
...
]
# NOTE(review): indentation was lost in this copy, so it is unclear which of
# the statements below belong to the `for` body and to the `if idx>=10:`
# body. Restore the intended nesting before running.
for idx, val in enumerate(styles):
if idx>=10:
print("*************************")
# Request the report for this term, then save it using the term's index.
# presumably this writes <idx>.csv (the shell step reads 0.csv and 1.csv) - TODO confirm
server.request_report(val)
server.save_csv("", str(idx))
print(idx)
2 Use Shell to Extract TS
# Keep only lines 5 through 663 of each raw download and write them to
# result<N>.csv (presumably the weekly time-series section - TODO confirm).
for i in 0 1; do
  awk 'NR >= 5 && NR <= 663' "$i.csv" > "result$i.csv"
done
3 Use R to Combine
# Combine the per-keyword series (result0.csv, result1.csv, ...) into a single
# table, joining on the shared "Week" column.
# Uses fread/setkey/merge from data.table and str_replace_all from stringr.
Sample <- fread("result0.csv")
dim(Sample)
str(Sample)
# Replace spaces in the column names with underscores.
names(Sample) <- str_replace_all(names(Sample), " ", "_")
head(Sample)
n <- nrow(Sample)
for (i in 1:2) {
  path <- paste0("result", i, ".csv")
  size <- file.info(path)$size
  if (is.na(size)) {
    # file.info() reports NA for a file that does not exist; testing that NA
    # directly in `if` would abort the loop with an error.
    print(paste0(path, " is missing"))
  } else if (size == 0) {
    print(paste0(path, " is empty"))
  } else {
    tmp <- fread(path)
    names(tmp) <- str_replace_all(names(tmp), " ", "_")
    setkey(Sample, "Week")
    setkey(tmp, "Week")
    # merge() defaults to all = FALSE, so only weeks present in both tables
    # are kept.
    Sample <- merge(Sample, tmp, by = "Week")
  }
}
4 Use R to directly extract data
# Read the raw download (0.csv) directly in R and extract the main data table
# that follows the header row on line 5.
# The final step uses data.table() from the data.table package.
filename <- "0.csv"
con <- file(filename, open = "r")
linecount <- 0
stringdata <- ""
# Reset so a missing header row is detected instead of reusing a stale value.
rowheaders <- NULL
while (length(oneLine <- readLines(con, n = 1, warn = FALSE)) > 0) {
  linecount <- linecount + 1
  # The text of the first two lines is appended to `filename`.
  if (linecount < 3) {
    filename <- paste0(filename, oneLine)
  }
  # get headers at line 5
  if (linecount == 5) {
    rowheaders <- strsplit(oneLine, ",")[[1]]
  }
  # skip first 5 lines
  if (linecount > 5) {
    # break when there is no more main data (empty or commas-only line)
    if (gsub(pattern = ",", x = oneLine, replacement = "") == "") break
    stringdata <- paste0(stringdata, oneLine, "\n")
  }
}
close(con)
if (is.null(rowheaders) || !nzchar(stringdata)) {
  stop("input file has no header row on line 5 or no data rows after it",
       call. = FALSE)
}
# `text =` lets read.table() create and close its own text connection; passing
# textConnection(stringdata) as the file left that connection open.
newData <- read.table(text = stringdata, sep = ",", header = FALSE,
                      stringsAsFactors = FALSE)
names(newData) <- rowheaders
# The first column is split on " - " into a start and an end date; split once
# and reuse the pieces.
week_parts <- strsplit(as.character(newData[, 1]), " - ")
newData$StartDate <- as.Date(vapply(week_parts, `[`, character(1), 1))
newData$EndDate <- as.Date(vapply(week_parts, `[`, character(1), 2))
# Year is kept as a character string taken from the start date.
newData$year <- vapply(
  strsplit(as.character(newData$StartDate), "-"), `[`, character(1), 1
)
Sample <- data.table(do.call(cbind.data.frame, newData))
5 Use R library
# Query Google Trends through the gtrendsR package and plot the result.
# Install gtrendsR only when it is not already available, instead of
# re-installing it on every run.
if (!requireNamespace("gtrendsR", quietly = TRUE)) {
  install.packages("gtrendsR")
}
library(gtrendsR)
# Placeholder credentials: replace with real account details at run time.
user <- "***@gmail.com"
psw <- "***"
# NOTE(review): gconnect() may not exist in newer gtrendsR releases - confirm
# against the installed version.
gconnect(user, psw)
# Request the trend data for all keywords in a single query.
trend <- gtrends(c(
  "Bomber jacket",
  "Satin bomber jacket",
  "Embroidered bomber jacket",
  "Flight jacket",
  "Slip dress"
))
plot(trend)
Friday, August 26, 2016
Thursday, August 11, 2016
Large Scale of Machine Learning by Python 1 - First Steps to Scalability
Explaining Scalability in detail
Making large scale examples
Introducing Python
It offers a large, mature system of packages for data analysis and machine learning.
It is very versatile.
It is very simple to learn and use.
It is cross-platform.
It is undoubtedly fast compared to other mainstream data analysis languages.
It can work with in-memory big data.
Making large scale examples
Introducing Python
It offers a large, mature system of packages for data analysis and machine learning.
It is very versatile.
It is very simple to learn and use.
It is cross-platform.
It is undoubtedly fast compared to other mainstream data analysis languages.
It can work with in-memory big data.
Subscribe to:
Posts (Atom)