#########################################################
## stan
#########################################################
data{
int<lower=0> N;
vector[N] Value;
vector[N] SqFt;
}
parameters
{
real alpha;
real beta;
real<lower=0> sigma;
}
model
{
Value ~ normal(alpha + beta*SqFt, sigma);
}
#########################################################
## R
#########################################################
housing <- read.table("housing1.csv", sep=",", header=TRUE, stringsAsFactors = FALSE)
head(housing)
mod1 <- lm(ValuePerSqFt ~ SqFt, data=housing)
summary(mod1)
fit = stan(file='y.stan', data=list(N=nrow(housing),
Value=housing$Value,
SqFt=housing$SqFt), iter=10)
Wednesday, May 24, 2017
Monday, May 8, 2017
Windowing in Hive
1 Partition specification: It includes a column reference from the table. It could not be any aggregation or other window specification.
- SELECT fname,ip, COUNT(pid) OVER (PARTITION BY ip) FROM sales;
SELECT fname,pid, LAG(pid) OVER (PARTITION BY ip ORDER BY ip) FROM sales;
select user_id,
client_session_id,
event_name,
event_timestamp,
LAG(event_name) OVER (PARTITION BY user_id, client_session_id order by event_timestamp) previous_event_name,
count(event_name) over (partition by user_id, client_session_id order by event_timestamp) event_ts_order
from tmp_churn_mar2017_little_sister
where user_id = 255177207;
- SELECT fname,ip, COUNT(pid) OVER (PARTITION BY ip) FROM sales;
- SELECT fname,ip,zip,pid, COUNT(pid) OVER (PARTITION BY ip, zip) FROM sales;
select user_id,
client_session_id,
event_name,
count(event_name) over (partition by user_id, client_session_id)
from tmp_churn_mar2017_little_sister
where user_id = 255177207;
2 Order specification: It comprises a combination of one or more columns. The ordering could be ASC or DESC, which by default is ASC.
3 Window frame: A frame has a start boundary and an optional end boundary. Frame type: Window frames could be any of the following types: ROW, RANGE
- SELECT fname,pid, COUNT(pid) OVER (PARTITION BY ip ORDER BY fname) FROM sales;
- SELECT fname,ip,pid, COUNT(pid) OVER (PARTITION BY ip, pid ORDER BY fname) FROM sales;
- select user_id,
client_session_id,
event_name,
count(event_name) over (partition by user_id, client_session_id order by event_timestamp)
from tmp_churn_mar2017_little_sister
where user_id = 255177207;
3 Window frame: A frame has a start boundary and an optional end boundary. Frame type: Window frames could be any of the following types: ROW, RANGE
- SELECT fname, ip, COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM sales;
select user_id,
client_session_id,
event_name,
count(event_name) over (partition by user_id, client_session_id order by event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
from tmp_churn_mar2017_little_sister
where user_id = 255177207;
Frame boundary: A frame is associated with a direction or an amount. A direction value could be PRECEDING or FOLLOWING and the amount could be an integer value or keyword UNBOUNDED.
Effective window frames:
BETWEEN <start boundary> AND CURRENT ROW: When only the start boundary of a frame is specified.
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: When only the order is specified but no window frame is specified.
ROW BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: When no order and no window frame are specified.
- SELECT fname, ip, COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM sales;
- SELECT fname, ip ,COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) FROM sales;
- SELECT fname, ip, COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)FROM sales;
Effective window frames:
BETWEEN <start boundary> AND CURRENT ROW: When only the start boundary of a frame is specified.
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: When only the order is specified but no window frame is specified.
ROW BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: When no order and no window frame are specified.
- SELECT fname, ip, COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM sales;
- SELECT fname, ip ,COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) FROM sales;
- SELECT fname, ip, COUNT(pid) OVER (PARTITION BY ip ORDER BY fname ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)FROM sales;
4 Source name for window definition: A row R in the input table belongs to a partition as defined in the partition specification.
SELECT fname,pid, LEAD(pid) OVER (PARTITION BY ip ORDER BY ip) FROM sales;
SELECT fname,pid, LEAD(pid) OVER (PARTITION BY ip ORDER BY ip) FROM sales;
select user_id,
client_session_id,
event_name,
event_timestamp,
LAG(event_name) OVER (PARTITION BY user_id, client_session_id order by event_timestamp) previous_event_name,
count(event_name) over (partition by user_id, client_session_id order by event_timestamp) event_ts_order
from tmp_churn_mar2017_little_sister
where user_id = 255177207;
5 Second largest
select max(Salary)
from Employee
where Salary < (select max(salary) from Employee)
6 Nth largest
select a.Salary
from Employee a
join Employee b
where a.Salary <= b.Salary
group by a.Salary
having count(distinct b.Salary) = N
7 Top three Salaries
select a.dept, a.Salary
from Employee a
join Employee b
where a.Salary <= b.Salary
group by a.dept, a.Salary
having count(distinct b.Salary) <=3
8 Median
select Id, Company, Salary
from Employee e
where abs(
select(count(*) from Employee e1 where e.company=e1.company and e.Salary>=e1.Salary) -
select(count(*) from Employee e2 where e.company=e2.company and e.Salary<=e2.Salary) -
)<=1
group by Id, Company
select max(Salary)
from Employee
where Salary < (select max(salary) from Employee)
6 Nth largest
select a.Salary
from Employee a
join Employee b
where a.Salary <= b.Salary
group by a.Salary
having count(distinct b.Salary) = N
7 Top three Salaries
select a.dept, a.Salary
from Employee a
join Employee b
where a.Salary <= b.Salary
group by a.dept, a.Salary
having count(distinct b.Salary) <=3
8 Median
select Id, Company, Salary
from Employee e
where abs(
select(count(*) from Employee e1 where e.company=e1.company and e.Salary>=e1.Salary) -
select(count(*) from Employee e2 where e.company=e2.company and e.Salary<=e2.Salary) -
)<=1
group by Id, Company
Subscribe to:
Posts (Atom)