Sunday, March 6, 2016

Python Data Analysis 9 - An Example - Meteorological Data

A Hypothesis to Be Tested: The Influence of the Proximity of the Sea

http://www.thetimenow.com/distance-calculator.php

Use the distance calculator linked above to determine the distance of each city from the sea.

-- Data Source

http://api.openweathermap.org/data/2.5/history/city?q=Atlanta,US

-- Data Analysis on IPython Notebook

ipython notebook

import numpy as np
import pandas as pd
import datetime

# Download the raw weather history of each city as a DataFrame.
# Each call performs a network request against the OpenWeatherMap API.
ferrara = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Ferrara,IT')
torino = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Torino,IT')
mantova = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Mantova,IT')
milano = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Milano,IT')
ravenna = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Ravenna,IT')
asti = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Asti,IT')
bologna = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Bologna,IT')
piacenza = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Piacenza,IT')
cesena = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Cesena,IT')
faenza = pd.read_json('http://api.openweathermap.org/data/2.5/history/city?q=Faenza,IT')

def prepare(city_list,city_name):
    """Flatten raw weather records of one city into a DataFrame.

    Parameters
    ----------
    city_list : iterable of dict
        One record per observation. Each record is read at
        ``['main']['temp']``, ``['main']['humidity']``,
        ``['main']['pressure']``, ``['weather'][0]['description']``,
        ``['dt']``, ``['wind']['speed']`` and ``['wind']['deg']``.
    city_name : str
        Label stored in the ``city`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: temp, humidity, pressure, description, dt,
        wind_speed, wind_deg, city, day.

    Raises
    ------
    KeyError
        If a record lacks one of the keys listed above.
    """
    headings = ['temp','humidity','pressure','description','dt','wind_speed','wind_deg']
    records = []
    for row in city_list:
        main = row['main']
        wind = row['wind']
        records.append({
            # 273.15 is the Kelvin-to-Celsius offset; assumes the API
            # reports temperatures in Kelvin -- TODO confirm.
            'temp': main['temp'] - 273.15,
            'humidity': main['humidity'],
            'pressure': main['pressure'],
            'description': row['weather'][0]['description'],
            'dt': row['dt'],
            'wind_speed': wind['speed'],
            'wind_deg': wind['deg'],
        })
    # Build the frame row-wise so each column keeps its own dtype; going
    # through a transposed mixed-type frame would leave every column as
    # generic objects.
    city = pd.DataFrame(records, columns=headings)
    city['city'] = city_name
    # 'dt' is converted with fromtimestamp, i.e. to local-time datetimes.
    city['day'] = city['dt'].apply(datetime.datetime.fromtimestamp)
    return city

# Inside prepare(), the 'day' column is derived from the timestamps in 'dt':
#     city['day'] = city['dt'].apply(datetime.datetime.fromtimestamp)
# (Shown for illustration only: `city` exists only inside prepare(), so
# running this line at top level would raise NameError.)

# Build one tidy DataFrame per city from the 'list' column of each download.
df_ferrara = prepare(ferrara.list,'Ferrara')
df_milano = prepare(milano.list,'Milano')
df_mantova = prepare(mantova.list,'Mantova')
df_ravenna = prepare(ravenna.list,'Ravenna')
df_torino = prepare(torino.list,'Torino')
df_asti = prepare(asti.list,'Asti')
df_bologna = prepare(bologna.list,'Bologna')
df_piacenza = prepare(piacenza.list,'Piacenza')
df_cesena = prepare(cesena.list,'Cesena')
df_faenza = prepare(faenza.list,'Faenza')


# Distance of each city from the sea, stored as a constant column
# (presumably kilometres -- the unit is not stated; verify against the
# distance calculator used).
df_ravenna['dist'] = 8
df_cesena['dist'] = 14
df_faenza['dist'] = 37
df_ferrara['dist'] = 47
df_bologna['dist'] = 71
df_mantova['dist'] = 121
df_piacenza['dist'] = 200
df_milano['dist'] = 250
df_asti['dist'] = 315
df_torino['dist'] = 357

# Print the (rows, columns) shape of every city frame, one per line.
# print(...) with a single argument behaves the same on Python 2 and 3.
for frame in (df_ferrara, df_milano, df_mantova, df_ravenna, df_torino,
              df_asti, df_bologna, df_piacenza, df_cesena, df_faenza):
    print(frame.shape)

# Persist every city frame to a CSV file in the working directory.
df_ferrara.to_csv('ferrara_270615.csv')
df_milano.to_csv('milano_270615.csv')
df_mantova.to_csv('mantova_270615.csv')
df_ravenna.to_csv('ravenna_270615.csv')
df_torino.to_csv('torino_270615.csv')
df_asti.to_csv('asti_270615.csv')
df_bologna.to_csv('bologna_270615.csv')
df_piacenza.to_csv('piacenza_270615.csv')
df_cesena.to_csv('cesena_270615.csv')
df_faenza.to_csv('faenza_270615.csv')

# Reload the saved frames. read_csv is a pandas module-level function (a
# DataFrame has no such method) and its result must be assigned.
# index_col=0 restores the row index that to_csv wrote as the first column;
# parse_dates turns the 'day' strings back into datetimes for plotting.
df_ferrara = pd.read_csv('ferrara_270615.csv', index_col=0, parse_dates=['day'])
df_milano = pd.read_csv('milano_270615.csv', index_col=0, parse_dates=['day'])
df_mantova = pd.read_csv('mantova_270615.csv', index_col=0, parse_dates=['day'])
df_ravenna = pd.read_csv('ravenna_270615.csv', index_col=0, parse_dates=['day'])
df_torino = pd.read_csv('torino_270615.csv', index_col=0, parse_dates=['day'])
df_asti = pd.read_csv('asti_270615.csv', index_col=0, parse_dates=['day'])
df_bologna = pd.read_csv('bologna_270615.csv', index_col=0, parse_dates=['day'])
df_piacenza = pd.read_csv('piacenza_270615.csv', index_col=0, parse_dates=['day'])
df_cesena = pd.read_csv('cesena_270615.csv', index_col=0, parse_dates=['day'])
df_faenza = pd.read_csv('faenza_270615.csv', index_col=0, parse_dates=['day'])

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Temperature trend of Milano over the sampled period (red line).
y1 = df_milano['temp']
x1 = df_milano['day']
fig, ax = plt.subplots()
plt.xticks(rotation=70)
# Label the time axis with hours and minutes only.
hours = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(hours)
ax.plot(x1,y1,'r')

# Temperature trends of six cities on one chart: Ravenna, Faenza and Cesena
# in red; Milano, Asti and Torino in green.
y1 = df_ravenna['temp']
x1 = df_ravenna['day']
y2 = df_faenza['temp']
x2 = df_faenza['day']
y3 = df_cesena['temp']
x3 = df_cesena['day']
y4 = df_milano['temp']
x4 = df_milano['day']
y5 = df_asti['temp']
x5 = df_asti['day']
y6 = df_torino['temp']
x6 = df_torino['day']
fig, ax = plt.subplots()
plt.xticks(rotation=70)
# Label the time axis with hours and minutes only.
hours = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(hours)
plt.plot(x1,y1,'r',x2,y2,'r',x3,y3,'r')
plt.plot(x4,y4,'g',x5,y5,'g',x6,y6,'g')

# Distance from the sea of each city, ordered by increasing distance.
# The 'dist' column is constant within a frame, so its first entry is used.
dist = [frame['dist'][0] for frame in (
    df_ravenna, df_cesena, df_faenza, df_ferrara, df_bologna,
    df_mantova, df_piacenza, df_milano, df_asti, df_torino,
)]
# Maximum recorded temperature per city, in the same city order as `dist`
# (Ravenna, Cesena, Faenza, Ferrara, Bologna, Mantova, Piacenza, Milano,
# Asti, Torino).
temp_max = [frame['temp'].max() for frame in (
    df_ravenna, df_cesena, df_faenza, df_ferrara, df_bologna,
    df_mantova, df_piacenza, df_milano, df_asti, df_torino,
)]
# Minimum recorded temperature per city, in the same city order as `dist`
# (Ravenna, Cesena, Faenza, Ferrara, Bologna, Mantova, Piacenza, Milano,
# Asti, Torino).
temp_min = [frame['temp'].min() for frame in (
    df_ravenna, df_cesena, df_faenza, df_ferrara, df_bologna,
    df_mantova, df_piacenza, df_milano, df_asti, df_torino,
)]

# Maximum temperature against distance from the sea (red dots).
plt.plot(dist,temp_max,'ro')

from sklearn.svm import SVR

# Regression data: distance from the sea (x) against maximum temperature (y).
x = np.array(dist)
y = np.array(temp_max)
# Split the cities into a near-sea group and a far-from-sea group. The
# thresholds mirror the prediction ranges used below (10-90 and 50-350);
# the two groups deliberately overlap between 50 and 100.
# SVR.fit expects a 2-D (n_samples, n_features) array, hence the reshape.
x1 = x[x<100]
x1 = x1.reshape((x1.size,1))
y1 = y[x<100]
x2 = x[x>50]
x2 = x2.reshape((x2.size,1))
y2 = y[x>50]

# One linear support-vector regression per group.
svr_lin1 = SVR(kernel='linear', C=1e3)
svr_lin2 = SVR(kernel='linear', C=1e3)
svr_lin1.fit(x1, y1)
svr_lin2.fit(x2, y2)
xp1 = np.arange(10,100,10).reshape((9,1))
xp2 = np.arange(50,400,50).reshape((7,1))
yp1 = svr_lin1.predict(xp1)
yp2 = svr_lin2.predict(xp2)
plt.plot(xp1, yp1, c='r', label='Strong sea effect')
plt.plot(xp2, yp2, c='b', label='Light sea effect')
plt.axis((0,400,27,32))
plt.scatter(x, y, c='k', label='data')

# Slope (coef_) and intercept of each fitted regression line.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(svr_lin1.coef_)
print(svr_lin1.intercept_)
print(svr_lin2.coef_)
print(svr_lin2.intercept_)

from scipy.optimize import fsolve

def line1(x):
    """Evaluate a straight line built from svr_lin1's parameters at x.

    Returns ``-coef * x + intercept`` using the first coefficient and the
    first intercept of the module-level ``svr_lin1`` model.
    """
    # NOTE(review): the slope is negated here, whereas a linear SVR predicts
    # coef * x + intercept -- confirm the sign against the installed
    # scikit-learn version.
    slope = svr_lin1.coef_[0][0]
    offset = svr_lin1.intercept_[0]
    return -slope * x + offset
def line2(x):
    """Evaluate a straight line built from svr_lin2's parameters at x.

    Returns ``-coef * x + intercept`` using the first coefficient and the
    first intercept of the module-level ``svr_lin2`` model.
    """
    # NOTE(review): the slope is negated here, whereas a linear SVR predicts
    # coef * x + intercept -- confirm the sign against the installed
    # scikit-learn version.
    slope = svr_lin2.coef_[0][0]
    offset = svr_lin2.intercept_[0]
    return -slope * x + offset
def findIntersection(fun1,fun2,x0):
    """Find an x at which fun1(x) equals fun2(x).

    The difference of the two functions is handed to scipy's ``fsolve``
    root finder, started from the initial guess ``x0``; whatever ``fsolve``
    returns is returned unchanged.
    """
    def difference(x):
        return fun1(x) - fun2(x)

    return fsolve(difference, x0)

# Locate the crossing point of the two regression lines, starting at x = 0.
result = findIntersection(line1,line2,0.0)
# result is an array; take its first element so %d formats plain scalars.
print("[x,y] = [ %d , %d ]" % (result[0], line1(result[0])))
# numpy is imported as `np` in this file; the bare name `numpy` is unbound.
x = np.linspace(0,300,31)
# Both lines over 0-300 plus the intersection marked with a red dot.
plt.plot(x,line1(x),x,line2(x),result,line1(result),'ro')

# Minimum temperature against distance from the sea (blue dots), with the
# axes fixed to x in 0-400 and y in 15-25.
plt.axis((0,400,15,25))
plt.plot(dist,temp_min,'bo')

# Humidity trends of six cities on one chart: Ravenna, Faenza and Cesena
# in red; Milano, Asti and Torino in green.
y1 = df_ravenna['humidity']
x1 = df_ravenna['day']
y2 = df_faenza['humidity']
x2 = df_faenza['day']
y3 = df_cesena['humidity']
x3 = df_cesena['day']
y4 = df_milano['humidity']
x4 = df_milano['day']
y5 = df_asti['humidity']
x5 = df_asti['day']
y6 = df_torino['humidity']
x6 = df_torino['day']
fig, ax = plt.subplots()
plt.xticks(rotation=70)
# Label the time axis with hours and minutes only.
hours = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(hours)
plt.plot(x1,y1,'r',x2,y2,'r',x3,y3,'r')
plt.plot(x4,y4,'g',x5,y5,'g',x6,y6,'g')

# Maximum recorded humidity per city, in the same city order as `dist`
# (Ravenna, Cesena, Faenza, Ferrara, Bologna, Mantova, Piacenza, Milano,
# Asti, Torino), plotted against distance from the sea.
hum_max = [frame['humidity'].max() for frame in (
    df_ravenna, df_cesena, df_faenza, df_ferrara, df_bologna,
    df_mantova, df_piacenza, df_milano, df_asti, df_torino,
)]
plt.plot(dist,hum_max,'bo')

# Minimum recorded humidity per city, in the same city order as `dist`
# (Ravenna, Cesena, Faenza, Ferrara, Bologna, Mantova, Piacenza, Milano,
# Asti, Torino), plotted against distance from the sea.
hum_min = [frame['humidity'].min() for frame in (
    df_ravenna, df_cesena, df_faenza, df_ferrara, df_bologna,
    df_mantova, df_piacenza, df_milano, df_asti, df_torino,
)]
plt.plot(dist,hum_min,'bo')

-- The RoseWind

# Wind speed against wind direction for Ravenna (red dots).
plt.plot(df_ravenna['wind_deg'],df_ravenna['wind_speed'],'ro')

# Count the Ravenna observations falling in each of 8 equal sectors of the
# 0-360 degree range; `bins` holds the 9 sector edges.
hist, bins = np.histogram(df_ravenna['wind_deg'],8,[0,360])
print(hist)
print(bins)

def showRoseWind(values,city_name,max_value):
    """Draw a polar bar chart with one bar per 45-degree sector.

    Parameters
    ----------
    values : sequence of numbers
        Bar heights. Bar positions are computed for 8 sectors, so 8 values
        are expected.
    city_name : str
        Text used as the chart title.
    max_value : number
        Value mapped to the darkest bar colour; each bar is shaded by
        ``value / max_value``.
    """
    N = 8
    theta = np.arange(0.,2 * np.pi, 2 * np.pi / N)
    radii = np.array(values)
    plt.axes([0.025, 0.025, 0.95, 0.95], polar=True)
    # Force float division: with integer counts and an integer max_value,
    # Python 2 would truncate every ratio to 0 or 1. A zero max_value falls
    # back to 1.0 to avoid dividing by zero.
    scale = float(max_value) or 1.0
    colors = [(1-x/scale, 1-x/scale, 0.75) for x in radii]
    plt.bar(theta, radii, width=(2*np.pi/N), bottom=0.0, color=colors)
    plt.title(city_name,x=0.2, fontsize=20)

# Wind rose for Ravenna, shaded relative to its most frequent sector.
showRoseWind(hist,'Ravenna',max(hist))

# Same for Ferrara, with a fixed colour scale of 15 observations.
# The edges are bound to `bins` rather than `bin`, which would shadow the
# builtin of that name.
hist, bins = np.histogram(df_ferrara['wind_deg'],8,[0,360])
print(hist)
showRoseWind(hist,'Ferrara', 15.0)

def RoseWind_Speed(df_city):
    """Return the mean wind speed in each of eight 45-degree sectors.

    Parameters
    ----------
    df_city : pandas.DataFrame
        Must provide ``wind_deg`` and ``wind_speed`` columns.

    Returns
    -------
    numpy.ndarray
        Eight values, one per sector [0, 45), [45, 90), ..., [315, 360].
        A sector with no observations yields NaN.
    """
    degs = np.arange(45,361,45)
    tmp = []
    for deg in degs:
        # Half-open sectors [deg-45, deg): each direction belongs to exactly
        # one sector, including non-integer readings.
        selected = (df_city['wind_deg'] >= (deg-45)) & (df_city['wind_deg'] < deg)
        if deg == 360:
            # Close the last sector on the right so a reading of exactly 360
            # is kept, as np.histogram does for its last bin.
            selected = selected | (df_city['wind_deg'] == 360)
        tmp.append(df_city[selected]['wind_speed'].mean())
    return np.array(tmp)


# Chart of the mean wind speed per sector for Ravenna.
# NOTE(review): showRoseWind_Speed is not defined in the code shown here --
# confirm it is defined elsewhere before running this line.
showRoseWind_Speed(RoseWind_Speed(df_ravenna),'Ravenna')

No comments:

Post a Comment

Blog Archive