-- The scikit-learn Library
-- Machine Learning
-- Supervised and Unsupervised Learning
Supervised learning
Classification
Regression
Unsupervised learning
Clustering
Dimensionality reduction
Training Set and Testing Set
-- Supervised Learning with scikit-learn
Classification, using the Iris Dataset
K-Nearest Neighbors Classifier
Support Vector Machines (SVC)
Regression, using the Diabetes Dataset
Linear Regression
Support Vector Machines (SVR)
-- The Iris Flower Dataset
In [ ]: from sklearn import datasets
: iris = datasets.load_iris()
In [ ]: iris.data
Out[ ]:
array([[ 5.1, 3.5, 1.4, 0.2],
[ 4.9, 3. , 1.4, 0.2],
[ 4.7, 3.2, 1.3, 0.2],
[ 4.6, 3.1, 1.5, 0.2],
...
In [ ]: iris.target
Out[ ]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
In [ ]: iris.target_names
Out[ ]:
array(['setosa', 'versicolor', 'virginica'], dtype='|S10')
In [ ]: import matplotlib.pyplot as plt
: import matplotlib.patches as mpatches
: from sklearn import datasets
:
: iris = datasets.load_iris()
: x = iris.data[:,0] #X-Axis - sepal length
: y = iris.data[:,1] #Y-Axis - sepal width
: species = iris.target #Species
:
: x_min, x_max = x.min() - .5,x.max() + .5
: y_min, y_max = y.min() - .5,y.max() + .5
:
: #SCATTERPLOT
: plt.figure()
: plt.title('Iris Dataset - Classification By Sepal Sizes')
: plt.scatter(x,y, c=species)
: plt.xlabel('Sepal length')
: plt.ylabel('Sepal width')
: plt.xlim(x_min, x_max)
: plt.ylim(y_min, y_max)
: plt.xticks(())
: plt.yticks(())
In [ ]: import matplotlib.pyplot as plt
: import matplotlib.patches as mpatches
: from sklearn import datasets
:
: iris = datasets.load_iris()
: x = iris.data[:,2] #X-Axis - petal length
: y = iris.data[:,3] #Y-Axis - petal width
: species = iris.target #Species
:
: x_min, x_max = x.min() - .5,x.max() + .5
: y_min, y_max = y.min() - .5,y.max() + .5
: #SCATTERPLOT
: plt.figure()
: plt.title('Iris Dataset - Classification By Petal Sizes', size=14)
: plt.scatter(x,y, c=species)
: plt.xlabel('Petal length')
: plt.ylabel('Petal width')
: plt.xlim(x_min, x_max)
: plt.ylim(y_min, y_max)
: plt.xticks(())
: plt.yticks(())
-- The PCA Decomposition
from sklearn.decomposition import PCA
x_reduced = PCA(n_components=3).fit_transform(iris.data)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA
iris = datasets.load_iris()
x = iris.data[:,1] #X-Axis - sepal width
y = iris.data[:,2] #Y-Axis - petal length
species = iris.target #Species
x_reduced = PCA(n_components=3).fit_transform(iris.data)
#SCATTERPLOT 3D
fig = plt.figure()
ax = Axes3D(fig)
ax.set_title('Iris Dataset by PCA', size=14)
ax.scatter(x_reduced[:,0],x_reduced[:,1],x_reduced[:,2], c=species)
ax.set_xlabel('First eigenvector')
ax.set_ylabel('Second eigenvector')
ax.set_zlabel('Third eigenvector')
ax.w_xaxis.set_ticklabels(())
ax.w_yaxis.set_ticklabels(())
ax.w_zaxis.set_ticklabels(())
-- K-Nearest Neighbors Classifier
import numpy as np
from sklearn import datasets
np.random.seed(0)
iris = datasets.load_iris()
x = iris.data
y = iris.target
i = np.random.permutation(len(iris.data))
x_train = x[i[:-10]]
y_train = y[i[:-10]]
x_test = x[i[-10:]]
y_test = y[i[-10:]]
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(x_train,y_train)
Out[86]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_neighbors=5, p=2, weights='uniform')
knn.predict(x_test)
Out[100]: array([1, 2, 1, 0, 0, 0, 2, 1, 2, 0])
y_test
Out[101]: array([1, 1, 1, 0, 0, 0, 2, 1, 2, 0])
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
iris = datasets.load_iris()
x = iris.data[:,:2] #X-Axis - sepal length-width
y = iris.target #Y-Axis - species
x_min, x_max = x[:,0].min() - .5,x[:,0].max() + .5
y_min, y_max = x[:,1].min() - .5,x[:,1].max() + .5
#MESH
cmap_light = ListedColormap(['#AAAAFF','#AAFFAA','#FFAAAA'])
h = .02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
knn = KNeighborsClassifier()
knn.fit(x,y)
Z = knn.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx,yy,Z,cmap=cmap_light)
#Plot the training points
plt.scatter(x[:,0],x[:,1],c=y)
plt.xlim(xx.min(),xx.max())
plt.ylim(yy.min(),yy.max())
Out[120]: (1.5, 4.900000000000003)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
iris = datasets.load_iris()
x = iris.data[:,2:4] #X-Axis - petals length-width
y = iris.target #Y-Axis - species
x_min, x_max = x[:,0].min() - .5,x[:,0].max() + .5
y_min, y_max = x[:,1].min() - .5,x[:,1].max() + .5
#MESH
cmap_light = ListedColormap(['#AAAAFF','#AAFFAA','#FFAAAA'])
h = .02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
knn = KNeighborsClassifier()
knn.fit(x,y)
Z = knn.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx,yy,Z,cmap=cmap_light)
#Plot the training points
plt.scatter(x[:,0],x[:,1],c=y)
plt.xlim(xx.min(),xx.max())
plt.ylim(yy.min(),yy.max())
Out[126]: (-0.40000000000000002, 2.9800000000000031)
-- Diabetes Dataset
In [ ]: from sklearn import datasets
: diabetes = datasets.load_diabetes()
diabetes.data[0]
Out[ ]:
array([ 0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235 ,
-0.03482076, -0.04340085, -0.00259226, 0.01990842, -0.01764613])
np.sum(diabetes.data[:,0]**2)
Out[143]: 1.0000000000000746
diabetes.target
Out[146]:
array([ 151., 75., 141., 206., 135., 97., 138., 63., 110.,
310., 101., 69., 179., 185., 118., 171., 166., 144.,
97., 168., 68., 49., 68., 245., 184., 202., 137. . .
-- Linear Regression: The Least Squares Regression
from sklearn import linear_model
linreg = linear_model.LinearRegression()
from sklearn import datasets
diabetes = datasets.load_diabetes()
x_train = diabetes.data[:-20]
y_train = diabetes.target[:-20]
x_test = diabetes.data[-20:]
y_test = diabetes.target[-20:]
linreg.fit(x_train,y_train)
Out[ ]: LinearRegression(copy_X=True, fit_intercept=True, normalize=False)
linreg.coef_
linreg.predict(x_test)
y_test
linreg.score(x_test, y_test)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn import datasets
diabetes = datasets.load_diabetes()
x_train = diabetes.data[:-20]
y_train = diabetes.target[:-20]
x_test = diabetes.data[-20:]
y_test = diabetes.target[-20:]
x0_test = x_test[:,0]
x0_train = x_train[:,0]
x0_test = x0_test[:,np.newaxis]
x0_train = x0_train[:,np.newaxis]
linreg = linear_model.LinearRegression()
linreg.fit(x0_train,y_train)
y = linreg.predict(x0_test)
plt.scatter(x0_test,y_test,color='k')
plt.plot(x0_test,y,color='b',linewidth=3)
Out[230]: [<matplotlib.lines.Line2D at 0x380b1908>]
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn import datasets
diabetes = datasets.load_diabetes()
x_train = diabetes.data[:-20]
y_train = diabetes.target[:-20]
x_test = diabetes.data[-20:]
y_test = diabetes.target[-20:]
plt.figure(figsize=(8,12))
for f in range(0,10):
    xi_test = x_test[:,f]
    xi_train = x_train[:,f]
    xi_test = xi_test[:,np.newaxis]
    xi_train = xi_train[:,np.newaxis]
    linreg.fit(xi_train,y_train)
    y = linreg.predict(xi_test)
    plt.subplot(5,2,f+1)
    plt.scatter(xi_test,y_test,color='k')
    plt.plot(xi_test,y,color='b',linewidth=3)
-- Support Vector Machines (SVMs)
-- Support Vector Classification (SVC)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],
[2,1],[3,1],[3,2],[3.5,1],[3.5,3]])
y = [0]*6 + [1]*5
plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9)
Out[360]: <matplotlib.collections.PathCollection at 0x545634a8>
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],
[2,1],[3,1],[3,2],[3.5,1],[3.5,3]])
y = [0]*6 + [1]*5
svc = svm.SVC(kernel='linear').fit(x,y)
X,Y = np.mgrid[0:4:200j,0:4:200j]
Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z > 0,alpha=0.4)
plt.contour(X,Y,Z,colors=['k'], linestyles=['-'],levels=[0])
plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9)
Out[363]: <matplotlib.collections.PathCollection at 0x54acae10>
svc.predict([1.5,2.5])
Out[56]: array([0])
svc.predict([2.5,1])
Out[57]: array([1])
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],[2,1],[3,1],[3,2],[3.5,1],[3.5,3]])
y = [0]*6 + [1]*5
svc = svm.SVC(kernel='linear',C=1).fit(x,y)
X,Y = np.mgrid[0:4:200j,0:4:200j]
Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z > 0,alpha=0.4)
plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1])
plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none')
plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9)
Out[23]: <matplotlib.collections.PathCollection at 0x177066a0>
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],
[2,1],[3,1],[3,2],[3.5,1],[3.5,3]])
y = [0]*6 + [1]*5
svc = svm.SVC(kernel='linear',C=0.1).fit(x,y)
X,Y = np.mgrid[0:4:200j,0:4:200j]
Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z > 0,alpha=0.4)
plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1])
plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none')
plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9)
Out[24]: <matplotlib.collections.PathCollection at 0x1a01ecc0>
-- Nonlinear SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],
[2,1],[3,1],[3,2],[3.5,1],[3.5,3]])
y = [0]*6 + [1]*5
svc = svm.SVC(kernel='poly',C=1, degree=3).fit(x,y)
X,Y = np.mgrid[0:4:200j,0:4:200j]
Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z > 0,alpha=0.4)
plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1])
plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none')
plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9)
Out[34]: <matplotlib.collections.PathCollection at 0x1b6a9198>
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],
[2,1],[3,1],[3,2],[3.5,1],[3.5,3]])
y = [0]*6 + [1]*5
svc = svm.SVC(kernel='rbf', C=1, gamma=3).fit(x,y)
X,Y = np.mgrid[0:4:200j,0:4:200j]
Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z > 0,alpha=0.4)
plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1])
plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none')
plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9)
Out[43]: <matplotlib.collections.PathCollection at 0x1cb8d550>
-- Plotting Different SVM Classifiers Using the Iris Dataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
iris = datasets.load_iris()
x = iris.data[:,:2]
y = iris.target
h = .05
svc = svm.SVC(kernel='linear',C=1.0).fit(x,y)
x_min,x_max = x[:,0].min() - .5, x[:,0].max() + .5
y_min,y_max = x[:,1].min() - .5, x[:,1].max() + .5
h = .02
X, Y = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min,y_max,h))
Z = svc.predict(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z,alpha=0.4)
plt.contour(X,Y,Z,colors='k')
plt.scatter(x[:,0],x[:,1],c=y)
Out[49]: <matplotlib.collections.PathCollection at 0x1f2bd828>
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
iris = datasets.load_iris()
x = iris.data[:,:2]
y = iris.target
h = .05
svc = svm.SVC(kernel='poly',C=1.0,degree=3).fit(x,y)
x_min,x_max = x[:,0].min() - .5, x[:,0].max() + .5
y_min,y_max = x[:,1].min() - .5, x[:,1].max() + .5
h = .02
X, Y = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min,y_max,h))
Z = svc.predict(np.c_[X.ravel(),Y.ravel()])
Z = Z.reshape(X.shape)
plt.contourf(X,Y,Z,alpha=0.4)
plt.contour(X,Y,Z,colors='k')
plt.scatter(x[:,0],x[:,1],c=y)
Out[50]: <matplotlib.collections.PathCollection at 0x1f4cc4e0>
svc = svm.SVC(kernel='rbf', gamma=3, C=1.0).fit(x,y)
-- Support Vector Regression (SVR)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn import datasets
diabetes = datasets.load_diabetes()
x_train = diabetes.data[:-20]
y_train = diabetes.target[:-20]
x_test = diabetes.data[-20:]
y_test = diabetes.target[-20:]
x0_test = x_test[:,2]
x0_train = x_train[:,2]
x0_test = x0_test[:,np.newaxis]
x0_train = x0_train[:,np.newaxis]
x0_test.sort(axis=0)
x0_test = x0_test*100
x0_train = x0_train*100
svr = svm.SVR(kernel='linear',C=1000)
svr2 = svm.SVR(kernel='poly',C=1000,degree=2)
svr3 = svm.SVR(kernel='poly',C=1000,degree=3)
svr.fit(x0_train,y_train)
svr2.fit(x0_train,y_train)
svr3.fit(x0_train,y_train)
y = svr.predict(x0_test)
y2 = svr2.predict(x0_test)
y3 = svr3.predict(x0_test)
plt.scatter(x0_test,y_test,color='k')
plt.plot(x0_test,y,color='b')
plt.plot(x0_test,y2,c='r')
plt.plot(x0_test,y3,c='g')
Out[155]: [<matplotlib.lines.Line2D at 0x262e10b8>]
Thursday, March 3, 2016
Python Data Analysis 8 - Machine Learning with scikit-learn
Labels:
Python
Subscribe to:
Post Comments (Atom)
Blog Archive
-
▼
2016
(87)
-
▼
March
(25)
- Learning Python 12 - Summing Up
- Learning Python 11 - Debugging and Troubleshooting
- Learning Python 10 - Web Development Done Right
- Learning Python 9 - Data Science
- Learning Python 8 - The GUIs and Scripts
- Learning Python 7 - Testing, Profiling, and Dealing...
- Learning Python 6 - OOP, Decorators, and Iterators
- Learning Python 5 - Saving Time and Memory
- Learning Python 4 - Functions
- Learning Python 3 - Iterating and Making Decisions
- Learning Python 2 - Built-in Data Types
- Learning Python 1 - Introduction
- Bandit algorithms 7 - Bandits in the Real World: C...
- Bandit algorithms 6 - UCB - The Upper Confidence B...
- Bandit algorithms 5 - The Softmax Algorithm
- Bandit algorithms 4 - Debugging Bandit Algorithms
- Bandit algorithms 3 - The Epsilon-Greedy Algorithm
- Bandit algorithms 2 - Multiarmed Bandit Algorithms
- Bandit algorithms 1 - Exploration and Exploitation
- Python Data Analysis 11 - Recognizing Handwritten ...
- Python Data Analysis 10 - Embedding the JavaScript...
- Python Data Analysis 9 - An Example - Meteorologic...
- Python Data Analysis 8 - Machine Learning with sci...
- Python Data Analysis 7 - Data Visualization with m...
- Python Data Analysis 6 - pandas in Depth: Data Man...
-
▼
March
(25)
No comments:
Post a Comment