-- The scikit-learn Library -- Machine Learning -- Supervised and Unsupervised Learning Supervised learning Classification Regression Unsupervised learning Clustering Dimensionality reduction Training Set and Testing Set -- Supervised Learning with scikit-learn Classification, using the Iris Dataset K-Nearest Neighbors Classifier Support Vector Machines (SVC) Regression, using the Diabetes Dataset Linear Regression Support Vector Machines (SVR) -- The Iris Flower Dataset In [ ]: from sklearn import datasets : iris = datasets.load_iris() In [ ]: iris.data Out[ ]: array([[ 5.1, 3.5, 1.4, 0.2], [ 4.9, 3. , 1.4, 0.2], [ 4.7, 3.2, 1.3, 0.2], [ 4.6, 3.1, 1.5, 0.2], ... In [ ]: iris.target Out[ ]: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) In [ ]: iris.target_names Out[ ]: array(['setosa', 'versicolor', 'virginica'], dtype='|S10') In [ ]: import matplotlib.pyplot as plt : import matplotlib.patches as mpatches : from sklearn import datasets : : iris = datasets.load_iris() : x = iris.data[:,0] #X-Axis - sepal length : y = iris.data[:,1] #Y-Axis - sepal width : species = iris.target #Species : : x_min, x_max = x.min() - .5,x.max() + .5 : y_min, y_max = y.min() - .5,y.max() + .5 : : #SCATTERPLOT : plt.figure() : plt.title('Iris Dataset - Classification By Sepal Sizes') : plt.scatter(x,y, c=species) : plt.xlabel('Sepal length') : plt.ylabel('Sepal width') : plt.xlim(x_min, x_max) : plt.ylim(y_min, y_max) : plt.xticks(()) : plt.yticks(()) In [ ]: import matplotlib.pyplot as plt : import matplotlib.patches as mpatches : from sklearn import datasets : : iris = 
datasets.load_iris() : x = iris.data[:,2] #X-Axis - petal length : y = iris.data[:,3] #Y-Axis - petal width : species = iris.target #Species : : x_min, x_max = x.min() - .5,x.max() + .5 : y_min, y_max = y.min() - .5,y.max() + .5 : #SCATTERPLOT : plt.figure() : plt.title('Iris Dataset - Classification By Petal Sizes', size=14) : plt.scatter(x,y, c=species) : plt.xlabel('Petal length') : plt.ylabel('Petal width') : plt.xlim(x_min, x_max) : plt.ylim(y_min, y_max) : plt.xticks(()) : plt.yticks(()) -- The PCA Decomposition from sklearn.decomposition import PCA x_reduced = PCA(n_components=3).fit_transform(iris.data) import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from sklearn import datasets from sklearn.decomposition import PCA iris = datasets.load_iris() x = iris.data[:,1] #X-Axis - sepal width y = iris.data[:,2] #Y-Axis - petal length species = iris.target #Species x_reduced = PCA(n_components=3).fit_transform(iris.data) #SCATTERPLOT 3D fig = plt.figure() ax = Axes3D(fig) ax.set_title('Iris Dataset by PCA', size=14) ax.scatter(x_reduced[:,0],x_reduced[:,1],x_reduced[:,2], c=species) ax.set_xlabel('First eigenvector') ax.set_ylabel('Second eigenvector') ax.set_zlabel('Third eigenvector') ax.w_xaxis.set_ticklabels(()) ax.w_yaxis.set_ticklabels(()) ax.w_zaxis.set_ticklabels(()) -- K-Nearest Neighbors Classifier import numpy as np from sklearn import datasets np.random.seed(0) iris = datasets.load_iris() x = iris.data y = iris.target i = np.random.permutation(len(iris.data)) x_train = x[i[:-10]] y_train = y[i[:-10]] x_test = x[i[-10:]] y_test = y[i[-10:]] from sklearn.neighbors import KNeighborsClassifier knn = KNeighborsClassifier() knn.fit(x_train,y_train) Out[86]: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_neighbors=5, p=2, weights='uniform') knn.predict(x_test) Out[100]: array([1, 2, 1, 0, 0, 0, 2, 1, 2, 0]) y_test Out[101]: array([1, 1, 1, 0, 0, 0, 2, 1, 2, 0]) import numpy as np import 
matplotlib.pyplot as plt from matplotlib.colors import ListedColormap from sklearn import datasets from sklearn.neighbors import KNeighborsClassifier iris = datasets.load_iris() x = iris.data[:,:2] #X-Axis - sepal length-width y = iris.target #Y-Axis - species x_min, x_max = x[:,0].min() - .5,x[:,0].max() + .5 y_min, y_max = x[:,1].min() - .5,x[:,1].max() + .5 #MESH cmap_light = ListedColormap(['#AAAAFF','#AAFFAA','#FFAAAA']) h = .02 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) knn = KNeighborsClassifier() knn.fit(x,y) Z = knn.predict(np.c_[xx.ravel(),yy.ravel()]) Z = Z.reshape(xx.shape) plt.figure() plt.pcolormesh(xx,yy,Z,cmap=cmap_light) #Plot the training points plt.scatter(x[:,0],x[:,1],c=y) plt.xlim(xx.min(),xx.max()) plt.ylim(yy.min(),yy.max()) Out[120]: (1.5, 4.900000000000003) import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap from sklearn import datasets from sklearn.neighbors import KNeighborsClassifier iris = datasets.load_iris() x = iris.data[:,2:4] #X-Axis - petals length-width y = iris.target #Y-Axis - species x_min, x_max = x[:,0].min() - .5,x[:,0].max() + .5 y_min, y_max = x[:,1].min() - .5,x[:,1].max() + .5 #MESH cmap_light = ListedColormap(['#AAAAFF','#AAFFAA','#FFAAAA']) h = .02 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) knn = KNeighborsClassifier() knn.fit(x,y) Z = knn.predict(np.c_[xx.ravel(),yy.ravel()]) Z = Z.reshape(xx.shape) plt.figure() plt.pcolormesh(xx,yy,Z,cmap=cmap_light) #Plot the training points plt.scatter(x[:,0],x[:,1],c=y) plt.xlim(xx.min(),xx.max()) plt.ylim(yy.min(),yy.max()) Out[126]: (-0.40000000000000002, 2.9800000000000031) -- Diabetes Dataset In [ ]: from sklearn import datasets : diabetes = datasets.load_diabetes() diabetes.data[0] Out[ ]: array([ 0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235 , -0.03482076, -0.04340085, -0.00259226, 0.01990842, -0.01764613]) np.sum(diabetes.data[:,0]**2) Out[143]: 
1.0000000000000746 diabetes.target Out[146]: array([ 151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101., 69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49., 68., 245., 184., 202., 137. . . -- Linear Regression: The Least Square Regression from sklearn import linear_model linreg = linear_model.LinearRegression() from sklearn import datasets diabetes = datasets.load_diabetes() x_train = diabetes.data[:-20] y_train = diabetes.target[:-20] x_test = diabetes.data[-20:] y_test = diabetes.target[-20:] linreg.fit(x_train,y_train) Out[ ]: LinearRegression(copy_X=True, fit_intercept=True, normalize=False) linreg.coef_ linreg.predict(x_test) y_test linreg.score(x_test, y_test) import numpy as np import matplotlib.pyplot as plt from sklearn import linear_model from sklearn import datasets diabetes = datasets.load_diabetes() x_train = diabetes.data[:-20] y_train = diabetes.target[:-20] x_test = diabetes.data[-20:] y_test = diabetes.target[-20:] x0_test = x_test[:,0] x0_train = x_train[:,0] x0_test = x0_test[:,np.newaxis] x0_train = x0_train[:,np.newaxis] linreg = linear_model.LinearRegression() linreg.fit(x0_train,y_train) y = linreg.predict(x0_test) plt.scatter(x0_test,y_test,color='k') plt.plot(x0_test,y,color='b',linewidth=3) Out[230]: [<matplotlib.lines.Line2D at 0x380b1908>] import numpy as np import matplotlib.pyplot as plt from sklearn import linear_model from sklearn import datasets diabetes = datasets.load_diabetes() x_train = diabetes.data[:-20] y_train = diabetes.target[:-20] x_test = diabetes.data[-20:] y_test = diabetes.target[-20:] plt.figure(figsize=(8,12)) for f in range(0,10): xi_test = x_test[:,f] xi_train = x_train[:,f] xi_test = xi_test[:,np.newaxis] xi_train = xi_train[:,np.newaxis] linreg.fit(xi_train,y_train) y = linreg.predict(xi_test) plt.subplot(5,2,f+1) plt.scatter(xi_test,y_test,color='k') plt.plot(xi_test,y,color='b',linewidth=3) -- Support Vector Machines (SVMs) -- Support Vector Classification (SVC) import numpy as np import 
matplotlib.pyplot as plt from sklearn import svm x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5], [2,1],[3,1],[3,2],[3.5,1],[3.5,3]]) y = [0]*6 + [1]*5 plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9) Out[360]: <matplotlib.collections.PathCollection at 0x545634a8> import numpy as np import matplotlib.pyplot as plt from sklearn import svm x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5], [2,1],[3,1],[3,2],[3.5,1],[3.5,3]]) y = [0]*6 + [1]*5 svc = svm.SVC(kernel='linear').fit(x,y) X,Y = np.mgrid[0:4:200j,0:4:200j] Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z > 0,alpha=0.4) plt.contour(X,Y,Z,colors=['k'], linestyles=['-'],levels=[0]) plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9) Out[363]: <matplotlib.collections.PathCollection at 0x54acae10> svc.predict([1.5,2.5]) Out[56]: array([0]) svc.predict([2.5,1]) Out[57]: array([1]) import numpy as np import matplotlib.pyplot as plt from sklearn import svm x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5],[2,1],[3,1],[3,2],[3.5,1],[3.5,3]]) y = [0]*6 + [1]*5 svc = svm.SVC(kernel='linear',C=1).fit(x,y) X,Y = np.mgrid[0:4:200j,0:4:200j] Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z > 0,alpha=0.4) plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1]) plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none') plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9) Out[23]: <matplotlib.collections.PathCollection at 0x177066a0> import numpy as np import matplotlib.pyplot as plt from sklearn import svm x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5], [2,1],[3,1],[3,2],[3.5,1],[3.5,3]]) y = [0]*6 + [1]*5 svc = svm.SVC(kernel='linear',C=0.1).fit(x,y) X,Y = np.mgrid[0:4:200j,0:4:200j] Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z > 0,alpha=0.4) plt.contour(X,Y,Z,colors=['k','k','k'], 
linestyles=['--','-','--'],levels=[-1,0,1]) plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none') plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9) Out[24]: <matplotlib.collections.PathCollection at 0x1a01ecc0> -- Nonlinear SVC import numpy as np import matplotlib.pyplot as plt from sklearn import svm x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5], [2,1],[3,1],[3,2],[3.5,1],[3.5,3]]) y = [0]*6 + [1]*5 svc = svm.SVC(kernel='poly',C=1, degree=3).fit(x,y) X,Y = np.mgrid[0:4:200j,0:4:200j] Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z > 0,alpha=0.4) plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1]) plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none') plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9) Out[34]: <matplotlib.collections.PathCollection at 0x1b6a9198> import numpy as np import matplotlib.pyplot as plt from sklearn import svm x = np.array([[1,3],[1,2],[1,1.5],[1.5,2],[2,3],[2.5,1.5], [2,1],[3,1],[3,2],[3.5,1],[3.5,3]]) y = [0]*6 + [1]*5 svc = svm.SVC(kernel='rbf', C=1, gamma=3).fit(x,y) X,Y = np.mgrid[0:4:200j,0:4:200j] Z = svc.decision_function(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z > 0,alpha=0.4) plt.contour(X,Y,Z,colors=['k','k','k'], linestyles=['--','-','--'],levels=[-1,0,1]) plt.scatter(svc.support_vectors_[:,0],svc.support_vectors_[:,1],s=120,facecolors='none') plt.scatter(x[:,0],x[:,1],c=y,s=50,alpha=0.9) Out[43]: <matplotlib.collections.PathCollection at 0x1cb8d550> -- Plotting Different SVM Classifiers Using the Iris Dataset import numpy as np import matplotlib.pyplot as plt from sklearn import svm, datasets iris = datasets.load_iris() x = iris.data[:,:2] y = iris.target h = .05 svc = svm.SVC(kernel='linear',C=1.0).fit(x,y) x_min,x_max = x[:,0].min() - .5, x[:,0].max() + .5 y_min,y_max = x[:,1].min() - .5, x[:,1].max() + .5 h = .02 X, Y = 
np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min,y_max,h)) Z = svc.predict(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z,alpha=0.4) plt.contour(X,Y,Z,colors='k') plt.scatter(x[:,0],x[:,1],c=y) Out[49]: <matplotlib.collections.PathCollection at 0x1f2bd828> import numpy as np import matplotlib.pyplot as plt from sklearn import svm, datasets iris = datasets.load_iris() x = iris.data[:,:2] y = iris.target h = .05 svc = svm.SVC(kernel='poly',C=1.0,degree=3).fit(x,y) x_min,x_max = x[:,0].min() - .5, x[:,0].max() + .5 y_min,y_max = x[:,1].min() - .5, x[:,1].max() + .5 h = .02 X, Y = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min,y_max,h)) Z = svc.predict(np.c_[X.ravel(),Y.ravel()]) Z = Z.reshape(X.shape) plt.contourf(X,Y,Z,alpha=0.4) plt.contour(X,Y,Z,colors='k') plt.scatter(x[:,0],x[:,1],c=y) Out[50]: <matplotlib.collections.PathCollection at 0x1f4cc4e0> svc = svm.SVC(kernel='rbf', gamma=3, C=1.0).fit(x,y) -- Support Vector Regression (SVR) import numpy as np import matplotlib.pyplot as plt from sklearn import svm from sklearn import datasets diabetes = datasets.load_diabetes() x_train = diabetes.data[:-20] y_train = diabetes.target[:-20] x_test = diabetes.data[-20:] y_test = diabetes.target[-20:] x0_test = x_test[:,2] x0_train = x_train[:,2] x0_test = x0_test[:,np.newaxis] x0_train = x0_train[:,np.newaxis] x0_test.sort(axis=0) x0_test = x0_test*100 x0_train = x0_train*100 svr = svm.SVR(kernel='linear',C=1000) svr2 = svm.SVR(kernel='poly',C=1000,degree=2) svr3 = svm.SVR(kernel='poly',C=1000,degree=3) svr.fit(x0_train,y_train) svr2.fit(x0_train,y_train) svr3.fit(x0_train,y_train) y = svr.predict(x0_test) y2 = svr2.predict(x0_test) y3 = svr3.predict(x0_test) plt.scatter(x0_test,y_test,color='k') plt.plot(x0_test,y,color='b') plt.plot(x0_test,y2,c='r') plt.plot(x0_test,y3,c='g') Out[155]: [<matplotlib.lines.Line2D at 0x262e10b8>]
Thursday, March 3, 2016
Python Data Analysis 8 - Machine Learning with scikit-learn
Labels:
Python
Subscribe to:
Post Comments (Atom)
Blog Archive
-
▼
2016
(87)
-
▼
March
(25)
- Learning Python 12 - Summing Up
- Learning Python 11 - Debugging and Troubleshooting
- Learning Python 10 - Web Development Done Right
- Learning Python 9 - Data Science
- Learning Python 8 - The GUIs and Scripts
- Learning Python 7 - Testing, Profiling, and Dealing...
- Learning Python 6 - OOP, Decorators, and Iterators
- Learning Python 5 - Saving Time and Memory
- Learning Python 4 - Functions
- Learning Python 3 - Iterating and Making Decisions
- Learning Python 2 - Built-in Data Types
- Learning Python 1 - Introduction
- Bandit algorithms 7 - Bandits in the Real World: C...
- Bandit algorithms 6 - UCB - The Upper Confidence B...
- Bandit algorithms 5 - The Softmax Algorithm
- Bandit algorithms 4 - Debugging Bandit Algorithms
- Bandit algorithms 3 - The Epsilon-Greedy Algorithm
- Bandit algorithms 2 - Multiarmed Bandit Algorithms
- Bandit algorithms 1 - Exploration and Exploitation
- Python Data Analysis 11 - Recognizing Handwritten ...
- Python Data Analysis 10 - Embedding the JavaScript...
- Python Data Analysis 9 - An Example - Meteorologic...
- Python Data Analysis 8 - Machine Learning with sci...
- Python Data Analysis 7 - Data Visualization with m...
- Python Data Analysis 6 - pandas in Depth: Data Man...
-
▼
March
(25)
No comments:
Post a Comment