2017-06-13 57 views
0

我想在一個合併兩個地塊:插曲遇到意外變化

  1. http://scikit-learn.org/stable/auto_examples/linear_model/plot_sgd_iris.html
  2. http://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_decision_regions.html#sphx-glr-auto-examples-ensemble-plot-voting-decision-regions-py

在左圖我想要顯示的決策邊界與超平面對應的OVA分類器和右圖我想顯示決策概率。

這是迄今爲止代碼:

import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sn 

from sklearn import datasets 
from sklearn import preprocessing 
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.linear_model import SGDClassifier 
from sklearn.svm import SVC 

def plot_hyperplane(c, color, fitted_model): 
    """ 
    Plot the one-against-all classifiers for the given model. 

    Parameters 
    -------------- 

    c : index of the hyperplane to be plot 
    color : color to be used when drawing the line 
    fitted_model : the fitted model 
    """ 
    xmin, xmax = plt.xlim() 
    ymin, ymax = plt.ylim() 

    try: 
     coef = fitted_model.coef_ 
     intercept = fitted_model.intercept_ 
    except: 
     return 

    def line(x0): 
     return (-(x0 * coef[c, 0]) - intercept[c])/coef[c, 1] 

    plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color, zorder=3) 


def plot_decision_boundary(X, y, fitted_model, features, targets): 
    """ 
    This function plots a model decision boundary as well as it tries to plot 
    the decision probabilities, if available. 
    Requires a model fitted with two features only. 

    Parameters 
    -------------- 

    X : the data to learn 
    y : the classification labels 
    fitted_model : the fitted model 
    """ 
    cmap = plt.get_cmap('Set3') 
    prob = cmap 
    colors = [cmap(i) for i in np.linspace(0, 1, len(fitted_model.classes_))] 

    plt.figure(figsize=(9.5, 5)) 
    for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']): 
     plt.subplot(1, 2, i+1) 

     mesh_step_size = 0.01 # step size in the mesh 
     x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1 
     y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1 
     xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size), np.arange(y_min, y_max, mesh_step_size)) 
     # First plot, predicted results using the given model 
     if i == 0: 
      Z = fitted_model.predict(np.c_[xx.ravel(), yy.ravel()]) 
      for h, color in zip(fitted_model.classes_, colors): 
       plot_hyperplane(h, color, fitted_model) 
     # Second plot, predicted probabilities using the given model 
     else: 
      prob = 'RdYlBu_r' 
      try: 
       Z = fitted_model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] 
      except: 
       plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center', 
         verticalalignment='center', transform=plt.gca().transAxes, fontsize=12) 
       plt.axis('off') 
       break 
     Z = Z.reshape(xx.shape) 
     # Display Z 
     plt.imshow(Z, interpolation='nearest', cmap=prob, alpha=0.5, 
        extent=(x_min, x_max, y_min, y_max), origin='lower', zorder=1) 
     # Plot the data points 
     for i, color in zip(fitted_model.classes_, colors): 
      idx = np.where(y == i) 
      plt.scatter(X[idx, 0], X[idx, 1], facecolor=color, edgecolor='k', lw=1, 
         label=iris.target_names[i], cmap=cmap, alpha=0.8, zorder=2) 
     plt.title(plot_type + '\n' + 
        str(fitted_model).split('(')[0]+ ' Test Accuracy: ' + str(np.round(fitted_model.score(X, y), 5))) 
     plt.xlabel(features[0]) 
     plt.ylabel(features[1]) 
     plt.gca().set_aspect('equal') 
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))  
    plt.tight_layout() 
    plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02) 
    plt.show() 


if __name__ == '__main__': 
    iris = datasets.load_iris() 
    X = iris.data[:, [0, 2]] 
    y = iris.target 

    scaler = preprocessing.StandardScaler().fit_transform(X) 

    clf1 = DecisionTreeClassifier(max_depth=4) 
    clf2 = KNeighborsClassifier(n_neighbors=7) 
    clf3 = SVC(kernel='rbf', probability=True) 
    clf4 = SGDClassifier(alpha=0.001, n_iter=100).fit(X, y) 

    clf1.fit(X, y) 
    clf2.fit(X, y) 
    clf3.fit(X, y) 
    clf4.fit(X, y) 

    plot_decision_boundary(X, y, clf1, iris.feature_names, iris.target_names[[0, 2]]) 
    plot_decision_boundary(X, y, clf2, iris.feature_names, iris.target_names[[0, 2]]) 
    plot_decision_boundary(X, y, clf3, iris.feature_names, iris.target_names[[0, 2]]) 
    plot_decision_boundary(X, y, clf4, iris.feature_names, iris.target_names[[0, 2]]) 

而且結果:

enter image description hereenter image description hereenter image description here

可以看出,在過去的例子(在給定的代碼clf4)我迄今爲止無法將超平面繪製在錯誤的位置。我想知道如何解決這個問題。應該將它們翻譯成適合使用的特徵的正確範圍以適應模型。

謝謝。

回答

1

顯然,問題是代表超平面的虛線的末端與最終和預期的xlimylim不一致。這種情況的一個好處是,您已經定義了x_min, x_max, y_min, y_max。所以使用它並通過在繪製超平面之前應用以下3行來修復xlimylim(具體地說,在評論行的前面添加# First plot, predicted results using the given model)。

 ax = plt.gca() 
     ax.set_xlim((x_min, x_max), auto=False) 
     ax.set_ylim((y_min, y_max), auto=False) 
+0

當我嘗試這個,我得到和錯誤和Python停止工作:QWindowsWindow :: setGeometry:'無法設置幾何1000x1069 + 9 +上QWidgetWindow/38 'MainWindowClassWindow'。所得的幾何形狀:1000x1055 + 9 + 38(幀:9,38,9,9,定製餘量:0,0,0,0,最小尺寸:72x69,最大尺寸:16777215x16777215).' – pceccon

+0

@pceccon它看起來像這樣是一個Qt問題,儘管我沒有看到Qt進入OP的位置。對不起,我沒有考慮到這一點。另外我不擅長'pyqt'。爲了解決你的問題,一個選擇是嘗試沒有Qt的代碼,就像Jupyter筆記本一樣。這將幫助您確定「將超平面置於錯誤位置」問題的原因。如果'xlim'和'ylim'確實是原因,那麼你可以用Qt來設置'xlim'和'ylim'。希望這可能有所幫助... –

+0

我不明確導入pyqt。我會盡力弄清楚這個問題。謝謝。 – pceccon