可以將 NumPy 陣列或 pandas DataFrame 作爲輸入提供給 sklearn.cluster.FeatureAgglomeration。
輸出是一個 NumPy 陣列,其行數(rows)與數據集的行數相同,列數(columns)等於 FeatureAgglomeration 中設定的 n_clusters 參數。
from sklearn.cluster import FeatureAgglomeration
import pandas as pd
import matplotlib.pyplot as plt
# iris.data from https://archive.ics.uci.edu/ml/machine-learning-databases/iris/
# The file is read without a header row, so columns are numbered 0..4:
# columns 0-3 hold the four features and column 4 holds the species label.
iris = pd.read_csv('iris.data', sep=',', header=None)

# Store the labels, then keep only the feature columns.
label = iris[4]
# Use the `columns=` keyword: the positional form `drop([4], 1)` was
# deprecated in pandas 1.3 and removed in pandas 2.0.
iris = iris.drop(columns=[4])

# Set n_clusters to 2: the output will be two columns of agglomerated
# features (iris has 4 features), with one row per input sample.
agglo = FeatureAgglomeration(n_clusters=2).fit_transform(iris)

# Plotting: one colour per species.
species_colors = {
    'Iris-setosa': 'g',
    'Iris-versicolor': 'b',
    'Iris-virginica': 'r',
}
# A dict lookup raises KeyError on an unexpected label instead of silently
# producing a colour list shorter than the data (which plt.scatter rejects).
color = [species_colors[name] for name in label]

plt.scatter(agglo[:, 0], agglo[:, 1], c=color)
plt.show()