我想在Python中使用numpy和sklearn執行kmeans聚類。 我有一個45列和645行的txt文件。第一行是Y和剩下的644行是X.Python中的KMeans:ValueError:設置一個序列的數組元素
我的Python代碼是:kmean.fit(X)
我得到的錯誤是:
import numpy as np
import matplotlib.pyplot as plt
import csv
from sklearn.cluster import KMeans
#The following code reads the first row and terminates the loop
with open('trainDataXY.txt','r') as f:
read = csv.reader(f)
for first_row in read:
y = list(first_row)
break
#The following code skips the first row and reads rest of the rows
firstLine = True
with open('trainDataXY.txt','r') as f1:
readY = csv.reader(f1)
for rows in readY:
if firstLine:
firstLine=False
continue
x = list(readY)
X = np.array((x,y), dtype=object)
kmean = KMeans(n_clusters=2)
kmean.fit(X)
我在這條線得到一個錯誤
Traceback (most recent call last):
File "D:\file_path\kmeans.py", line 25, in <module> kmean.fit(X)
File "C:\Anaconda2\lib\site-packages\sklearn\cluster\k_means_.py",
line 812, in fit X = self._check_fit_data(X)
File "C:\Anaconda2\lib\site-packages\sklearn\cluster\k_means_.py",
line 786, in _check_fit_data X = check_array(X, accept_sparse='csr',
dtype=np.float64)
File "C:\Anaconda2\lib\site-packages\sklearn\utils\validation.py",
line 373, in check_array array = np.array(array, dtype=dtype,
order=order, copy=copy) ValueError: setting an array element with a
sequence.`
trainDataXY。txt
1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3, 3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5 47,64, 50,39,66,51,46,37,43,37,37,35,36,34,37,38,37,39,104,102,103,103,102,108,109,107,106,115,116,116,120,122,121,121,116,116,131,131,130,132,126,127,131,128,127
47,65,58,30,39,48,47,35 ,42,37,38,37,37,36,38,38,38,40,104,103,103,103,101,108,110,108,106,116,115,116,121,121,119,121,116,116,133,131,129,132,127,128,132,126,127
49,69,55,28,56,64,50,30,41,37,39,37,38, 36,39,39,39,40,105,103,104,104,103,110,110,108,107,116,115,117,120,120,117,121,115,116,134,131,129,134,128,125,134,126 ,127
51,78,52,46,56,74,50,28,38,38,39,38,38,37,40,39,39,41,96,101,99,104,97,101,111,101,104,115,116,116,119,110,112,119,116,116,135,130,129,135,120,108,133,120,125
55,79,53,65,52,102,55,28,36,39,40,38,39,37,40,39,40,42,79,86,84,105,84,57,110,85,76,117,118,115,110,66 ,86,117,117,118,123,130,130,129,106,93,130,113,114
48,80,59,81,50,120,63,26,31,39,40,39,40,38,42,37,41,42,53,73,77,90, 47,34,76,52,63,106,102,97,80,33,68,105,105,113,115,130,124,111,83,91,128,105,110
45,95,56,86,38,137,60,27,27,39,40,38,40,37 ,41,5 2,38,41,24,44,44,79,40,32,48,26,28,63,52,59,42,30,62,79,67,77,116,121,122,114,96,90,126,93,103
45,93,47,86,35,144,60,26,27,39,40,45,39,38,43,87,46,58,33,21,26,62,42,49,49,37 ,24,33,41,56,29,28,68,79,58,74,115,111,115,119,117,104,132,92,97
48,85,50,83,37,142,62,25,29,57,47,77, 43,64,61,115,70,101,41,28,28,48,39,46,42,38,37,47,43,74,32,28,64,86,80,81,127,113,99,130,140,112,139,92,97
48,94,78,77,30,138,57,28,29,91,66,94,61,94,103,129,89,140,38,34,32,38,33,43,38,36,39,50 ,39,75,31,33,65,89,82,84,127,112,100,133,141,107,136,95,97
45,108,158,77,30,140,67,29,26,104,97,113,92,106,141,137,116,151,33,32,32,43,44,40,37,34,37,54,86,77,55,48,77,112,83,109,120,111,105,124,133,98,129 ,89.99
48,139,173,64,40,159,61,55,27,115,117,128,106,124,150,139,125,160,27,26,29,54,51,47,36,36,32,80,125,105,97,96,86,130,102,118,117,104,105,118,117,92,130,94, 97
131,157,143,66,87,130,57,118,26,124,137,129,133,138,156,133,132,173,29,25,28,81,48,38,48,32,24,134,165,144,149,142,110,145,147,161,114,112,103,118,115,94,126,87,102
160,162,146,78,116,127,52,133,71,116,141,125,125,141,169,115,110,161,69, 53,46,97,79,47,76,59,32,148,147,134,165,152,111,155,139,145,116,113,10 1,118,105,86,123,92,99
請顯示您的數據文件的摘錄。還要確保你創建了一個真正的數字矩陣,而不是'dtype = object'。喜歡用numpy或pandas閱讀器閱讀您的數據。 – eickenberg