摘要:聚類效果、數據集與代碼:從文件中加載數據,一次性讀取文件中的所有數據,遍歷數據中的每一行,對……
聚類效果
1.658985 4.285136 -3.453687 3.424321 4.838138 -1.151539 -5.379713 -3.362104 0.972564 2.924086 -3.567919 1.531611 0.450614 -3.302219 -3.487105 -1.724432 2.668759 1.594842 -3.156485 3.191137 3.165506 -3.999838 -2.786837 -3.099354 4.208187 2.984927 -2.123337 2.943366 0.704199 -0.479481 -0.392370 -3.963704 2.831667 1.574018 -0.790153 3.343144 2.943496 -3.357075 -3.195883 -2.283926 2.336445 2.875106 -1.786345 2.554248 2.190101 -1.906020 -3.403367 -2.778288 1.778124 3.880832 -1.688346 2.230267 2.592976 -2.054368 -4.007257 -3.207066 2.257734 3.387564 -2.679011 0.785119 0.939512 -4.023563 -3.674424 -2.261084 2.046259 2.735279 -3.189470 1.780269 4.372646 -0.822248 -2.579316 -3.497576 1.889034 5.190400 -0.798747 2.185588 2.836520 -2.658556 -3.837877 -3.253815 2.096701 3.886007 -2.709034 2.923887 3.367037 -3.184789 -2.121479 -4.232586 2.329546 3.179764 -3.284816 3.273099 3.091414 -3.815232 -3.762093 -2.432191 3.542056 2.778832 -1.736822 4.241041 2.127073 -2.983680 -4.323818 -3.938116 3.792121 5.135768 -4.786473 3.358547 2.624081 -3.260715 -4.009299 -2.978115 2.493525 1.963710 -2.513661 2.642162 1.864375 -3.176309 -3.171184 -3.572452 2.894220 2.489128 -2.562539 2.884438 3.491078 -3.947487 -2.565729 -2.012114 3.332948 3.983102 -1.616805 3.573188 2.280615 -2.559444 -2.651229 -3.103198 2.321395 3.154987 -1.685703 2.939697 3.031012 -3.620252 -4.599622 -2.185829 4.196223 1.126677 -2.133863 3.093686 4.668892 -2.562705 -2.793241 -2.149706 2.884105 3.043438 -2.967647 2.848696 4.479332 -1.764772 -4.905566 -2.911070
import math

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the clustering functions below only need NumPy,
    # so a missing matplotlib must not prevent importing this module.
    plt = None


def loadDataSet(file_name):
    """Load a whitespace-delimited numeric text file into a NumPy array.

    Each non-blank line becomes one row; every field on the line is parsed
    as a float. Fields may be separated by tabs or any run of spaces.

    Args:
        file_name: Path of the text file to read.

    Returns:
        ``np.ndarray`` with one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    with open(file_name) as fr:
        for line in fr:
            # split() with no argument handles tabs, repeated spaces and the
            # trailing newline; blank lines yield no fields and are skipped.
            fields = line.split()
            if not fields:
                continue
            rows.append([float(field) for field in fields])
    return np.array(rows)


def dist_eclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    For vecA=(x1, y1) and vecB=(x2, y2) this is
    sqrt((x1 - x2)^2 + (y1 - y2)^2).

    Args:
        vecA: First vector (array-like of numbers).
        vecB: Second vector, same shape as ``vecA``.

    Returns:
        The distance as a NumPy float. For 2-D inputs the reduction runs
        over the first axis, giving one distance per column.
    """
    diff = np.asarray(vecA, dtype=float) - np.asarray(vecB, dtype=float)
    return np.sqrt(np.sum(diff ** 2, axis=0))


def rand_cent(dataSet, k):
    """Build k random centroids inside the bounding box of the data.

    Args:
        dataSet: 2-D array-like of shape (m, n), one point per row.
        k: Number of centroids to generate.

    Returns:
        ``np.ndarray`` of shape (k, n); column j is drawn uniformly from
        [min of column j, max of column j) via ``np.random.rand``.
    """
    points = np.asarray(dataSet, dtype=float)
    n = points.shape[1]
    lows = points.min(axis=0)
    spans = points.max(axis=0) - lows
    # rand(n, k).T consumes the global random stream column by column, so a
    # seeded run draws the values in the same order as a per-column loop.
    return lows + spans * np.random.rand(n, k).T


def Kmeans(data_set, k):
    """Cluster the rows of ``data_set`` into k groups with K-means.

    Centroids start at random positions (see ``rand_cent``). Points are then
    repeatedly assigned to their nearest centroid and each centroid is moved
    to the mean of its points, until no assignment changes.

    Args:
        data_set: 2-D array-like of shape (m, n), one point per row.
        k: Number of clusters; must be at least 1.

    Returns:
        A tuple ``(centroids, cluster_assment)``:
        ``centroids`` is a (k, n) array of cluster centres;
        ``cluster_assment`` is an (m, 2) array whose column 0 holds the index
        of the centroid assigned to each point and whose column 1 holds the
        squared distance from the point to that centroid.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    points = np.asarray(data_set, dtype=float)
    m = points.shape[0]
    cluster_assment = np.zeros((m, 2))
    # -1 is not a valid centroid index, so the first pass always registers as
    # a change even for points whose nearest centroid is index 0.
    cluster_assment[:, 0] = -1
    centroids = rand_cent(points, k)
    row_index = np.arange(m)

    cluster_changed = True
    while cluster_changed:
        # Squared distance from every point to every centroid, shape (m, k).
        deltas = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        sq_dists = np.sum(deltas ** 2, axis=2)
        # argmin returns the first minimum, so ties go to the lowest index.
        nearest = np.argmin(sq_dists, axis=1)

        cluster_changed = bool(np.any(nearest != cluster_assment[:, 0]))
        cluster_assment[:, 0] = nearest
        cluster_assment[:, 1] = sq_dists[row_index, nearest]

        for cent in range(k):
            members = points[nearest == cent]
            # An empty cluster has no mean; keep its previous centroid rather
            # than overwriting it with NaN.
            if len(members):
                centroids[cent, :] = members.mean(axis=0)

    return centroids, cluster_assment


def _scatter_figure(points, centroids=None):
    """Draw the sample points, plus the centroids when given, and show it."""
    if plt is None:
        raise ImportError("matplotlib is required for plotting")
    fig, ax = plt.subplots(figsize=(10, 5))
    # Column 0 (feature 1) is the x axis, column 1 (feature 2) the y axis.
    ax.scatter(points[:, 0], points[:, 1], s=30, c="r", marker="o",
               label="sample point")
    if centroids is not None:
        ax.scatter(centroids[:, 0], centroids[:, 1], s=100, c="black",
                   marker="v", label="centroids")
    ax.legend()
    ax.set_xlabel("factor1")
    ax.set_ylabel("factor2")
    plt.show()


def drawFigure(dataMat):
    """Show a scatter plot of the raw sample points.

    Args:
        dataMat: 2-D array with at least two columns; column 0 is plotted on
            the x axis and column 1 on the y axis.

    Raises:
        ImportError: If matplotlib is not installed.
    """
    _scatter_figure(dataMat)


def drawFigure2(data_set, my_centroids):
    """Show a scatter plot of the sample points with the centroids overlaid.

    Args:
        data_set: 2-D array of sample points (columns 0 and 1 are plotted).
        my_centroids: 2-D array of centroids (columns 0 and 1 are plotted).

    Raises:
        ImportError: If matplotlib is not installed.
    """
    _scatter_figure(data_set, my_centroids)


def main():
    """Load ``testSet.txt``, cluster it into 4 groups and plot the result."""
    data_set = loadDataSet("testSet.txt")
    my_centroids, my_cluster_assment = Kmeans(data_set, 4)
    drawFigure2(data_set, my_centroids)


if __name__ == "__main__":
    main()
文章版權歸作者所有,未經允許請勿轉載,若此文章存在違規行為,您可以聯繫管理員刪除。
轉載請註明本文地址:http://m.specialneedsforspecialkids.com/yun/43546.html
閱讀 3257·2021-10-27 14:20
閱讀 2531·2021-10-08 10:05
閱讀 1634·2021-09-09 09:33
閱讀 2906·2019-08-30 13:16
閱讀 1442·2019-08-29 18:34
閱讀 1176·2019-08-29 10:58
閱讀 1232·2019-08-28 18:22
閱讀 1229·2019-08-26 13:33