聚类分析----C-均值算法练习
数据:
0 0
0 1
4 4
4 5
5 4
5 5
1 0
# Cluster analysis with the C-means (k-means) algorithm.
from fileUtil import FileUtil
from classUtil import ClassUtil

# Number of clusters to build.
CLUSTER_COUNT = 2

# Raw string on purpose: in a normal literal "\2" would be read as an octal
# escape (the character U+0002) instead of a backslash followed by "2".
DATA_FILE = r"E:\2-1.txt"


def main():
    """Cluster the samples in DATA_FILE and print one cluster per line.

    Steps:
        1. Take the first CLUSTER_COUNT samples as the initial centres.
        2. Assign every sample to its nearest centre.
        3. Recompute each centre from its cluster and re-assign, repeating
           until the centres no longer change.

    Raises:
        ValueError: If the data file holds fewer samples than clusters.
    """
    samples = FileUtil.openFile(DATA_FILE)
    if len(samples) < CLUSTER_COUNT:
        raise ValueError(
            "need at least %d samples, got %d" % (CLUSTER_COUNT, len(samples))
        )

    # 1. Pick the first c samples as the initial cluster centres.
    centers = [samples[i] for i in range(CLUSTER_COUNT)]

    # 2. Assign every sample to the cluster of its nearest centre.
    clusters = ClassUtil.minDivide(centers, samples)

    # 3. Recompute centres and re-assign until the centres are stable.
    while True:
        # A cluster that received no sample keeps its previous centre rather
        # than asking for the mean of nothing.
        new_centers = [
            ClassUtil.getZ(cluster) if cluster else center
            for cluster, center in zip(clusters, centers)
        ]
        # Exact comparison is intended: an unchanged assignment reproduces
        # exactly the same centre values.
        if new_centers == centers:
            break
        centers = new_centers
        clusters = ClassUtil.minDivide(centers, samples)

    for cluster in clusters:
        print(cluster)


if __name__ == "__main__":
    main()
class FileUtil:
    """File-reading helpers for the clustering exercise."""

    @staticmethod
    def openFile(fileName):
        """Read 2-D integer coordinates from a whitespace-separated text file.

        Every non-blank line must start with two integer tokens; any further
        tokens on the line are ignored. Blank lines are skipped.

        Args:
            fileName: Path of the text file to read.

        Returns:
            A list of ``[x, y]`` integer pairs in file order.

        Raises:
            OSError: If the file cannot be opened.
            IndexError: If a non-blank line has fewer than two tokens.
            ValueError: If one of the first two tokens is not an integer.
        """
        X = []
        # The context manager closes the file even when parsing fails.
        with open(fileName) as f:
            for line in f:
                data = line.split()
                if not data:
                    # Tolerate empty lines, e.g. a trailing newline at EOF.
                    continue
                X.append([int(data[0]), int(data[1])])
        return X
import math


class MathUtil:
    """Small numeric helpers."""

    @staticmethod
    def ED(x, y):
        """Return the Euclidean distance between the points x and y.

        Coordinates are paired by position over the length of ``x``;
        ``y`` is indexed with the same positions.
        """
        squared_total = 0
        for position, coordinate in enumerate(x):
            delta = coordinate - y[position]
            squared_total += delta ** 2
        return math.sqrt(squared_total)
from mathUtil import MathUtil


class ClassUtil:
    """Helpers for the two steps of a C-means (k-means) iteration."""

    @staticmethod
    def minDivide(Z, X):
        """Assign every sample to the cluster of its nearest centre.

        Distances are measured with ``MathUtil.ED``. When several centres are
        equally near, the one with the lowest index wins.

        Args:
            Z: Sequence of cluster centres (each a sequence of coordinates).
            X: Sequence of samples to distribute among the centres.

        Returns:
            A list with one list per centre, in the order of ``Z``; the i-th
            list holds the samples of ``X`` whose nearest centre is ``Z[i]``.
            Only samples are stored: the centre itself is not added, so it
            cannot distort a later mean. A centre that attracts no sample
            yields an empty list.

        Raises:
            ValueError: If there are samples to assign but no centres.
        """
        if X and not Z:
            raise ValueError("cannot assign samples without cluster centres")
        clusters = [[] for _ in Z]
        for sample in X:
            # min() returns the first minimal index, i.e. ties go to the
            # earliest centre.
            nearest = min(
                range(len(Z)),
                key=lambda j: MathUtil.ED(Z[j], sample),
            )
            clusters[nearest].append(sample)
        return clusters

    @staticmethod
    def getZ(X):
        """Return the centre (coordinate-wise mean) of the samples in X.

        Args:
            X: Non-empty sequence of samples of equal dimensionality.

        Returns:
            A list of floats, one mean value per coordinate.

        Raises:
            ValueError: If ``X`` is empty.
        """
        if not X:
            raise ValueError("cannot compute the centre of an empty cluster")
        count = len(X)
        # zip(*X) walks the samples column by column, so every sample is
        # counted exactly once per coordinate.
        return [sum(column) / count for column in zip(*X)]