快捷搜索: 王者荣耀 脱发

聚类分析----C-均值算法练习

数据:

0 0
0 1
4 4
4 5
5 4
5 5
1 0

# 用C-均值法进行聚类分析

from fileUtil import FileUtil
from classUtil import ClassUtil

# Driver script: cluster the 2-D samples stored in a text file with the
# C-means procedure (pick centers, assign by nearest center, recompute
# centers, repeat until the centers stop changing).

# Number of clusters. To read it interactively instead, use:
#     c = int(input("input C :"))
c = 2
# Raw string so the backslash in the Windows path is not read as an escape.
X = FileUtil.openFile(r"E:\2-1.txt")  # sample points loaded from the data file
Z = []  # current cluster centers

# 1. Use the first c samples as the initial cluster centers.
#    (Indexing, not slicing, so too few samples fails loudly with IndexError.)
for i in range(c):
    Z.append(X[i])

# 2. Assign every sample to the cluster of its nearest center.
w = ClassUtil.minDivide(Z, X)

# 3. Recompute the centers and re-assign until the centers no longer change.
while True:
    new_centers = [ClassUtil.getZ(cluster) for cluster in w]
    converged = new_centers == Z
    if not converged:
        Z = new_centers
    # The assignment is refreshed once more even on the converged pass, so
    # the printed clusters always correspond to the final centers.
    w = ClassUtil.minDivide(Z, X)
    if converged:
        break

# Print one cluster per line.
for cluster in w:
    print(cluster)
class FileUtil:
    """File helpers for loading sample data."""

    @staticmethod
    def openFile(fileName):
        """Read 2-D integer coordinates from a text file, one point per line.

        Each non-blank line must start with two whitespace-separated
        integers; any further fields on the line are ignored.

        Args:
            fileName: Path of the text file to read.

        Returns:
            A list of ``[x, y]`` integer pairs in file order.

        Raises:
            OSError: If the file cannot be opened.
            IndexError: If a non-blank line has fewer than two fields.
            ValueError: If one of the first two fields is not an integer.
        """
        X = []
        # The context manager closes the handle even if parsing raises.
        with open(fileName) as f:
            for line in f:
                data = line.split()
                if not data:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                X.append([int(data[0]), int(data[1])])
        return X
import math

class MathUtil:
    """Small numeric helpers."""

    @staticmethod
    def ED(x, y):
        """Return the Euclidean distance between vectors x and y.

        The number of components compared is the length of x; y is indexed
        at the same positions.
        """
        squared_total = 0
        for k, x_k in enumerate(x):
            delta = x_k - y[k]
            squared_total += delta ** 2
        return math.sqrt(squared_total)
from mathUtil import MathUtil

class ClassUtil:
    """Clustering helpers: nearest-center assignment and center computation."""

    @staticmethod
    def minDivide(Z, X):
        """Assign every sample in X to the cluster of its nearest center.

        Distances are computed with ``MathUtil.ED``. When a sample is equally
        close to several centers it goes to the one with the lowest index.

        Args:
            Z: List of cluster centers (coordinate vectors).
            X: List of samples (coordinate vectors) to assign.

        Returns:
            A list with one cluster per center, in the order of Z. Each
            cluster is a list whose first element is the center itself,
            followed by the samples assigned to it.
        """
        # Every cluster starts with its own center, so no cluster is empty.
        ans = [[center] for center in Z]

        for sample in X:
            best = float("inf")
            idx = 0
            for j, center in enumerate(Z):
                dist = MathUtil.ED(center, sample)
                if dist < best:  # strict: ties keep the earlier center
                    best = dist
                    idx = j
            ans[idx].append(sample)
        return ans

    @staticmethod
    def getZ(X):
        """Return the center (component-wise arithmetic mean) of the vectors in X.

        Args:
            X: Non-empty list of coordinate vectors of equal length.

        Returns:
            A new list holding the mean of each coordinate as a float.
            The input is not modified.

        Raises:
            IndexError: If X is empty.
        """
        count = len(X)
        # Start from zeros: seeding the sum with X[0] and then adding every
        # vector again would count the first vector twice.
        totals = [0] * len(X[0])
        for point in X:
            for j, value in enumerate(point):
                totals[j] += value
        return [total / count for total in totals]
经验分享 程序员 微信小程序 职场和发展