机器学习(周志华)-python编程练习-习题3-3
习题3.3 编程实现对率回归,并给出西瓜数据集 3.0α 上的结果.
数据集3.0α
对应的python实现代码
#encoding:UTF-8 import csv import numpy as np from math import * from numpy.linalg import * from matplotlib import pyplot as plt #牛顿迭代法实现对数几率回归 old_ml=0 #初始似然函数值 n=0 beta=np.mat([[0],[0],[1]]) #初始参数[w1;w2;b] #读取数据集 wm_data_30a = csv.reader(open(../data_set/watermelon_data_set_30a.csv,r)) xi_d = np.mat(np.zeros((17,3))) yi_d = np.mat(np.zeros((17,1))) #遍历数据集 sn=0 for stu in wm_data_30a: #[sn x1 x2 y] if(stu[0].isdigit()==True): xi_d[sn,:] = np.mat([float(stu[1]),float(stu[2]),1]) yi_d[sn,0] = float(stu[3]) sn = sn+1 #print(xi_d= ,xi_d) #print(yi_d= ,yi_d) #print(wm_data_30a) iter=0 xi = np.mat(np.zeros((3,1))) while(iter<10): sn=0 #遍历数据 print(iter=,iter) new_ml=0 for idx in range(17):#0~16 #print(idx=,idx) xi = xi_d[idx,:].T #3*1的矩阵 yi=yi_d[idx,0] #print(yi) #计算当前ml函数 # ln(1+beta*xi) - yi*beta*xi new_ml = new_ml + log(1+e**det(beta.T*xi)) - yi*det(beta.T*xi) print(new ml=,new_ml) if abs(new_ml-old_ml)<0.001: print(new_ml-old_ml) print(beta) break #牛顿迭代法更新参数 old_ml = new_ml dml_beta_1 = np.mat([[0],[0],[0]])#3*1矩阵 dml_beta_2 = np.mat(np.zeros((3,3))) #3*3矩阵 #遍历数据 for idx in range(17):#0~16 xi = xi_d[idx,:].T yi=yi_d[idx,0] #print(det(beta.T*xi)) p1_xi = 1-1/(1+e**(det(beta.T*xi))) #计算一阶导 dml_beta_1 = dml_beta_1 - xi*(yi-p1_xi) #矩阵 #计算二阶导 dml_beta_2 = dml_beta_2 + xi*(xi.T)*p1_xi*(1-p1_xi) #print(Neton iter,dml_beta_2) beta = beta - (dml_beta_2.I)*dml_beta_1 print(beta=,beta) iter=iter+1 #%画出散点图以及计算出的直线 #%逐点画 分别表示是否好瓜 for idx in range(17): if yi_d[idx,0]==1: plt.plot(xi_d[idx,0],xi_d[idx,1],+r) else: plt.plot(xi_d[idx,0],xi_d[idx,1],ob) #画出回归线 ply=-(0.1*beta[0,0]+beta[2,0])/beta[1,0]; pry=-(0.9*beta[0,0]+beta[2,0])/beta[1,0]; px=[0.1,0.9] py=[ply,pry] plt.plot(px,py) plt.xlabel(density) plt.ylabel(suger ratio) plt.title(logistic function regression) plt.show()
运算结果
beta=
[[ 3.15832738] [12.52119012] [-4.42886222]]