Model Tuning for an Underfitting Deep Learning Model
Generating an underfitted model:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from study.torchData import tensorDataGenRe, mse_cla, fit, split_loader

# Generate regression data with the custom helper, then split it into
# train / test DataLoaders
features, labels = tensorDataGenRe(w=[2, -1], bias=False, bag=2)
train, test = split_loader(features, labels)

# A single linear layer: deliberately too simple for the data, so it underfits
class LR(nn.Module):
    def __init__(self, in_features=2, out_features=1):
        super(LR, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        out = self.linear(x)
        return out

Lr = LR()
torch.manual_seed(929)

# Record the train / test MSE after each epoch
train_mse = []
test_mse = []
num_epochs = 10
for epochs in range(num_epochs):
    fit(net=Lr, epochs=epochs, criterion=nn.MSELoss(),
        optimizer=optim.SGD(Lr.parameters(), lr=0.03), batchdata=train)
    train_mse.append(mse_cla(train, Lr).detach().numpy())
    test_mse.append(mse_cla(test, Lr).detach().numpy())

if __name__ == '__main__':
    print(train_mse)
    print(test_mse)
    # plt.plot(list(range(num_epochs)), train_mse)
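To make the underfitting visible, the commented-out plotting call can be expanded into a quick comparison of the recorded curves. This is a minimal sketch that reuses the train_mse / test_mse lists and the matplotlib import from the script above:

# Train and test MSE both plateau at a high value and stay close together,
# which is the typical signature of an underfitting model
plt.plot(range(num_epochs), train_mse, label='train MSE')
plt.plot(range(num_epochs), test_mse, label='test MSE')
plt.xlabel('epochs')
plt.ylabel('MSE')
plt.legend()
plt.show()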
Optimization:
Increasing network complexity (without adding an activation function):
class LR_1(nn.Module):
    def __init__(self, in_features=2, n_hidden=4, out_features=1):
        super(LR_1, self).__init__()
        # The two layers need distinct attribute names; assigning both to
        # self.linear would overwrite the first layer with the second
        self.linear1 = nn.Linear(in_features, n_hidden)
        self.linear2 = nn.Linear(n_hidden, out_features)

    def forward(self, x):
        zhat = self.linear1(x)
        out = self.linear2(zhat)
        return out
Plotting the results shows no significant improvement in loss, although training does become more stable. This is expected: composing linear layers yields just another linear map (a quick check of this is sketched below), so stacking plain linear layers cannot fit the higher-order terms in the data. Next we introduce activation functions.
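A short numerical check of that claim, using hypothetical weight matrices and omitting biases for clarity:

# Two stacked linear maps collapse into one: W2 @ (W1 @ x) == (W2 @ W1) @ x
W1 = torch.randn(4, 2)   # hidden layer weights (hypothetical)
W2 = torch.randn(1, 4)   # output layer weights (hypothetical)
x = torch.randn(2, 1)

two_layer = W2 @ (W1 @ x)
one_layer = (W2 @ W1) @ x
print(torch.allclose(two_layer, one_layer))  # True: the model is still linear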
Adding activation functions:
class LR_1(nn.Module):
    def __init__(self, in_features=2, n_hidden=4, out_features=1):
        super(LR_1, self).__init__()
        self.linear1 = nn.Linear(in_features, n_hidden)
        self.linear2 = nn.Linear(n_hidden, out_features)

    def forward(self, x):
        zhat = self.linear1(x)
        out = self.linear2(zhat)
        return out

class sigmoid_LR(nn.Module):
    def __init__(self, in_features=2, n_hidden=4, out_features=1):
        super(sigmoid_LR, self).__init__()
        self.linear1 = nn.Linear(in_features, n_hidden)
        self.linear2 = nn.Linear(n_hidden, out_features)

    def forward(self, x):
        zhat = torch.sigmoid(self.linear1(x))
        out = self.linear2(zhat)
        return out

class tanh_LR(nn.Module):
    def __init__(self, in_features=2, n_hidden=4, out_features=1):
        super(tanh_LR, self).__init__()
        self.linear1 = nn.Linear(in_features, n_hidden)
        self.linear2 = nn.Linear(n_hidden, out_features)

    def forward(self, x):
        zhat = torch.tanh(self.linear1(x))
        out = self.linear2(zhat)
        return out

class relu_LR(nn.Module):
    def __init__(self, in_features=2, n_hidden=4, out_features=1):
        super(relu_LR, self).__init__()
        self.linear1 = nn.Linear(in_features, n_hidden)
        self.linear2 = nn.Linear(n_hidden, out_features)

    def forward(self, x):
        zhat = torch.relu(self.linear1(x))
        out = self.linear2(zhat)
        return out

# Instantiate one model per activation function
sigmoid_LR = sigmoid_LR()
relu_LR = relu_LR()
tanh_LR = tanh_LR()
Lr = LR_1()
model_list = [Lr, sigmoid_LR, relu_LR, tanh_LR]

num_epochs = 20
torch.manual_seed(929)
train_mse = torch.zeros(len(model_list), num_epochs)
test_mse = torch.zeros(len(model_list), num_epochs)

for epochs in range(num_epochs):
    for i, model in enumerate(model_list):
        # Each model must be trained with its own parameters in the optimizer,
        # not Lr.parameters(), otherwise only Lr would ever be updated
        fit(net=model, epochs=epochs, criterion=nn.MSELoss(),
            optimizer=optim.SGD(model.parameters(), lr=0.03), batchdata=train)
        train_mse[i][epochs] = mse_cla(train, model).detach()
        test_mse[i][epochs] = mse_cla(test, model).detach()
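The recorded MSE tensors can then be plotted together to compare the four models. A minimal sketch, assuming matplotlib is imported as in the first script; the label order mirrors model_list:

# Plot the test-set MSE curve of each model for comparison
for i, name in enumerate(['linear', 'sigmoid', 'relu', 'tanh']):
    plt.plot(range(num_epochs), test_mse[i].numpy(), label=name)
plt.xlabel('epochs')
plt.ylabel('test MSE')
plt.legend()
plt.show()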
Comparing the curves, the model with the ReLU activation is the most stable and performs best overall.
Adding more ReLU neurons beyond this point actually degrades performance, a phenomenon known as "dying ReLU" (dead neurons).
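One way to spot dead neurons is to count the hidden units whose ReLU output is zero for every sample: such units pass no gradient and stop learning. A minimal diagnostic sketch, assuming the trained relu_LR model and the raw features tensor from the scripts above:

# Count ReLU hidden units that output zero for every sample ("dead" neurons)
with torch.no_grad():
    hidden = torch.relu(relu_LR.linear1(features))   # shape: (n_samples, n_hidden)
    dead = (hidden == 0).all(dim=0)                  # True where a unit is always zero
print('dead neurons:', int(dead.sum()), '/', hidden.shape[1])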