李沐动手学深度学习笔记---AlexNet
简介:更深更大的LeNet
主要改进为:丢弃法Dropout、Relu、MaxPooling;激活函数从sigmoid变成了Relu;隐藏全连接层后加入丢弃层;数据增强
架构:
总体架构:
代码实现:
import torch from torch import nn from d2l import torch as d2l net=nn.Sequential( # 这⾥,我们使⽤⼀个11*11的更⼤窗⼝来捕捉对象。 # 同时,步幅为4,以减少输出的⾼度和宽度。 # 另外,输出通道的数⽬远⼤于LeNet nn.Conv2d(1,96,kernel_size=11,stride=4,padding=1),nn.ReLU(), nn.MaxPool2d(kernel_size=3,stride=2), # 减⼩卷积窗⼝,使⽤填充为2来使得输⼊与输出的⾼和宽⼀致,且增⼤输出通道 nn.Conv2d(96,256,kernel_size=5,padding=2),nn.ReLU(), nn.MaxPool2d(kernel_size=3,padding=2), # 使⽤三个连续的卷积层和较⼩的卷积窗⼝。 # 除了最后的卷积层,输出通道的数量进⼀步增加。 # 在前两个卷积层之后,汇聚层不⽤于减少输⼊的⾼度和宽度 nn.Conv2d(256,384,kernel_size=3,padding=1),nn.ReLU(), nn.Conv2d(384,384,kernel_size=3,padding=1),nn.ReLU(), nn.Conv2d(384,256,kernel_size=3,padding=1),nn.ReLU(), nn.MaxPool2d(kernel_size=3,stride=2),nn.Flatten(), # 这⾥,全连接层的输出数量是LeNet中的好⼏倍。使⽤dropout层来减轻过拟合 nn.Linear(6400,4096),nn.ReLU(),nn.Dropout(p=0.5), nn.Linear(4096,4096),nn.ReLU(),nn.Dropout(p=0.5), # 最后是输出层。由于这⾥使⽤Fashion-MNIST,所以⽤类别数为10,⽽⾮论⽂中的1000 nn.Linear(4096,10) ) X=torch.randn(1,1,224,224) for layer in net: X=layer(X) #首先用layer.__class__将实例变量指向类,然后再去调用__name__类属性 print(layer.__class__.__name__,output shape: ,X.shape) # 读取数据集 batch_size = 128 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) #训练AlexNet lr,num_epochs=0.01,10 #可调lr d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
train_ch6():
#为了使⽤GPU,我们还需要⼀点⼩改动。与 3.6节中定义的train_epoch_ch3不同,在进⾏正向和反向传播 #之前,我们需要将每⼀⼩批量数据移动到我们指定的设备(例如GPU)上。 def train_ch6(net,train_iter,test_iter,num_epochs,lr,device): def init_weights(m): if type(m)==nn.Linear or type(m)==nn.Conv2d: nn.init.xavier_uniform_(m.weight) net.apply(init_weights) print(training on,device) net.to(device)#区别 optimizer=torch.optim.SGD(net.parameters(),lr=lr)#SGD随机梯度下降算法 loss=nn.CrossEntropyLoss()#交叉熵 animator=d2l.Animator(xlabel=epoch,xlim=[1,num_epochs], legend=[train loss,train acc,test acc]) timer,num_batches=d2l.Timer(),len(train_iter) for epoch in range(num_epochs): metric=d2l.Accumulator(3) net.train() for i,(X,y) in enumerate(train_iter): timer.start() optimizer.zero_grad() X,y=X.to(device),y.to(device)#将输入和输出挪到GPU上,主要区别 y_hat=net(X)#前向操作 l=loss(y_hat,y) l.backward() optimizer.step()#迭代 with torch.no_grad(): metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0]) timer.stop() train_l = metric[0] / metric[2] train_acc = metric[1] / metric[2] if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1: animator.add(epoch + (i + 1) / num_batches, (train_l, train_acc, None)) test_acc = evaluate_accuracy_gpu(net, test_iter) animator.add(epoch + 1, (None, None, test_acc)) print(floss {train_l:.3f}, train acc {train_acc:.3f}, ftest acc {test_acc:.3f}) print(f{metric[2] * num_epochs / timer.sum():.1f} examples/sec fon {str(device)})
上一篇:
JS实现多线程数据分片下载
下一篇:
猎头推荐转行大数据分析师骗局