一、在多分类任务实验中实现momentum、rmsprop、adam优化器
1.1 任务内容
在手动实现多分类的任务中手动实现三种优化算法，并补全Adam中计算部分的内容；在torch.nn实现多分类的任务中使用torch.nn实现各种优化器，并对比其效果。
1.2 任务思路及代码
一、在多分类任务实验中实现momentum、rmsprop、adam优化器
1.1 任务内容
在手动实现多分类的任务中手动实现三种优化算法并补全Adam中计算部分的内容在torch.nn实现多分类的任务中使用torch.nn实现各种优化器并对比其效果
1.2 任务思路及代码
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import transforms
import time
from torch.nn import CrossEntropyLoss
device torch.device(cuda if torch.cuda.is_available() else cpu) # 如果有gpu则在gpu上计算 加快计算速度
print(f当前使用的device为{device})# 多分类任务
mnist_train torchvision.datasets.FashionMNIST(root./FashionMNIST, trainTrue, downloadTrue, transformtransforms.ToTensor())
mnist_test torchvision.datasets.FashionMNIST(root./FashionMNIST, trainFalse, downloadTrue, transformtransforms.ToTensor())
batch_size 256
train_iter torch.utils.data.DataLoader(mnist_train, batch_sizebatch_size, shuffleTrue, num_workers0)
test_iter torch.utils.data.DataLoader(mnist_test, batch_sizebatch_size, shuffleFalse, num_workers0)# 定义绘图函数
import matplotlib.pyplot as plt
def draw(name, trainl, testl,xlabelEpoch,ylabelLoss):plt.rcParams[font.sans-serif][SimHei] #设置字体plt.rcParams[axes.unicode_minus]False #该语句解决图像中的“-”负号的乱码问题plt.figure(figsize(8, 3))plt.title(name[-1]) # 命名color [g,r,b,c]if trainl is not None:plt.subplot(121)for i in range(len(name)-1):plt.plot(trainl[i], ccolor[i],labelname[i])plt.xlabel(xlabel)plt.ylabel(ylabel)plt.legend()if testl is not None:plt.subplot(122)for i in range(len(name)-1):plt.plot(testl[i], ccolor[i], labelname[i])plt.xlabel(xlabel)plt.ylabel(ylabel)plt.legend()
# 自定义实现
class Net():def __init__(self):# 设置隐藏层和输出层的节点数num_inputs, num_hiddens, num_outputs 28 * 28, 256, 10 # 十分类问题self.w_1 torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_inputs)), dtypetorch.float32,requires_gradTrue)self.b_1 torch.zeros(num_hiddens, dtypetorch.float32, requires_gradTrue)self.w_2 torch.tensor(np.random.normal(0, 0.01, (num_outputs, num_hiddens)), dtypetorch.float32,requires_gradTrue)self.b_2 torch.zeros(num_outputs, dtypetorch.float32, requires_gradTrue)self.params[self.w_1, self.b_1, self.w_2, self.b_2]self.w [self.w_1,self.w_2]# 定义模型结构self.input_layer lambda x: x.view(x.shape[0], -1)self.hidden_layer lambda x: self.my_relu(torch.matmul(x, self.w_1.t()) self.b_1)self.output_layer lambda x: nn.functional.softmax(torch.matmul(x, self.w_2.t()) self.b_2, dim1)self.momentum_states [torch.zeros_like(param) for param in self.params]def my_relu(self, x):return torch.max(inputx, othertorch.tensor(0.0))# 定义前向传播def forward(self, x):x self.input_layer(x)x self.hidden_layer(x)x self.output_layer(x)return xdef my_cross_entropy_loss(y_hat, labels):def log_softmax(y_hat):max_v torch.max(y_hat, dim1).values.unsqueeze(dim1)return y_hat - max_v - torch.log(torch.exp(y_hat-max_v).sum(dim1).unsqueeze(dim1))return (-log_softmax(y_hat))[range(len(y_hat)), labels].mean()# nn实现
class MyNet_NN(nn.Module):def __init__(self,dropout0.0):super(MyNet_NN, self).__init__()# 设置隐藏层和输出层的节点数self.num_inputs, self.num_hiddens, self.num_outputs 28 * 28, 256, 10 # 十分类问题# 定义模型结构self.input_layer nn.Flatten()self.hidden_layer nn.Linear(28*28,256)self.drop nn.Dropout(dropout)self.output_layer nn.Linear(256,10)# 使用relu激活函数self.relu nn.ReLU()# 定义前向传播def forward(self, x):x self.drop(self.input_layer(x))x self.drop(self.hidden_layer(x))x self.relu(x)x self.output_layer(x)return xdef train_and_test(modelNet(),init_statesNone,optimizeroptim.SGD,epochs10,lr0.01,L2False,lambd0):train_all_loss [] test_all_loss [] train_ACC, test_ACC [], [] begintime time.time()criterion CrossEntropyLoss() for epoch in range(epochs):train_l,train_acc_num 0, 0for data, labels in train_iter:pred model.forward(data)train_each_loss criterion(pred, labels) # 若L2为True则表示需要添加L2范数惩罚项if L2 True:train_each_loss lambd * l2_penalty(model.w)train_l train_each_loss.item()train_each_loss.backward() # 反向传播if init_states None: optimizer(model.params, lr, 128) # 使用小批量随机梯度下降迭代模型参数else:states init_states(model.params)optimizer(model.params,states,lrlr)# 梯度清零train_acc_num (pred.argmax(dim1)labels).sum().item()for param in model.params:param.grad.data.zero_()# print(train_each_loss)train_all_loss.append(train_l) # 添加损失值到列表中train_ACC.append(train_acc_num / len(mnist_train)) # 添加准确率到列表中with torch.no_grad():is_train False test_l, test_acc_num 0, 0for data, labels in test_iter:pred model.forward(data)test_each_loss criterion(pred, labels)test_l test_each_loss.item()test_acc_num (pred.argmax(dim1)labels).sum().item()test_all_loss.append(test_l)test_ACC.append(test_acc_num / len(mnist_test)) # # 添加准确率到列表中print(epoch: %d\t train loss:%.5f\t test loss:%.5f\t train acc: %.2f\t test acc: %.2f% (epoch 1, train_l, test_l, train_ACC[-1],test_ACC[-1]))endtime time.time()print(%d轮 总用时: %.3f秒 % ( epochs, endtime - begintime))return train_all_loss,test_all_loss,train_ACC,test_ACC
def train_and_test_NN(modelMyNet_NN(),epochs10,lr0.01,weight_decay0.0,optimizerNone):MyModel modelprint(MyModel)if optimizer None:optimizer SGD(MyModel.parameters(), lrlr,weight_decayweight_decay) criterion CrossEntropyLoss() # 损失函数criterion criterion.to(device)train_all_loss [] test_all_loss [] train_ACC, test_ACC [], []begintime time.time()for epoch in range(epochs):train_l, train_epoch_count, test_epoch_count 0, 0, 0for data, labels in train_iter:data, labels data.to(device), labels.to(device)pred MyModel(data)train_each_loss criterion(pred, labels.view(-1)) # 计算每次的损失值optimizer.zero_grad() train_each_loss.backward() optimizer.step() train_l train_each_loss.item()train_epoch_count (pred.argmax(dim1)labels).sum()train_ACC.append(train_epoch_count/len(mnist_train))train_all_loss.append(train_l) with torch.no_grad():test_loss, test_epoch_count 0, 0for data, labels in test_iter:data, labels data.to(device), labels.to(device)pred MyModel(data)test_each_loss criterion(pred,labels)test_loss test_each_loss.item()test_epoch_count (pred.argmax(dim1)labels).sum()test_all_loss.append(test_loss)test_ACC.append(test_epoch_count.cpu()/len(mnist_test))print(epoch: %d\t train loss:%.5f\t test loss:%.5f\t train acc:%5f test acc:%.5f: % (epoch 1, train_all_loss[-1], test_all_loss[-1],train_ACC[-1],test_ACC[-1]))endtime time.time()print(torch.nn实现前馈网络-多分类任务 %d轮 总用时: %.3f秒 % (epochs, endtime - begintime))# 返回训练集和测试集上的 损失值 与 准确率return train_all_loss,test_all_loss,train_ACC,test_ACC# 手动实现momentum
def init_momentum(params):w1,b1,w2,b2 torch.zeros(params[0].shape),torch.zeros(params[1].shape),torch.zeros(params[2].shape),torch.zeros(params[3].shape)return (w1,b1,w2,b2)def sgd_momentum(params, states, lr0.01, momentum0.9):for p, v in zip(params, states):with torch.no_grad():v[:] momentum * v - p.gradp[:] lr*vp.grad.data.zero_()net11 Net()
trainL11, testL11, trainAcc11, testAcc11 train_and_test(modelnet11,epochs10,init_statesinit_momentum, optimizersgd_momentum)# nn实现Momentum
net12 MyNet_NN()
net12 net12.to(device)
momentum_optimizer optim.SGD(net12.parameters(), lr0.01, momentum0.9)
trainL12, testL12, trainAcc12, testAcc12 train_and_test_NN(modelnet12,epochs10,optimizermomentum_optimizer) # 手动实现RMSpropdef init_rmsprop(params):s_w1, s_b1, s_w2, s_b2 torch.zeros(params[0].shape), torch.zeros(params[1].shape),\torch.zeros(params[2].shape), torch.zeros(params[3].shape)return (s_w1, s_b1, s_w2, s_b2)def rmsprop(params,states,lr0.01,gamma0.9):gamma, eps gamma, 1e-6for p, s in zip(params,states):with torch.no_grad():s[:] gamma * s (1 - gamma) * torch.square(p.grad)p[:] - lr * p.grad / torch.sqrt(s eps)p.grad.data.zero_()net21 Net()
trainL21, testL21, trainAcc21, testAcc21 train_and_test(modelnet21,epochs10,init_statesinit_rmsprop, optimizerrmsprop)# nn实现RMSprop
net22 MyNet_NN()
net22 net22.to(device)
optim_RMSprop torch.optim.RMSprop(net22.parameters(), lr0.01, alpha0.9, eps1e-6)
trainL22, testL22, trainAcc22, testAcc22 train_and_test_NN(modelnet22,epochs10,optimizeroptim_RMSprop) # 手动实现Adam
def init_adam_states(params):v_w1, v_b1, v_w2, v_b2 torch.zeros(params[0].shape), torch.zeros(params[1].shape),\torch.zeros(params[2].shape), torch.zeros(params[3].shape)s_w1, s_b1, s_w2, s_b2 torch.zeros(params[0].shape), torch.zeros(params[1].shape),\torch.zeros(params[2].shape), torch.zeros(params[3].shape)return ((v_w1, s_w1), (v_b1, s_b1),(v_w2, s_w2), (v_b2, s_b2))# 根据Adam算法思想手动实现Adam
Adam_t 0.01
def Adam(params, states, lr0.01, tAdam_t):global Adam_tbeta1, beta2, eps 0.9, 0.999, 1e-6for p, (v, s) in zip(params, states):with torch.no_grad():v[:] beta1 * v (1 - beta1) * p.grads[:] beta2 * s (1 - beta2) * (p.grad**2)v_bias_corr v / (1 - beta1 ** Adam_t)s_bias_corr s / (1 - beta2 ** Adam_t)p.data - lr * v_bias_corr / (torch.sqrt(s_bias_corr eps))p.grad.data.zero_()Adam_t 1net31 Net()
trainL31, testL31, trainAcc31, testAcc31 train_and_test(modelnet31,epochs10,init_statesinit_adam_states, optimizerAdam) # nn实现adam
net32 MyNet_NN()
net32 net32.to(device)optim_Adam torch.optim.Adam(net32.parameters(), lr0.01, betas(0.9,0.999),eps1e-6)
trainL32, testL32, trainAcc32, testAcc32 train_and_test_NN(modelnet32,epochs10,optimizeroptim_Adam) name11 [RMSprop,Momentum,Adam,手动实现不同的优化器-Loss变化]
train11 [trainL11,trainL21,trainL31]
test11 [testL11, testL21, testL31]
draw(name11, train11, test11)name12 [RMSprop,Momentum,Adam,torch.nn实现不同的优化器-Loss变化]
train12 [trainL12,trainL22,trainL32]
test12 [testL12, testL22, testL32]
draw(name12, train12, test12)二、在多分类任务实验中分别手动实现和用torch.nn实现正则化
2.1 任务内容
探究惩罚项的权重对实验结果的影响可用loss曲线进行展示
2.2 任务思路及代码
# 定义L2范数惩罚项
def l2_penalty(w):cost 0for i in range(len(w)):cost (w[i]**2).sum()return cost / batch_size / 2# 手动实现
net221 Net()
trainL221, testL221, trainAcc221, testAcc221 train_and_test(modelnet221,epochs10,init_statesinit_momentum, optimizersgd_momentum,lr0.01,L2True,lambd0)net222 Net()
trainL222, testL222, trainAcc222, testAcc222 train_and_test(modelnet222,epochs10,init_statesinit_momentum, optimizersgd_momentum,lr0.01,L2True,lambd2)# 可视化比较
name221 [lambd 0,lambd2,手动实现不同的惩罚权重lambd-Loss变化]
trains221 [trainL221,trainL222]
tests221 [testL221,testL222]
draw(name221, trains221, tests221)## nn实现
net223 MyNet_NN()
net223 net223.to(device)
momentum_optimizer optim.SGD(net223.parameters(), lr0.01, momentum0.9)
trainL223, testL223, trainAcc223, testAcc223 train_and_test_NN(modelnet223,epochs10,optimizermomentum_optimizer,lr0.01,weight_decay0.0)net224 MyNet_NN()
net224 net223.to(device)
momentum_optimizer optim.SGD(net224.parameters(), lr0.01, momentum0.9)
trainL224, testL224, trainAcc224, testAcc224 train_and_test_NN(modelnet224,epochs10,optimizermomentum_optimizer,lr0.01,weight_decay0.01)# 可视化比较
name222 [weight_decay0,weight_decay 0.01,torch.nn实现不同的惩罚权重lambd-Loss变化]
trains222 [trainL223,trainL224]
tests222 [testL223,testL224]
draw(name222, trains222, tests222)三、在多分类任务实验中分别手动实现和用torch.nn实现dropout
3.1 任务内容
探究不同丢弃率对实验结果的影响可用loss曲线进行展示
3.2 任务思路及代码
# 为手动模型添加dropout项
class MyNet():def __init__(self,dropout0.0):# 设置隐藏层和输出层的节点数# global dropoutself.dropout dropoutprint(dropout: ,self.dropout)self.is_train Nonenum_inputs, num_hiddens, num_outputs 28 * 28, 256, 10 # 十分类问题w_1 torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_inputs)), dtypetorch.float32,requires_gradTrue)b_1 torch.zeros(num_hiddens, dtypetorch.float32, requires_gradTrue)w_2 torch.tensor(np.random.normal(0, 0.01, (num_outputs, num_hiddens)), dtypetorch.float32,requires_gradTrue)b_2 torch.zeros(num_outputs, dtypetorch.float32, requires_gradTrue)self.params [w_1, b_1, w_2, b_2]self.w [w_1,w_2]# 定义模型结构self.input_layer lambda x: x.view(x.shape[0], -1)self.hidden_layer lambda x: self.my_relu(torch.matmul(x, w_1.t()) b_1)self.output_layer lambda x: torch.matmul(x, w_2.t()) b_2def my_relu(self, x):return torch.max(inputx, othertorch.tensor(0.0))def train(self):self.is_train Truedef test(self):self.is_test Falsedef dropout_layer(self, x):dropout self.dropoutassert 0 dropout 1 #dropout值必须在0-1之间# dropout1所有元素都被丢弃。if dropout 1:return torch.zeros_like(x)# 在本情况中所有元素都被保留。if dropout 0:return xmask (torch.rand(x.shape) 1.0 - dropout).float() #rand()返回一个张量包含了从区间[0, 1)的均匀分布中抽取的一组随机数return mask * x / (1.0 - dropout)# 定义前向传播def forward(self, x):x self.input_layer(x)if self.is_train: # 如果是训练过程则需要开启dropout 否则 需要关闭 dropoutx self.dropout_layer(x) elif self.is_test:x self.dropout_layer(x)x self.my_relu(self.hidden_layer(x))x self.output_layer(x)return x
def train_and_test3(modelMyNet(),init_statesNone,optimizeroptim.SGD,epochs20,lr0.01,L2False,lambd0):train_all_loss [] test_all_loss [] train_ACC, test_ACC [], [] begintime time.time()criterion CrossEntropyLoss() # 损失函数model.train() for epoch in range(epochs):train_l,train_acc_num 0, 0for data, labels in train_iter:pred model.forward(data)train_each_loss criterion(pred, labels) # 计算每次的损失值if L2 True:train_each_loss lambd * l2_penalty(model.w)train_l train_each_loss.item()train_each_loss.backward() # 反向传播if init_states None: optimizer(model.params, lr, 128) # 使用小批量随机梯度下降迭代模型参数else:states init_states(model.params)optimizer(model.params,states,lrlr)# 梯度清零train_acc_num (pred.argmax(dim1)labels).sum().item()for param in model.params:param.grad.data.zero_()train_all_loss.append(train_l) train_ACC.append(train_acc_num / len(mnist_train)) # 添加准确率到列表中model.test() with torch.no_grad():is_train False # 表明当前为测试阶段不需要dropout参与test_l, test_acc_num 0, 0for data, labels in test_iter:pred model.forward(data)test_each_loss criterion(pred, labels)test_l test_each_loss.item()test_acc_num (pred.argmax(dim1)labels).sum().item()test_all_loss.append(test_l)test_ACC.append(test_acc_num / len(mnist_test)) # # 添加准确率到列表中print(epoch: %d\t train loss:%.5f\t test loss:%.5f\t train acc: %.2f\t test acc: %.2f% (epoch 1, train_l, test_l, train_ACC[-1],test_ACC[-1]))endtime time.time()print(手动实现dropout, %d轮 总用时: %.3f % ( epochs, endtime - begintime))return train_all_loss,test_all_loss,train_ACC,test_ACC# 手动实现dropout
net331 MyNet(dropout 0.0)
trainL331, testL331, trainAcc331, testAcc331 train_and_test3(modelnet331,epochs10,init_statesinit_momentum, optimizersgd_momentum,lr0.01)net332 MyNet(dropout 0.3)
trainL332, testL332, trainAcc332, testAcc332 train_and_test3(modelnet332,epochs10,init_statesinit_momentum, optimizersgd_momentum,lr0.01)net333 MyNet(dropout 0.5)
trainL333, testL333, trainAcc333, testAcc333 train_and_test3(modelnet333,epochs10,init_statesinit_momentum, optimizersgd_momentum,lr0.01)net334 MyNet(dropout 0.8)
trainL334, testL334, trainAcc334, testAcc334 train_and_test3(modelnet334,epochs10,init_statesinit_momentum, optimizersgd_momentum,lr0.01)name331 [dropout0,dropout0.3,dropout0.5,dropout0.8,手动实现不同的dropout-Loss变化]
train331 [trainL331,trainL332,trainL333,trainL334]
test331 [testL331,testL332,testL333,testL334]
draw(name331, train331, test331)# nn实现dropout
net341 MyNet_NN(dropout0)
net341 net341.to(device)
momentum_optimizer optim.SGD(net341.parameters(), lr0.01, momentum0.9)
trainL341, testL341, trainAcc341, testAcc341 train_and_test_NN(modelnet341,epochs10,optimizermomentum_optimizer,lr0.01)net342 MyNet_NN(dropout0.3)
net342 net342.to(device)
momentum_optimizer optim.SGD(net342.parameters(), lr0.01, momentum0.9)
trainL342, testL342, trainAcc342, testAcc342 train_and_test_NN(modelnet342,epochs10,optimizermomentum_optimizer,lr0.01)net343 MyNet_NN(dropout0.5)
net343 net341.to(device)
momentum_optimizer optim.SGD(net343.parameters(), lr0.01, momentum0.9)
trainL343, testL343, trainAcc343, testAcc343 train_and_test_NN(modelnet343,epochs10,optimizermomentum_optimizer,lr0.01)net344 MyNet_NN(dropout0.8)
net344 net344.to(device)
momentum_optimizer optim.SGD(net344.parameters(), lr0.01, momentum0.9)
trainL344, testL344, trainAcc344, testAcc344 train_and_test_NN(modelnet344,epochs10,optimizermomentum_optimizer,lr0.01)name332 [dropout0,dropout0.3,dropout0.5,dropout0.8,手动实现不同的dropout-Loss变化]
train332 [trainL341,trainL342,trainL343,trainL344]
test332 [testL341,testL342,testL343,testL344]
draw(name332, train332, test332)四、对多分类任务实验中实现早停机制并在测试集上测试
4.1 任务内容
选择上述实验中效果最好的组合，手动将训练数据划分为训练集和验证集（训练集:验证集 = 8:2），实现早停机制并在测试集上进行测试，早停轮数为5。
4.2 任务思路及代码
# 构建数据集
import random
index list(range(len(mnist_train)))
random.shuffle(index)# 按照 训练集和验证集 82 的比例分配各自下标
train_index, val_index index[ : 48000], index[48000 : ]train_dataset, train_labels mnist_train.data[train_index], mnist_train.targets[train_index]
val_dataset, val_labels mnist_train.data[val_index], mnist_train.targets[val_index]
print(训练集:, train_dataset.shape, train_labels.shape)
print(验证集:, val_dataset.shape,val_labels.shape)T_dataset torch.utils.data.TensorDataset(train_dataset,train_labels)
V_dataset torch.utils.data.TensorDataset(val_dataset,val_labels)
T_dataloader torch.utils.data.DataLoader(datasetT_dataset,batch_size128,shuffleTrue)
V_dataloader torch.utils.data.DataLoader(datasetV_dataset,batch_size128,shuffleTrue)
print(T_dataset,len(T_dataset),T_dataloader batch_size: 128)
print(V_dataset,len(V_dataset),V_dataloader batch_size: 128)
def train_and_test_4(modelMyNet(0.0),epochs10,lr0.01,weight_decay0.0):print(model)# 优化函数, 默认情况下weight_decay为0 通过更改weight_decay的值可以实现L2正则化。optimizer torch.optim.Adam(model.parameters(), lr0.01, betas(0.9,0.999),eps1e-6)criterion CrossEntropyLoss() # 损失函数train_all_loss [] # 记录训练集上得loss变化val_all_loss [] # 记录测试集上的loss变化train_ACC, val_ACC [], []begintime time.time()flag_stop 0for epoch in range(1000):train_l, train_epoch_count, val_epoch_count 0, 0, 0for data, labels in T_dataloader:data, labels data.to(torch.float32).to(device), labels.to(device)pred model(data)train_each_loss criterion(pred, labels.view(-1)) # 计算每次的损失值optimizer.zero_grad() # 梯度清零train_each_loss.backward() # 反向传播optimizer.step() # 梯度更新train_l train_each_loss.item()train_epoch_count (pred.argmax(dim1)labels).sum()train_ACC.append(train_epoch_count/len(train_dataset))train_all_loss.append(train_l) # 添加损失值到列表中with torch.no_grad():val_loss, val_epoch_count 0, 0for data, labels in V_dataloader:data, labels data.to(torch.float32).to(device), labels.to(device)pred model(data)val_each_loss criterion(pred,labels)val_loss val_each_loss.item()val_epoch_count (pred.argmax(dim1)labels).sum()val_all_loss.append(val_loss)val_ACC.append(val_epoch_count / len(val_dataset))# 实现早停机制# 若连续五次验证集的损失值连续增大则停止运行否则继续运行if epoch 5 and val_all_loss[-1] val_all_loss[-2]:flag_stop 1if flag_stop 5 or epoch 35:print(停止运行防止过拟合)breakelse:flag_stop 0if epoch 0 or (epoch 1) % 4 0:print(epoch: %d | train loss:%.5f | val loss:%.5f | train acc:%5f val acc:%.5f: % (epoch 1, train_all_loss[-1], val_all_loss[-1],train_ACC[-1],val_ACC[-1]))endtime time.time()print(torch.nn实现前馈网络-多分类任务 %d轮 总用时: %.3fs % (epochs, endtime - begintime))# 返回训练集和测试集上的 损失值 与 准确率return train_all_loss,val_all_loss,train_ACC,val_ACCnet4 MyNet_NN(dropout0.5)
net4 net4.to(device)
trainL4, testL4, trainAcc4, testAcc4 train_and_test_4(modelnet4,epochs 10000,lr0.1)
draw([, 早停机制], [trainL4], [testL4])实验总结
实验中我们通过两种方式构建了前馈神经网络一种是手动搭建另一种是利用PyTorch中的torch.nn模块进行构建。在这两种网络结构的基础上分别引入了dropout层以有效地防止模型的过拟合现象。 首先在优化器的选择上我们尝试了不同的优化函数并对它们在模型训练中的效果进行了比较。不同的优化器具有不同的优点通过对比它们的性能我们可以更好地选择适合具体任务的优化器进一步提升模型的性能。 其次我们引入了惩罚权重的概念通过增加惩罚项来约束模型的复杂度。实验结果表明适度增加惩罚权重可以在一定程度上增大模型输出的损失但同时也达到了防止过拟合的效果。这进一步证实了模型复杂度与过拟合之间存在一定的权衡关系。 通过实验我们观察到适当设置dropout的概率可以显著减轻模型的过拟合问题。dropout通过在训练过程中随机丢弃一部分神经元的输出有效降低了模型对于训练数据的过度依赖提高了模型的泛化能力从而在测试集上表现更为鲁棒。 最后为了进一步提高模型的训练效果我们引入了早停机制。该机制通过监测在验证集上的测试误差在发现测试误差上升的情况下停止训练以防止网络过拟合。早停机制在一定程度上能够避免模型在训练过程中过分拟合训练数据从而提高了模型的泛化性能。
通过以上实验我们综合考虑了dropout、惩罚权重、不同优化器以及早停机制等因素为构建更稳健、泛化能力强的前馈神经网络提供了有益的经验和指导。这些技术手段的灵活运用可以在实际任务中更好地平衡模型的性能和泛化能力。