Training a Model on the MNIST Dataset with PyTorch

1. Create the training script. Create a script named mnist.py with the following content:


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Hyperparameters
BATCH_SIZE = 512
EPOCHS = 20
LEARNING_RATE = 1e-3
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data loading and preprocessing; downloads MNIST into the data directory
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomRotation(10),  # random rotation, improves generalization
                       transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),  # affine augmentation, improves generalization
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=False)

# Model definition: a simple convolutional neural network
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.dropout2 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = x.view(-1, 64 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Initialize the model and optimizer
model = ConvNet().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # learning-rate decay

# Training function
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 30 == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} "
                  f"({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

# Test function
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n")

# Training and evaluation loop
for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)
    scheduler.step()  # update the learning rate

# Save the model
torch.save(model.state_dict(), "mnist_cnn.pth")
print("Model saved to mnist_cnn.pth")

2. Launch the training task. Train with the GPU selected by its device index:


docker run --gpus '"device=5"' \
  -v "$PWD":/workspace \
  -it --rm \
  pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime \
  python mnist.py
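Before committing to a 20-epoch run, it can be worth confirming that PyTorch inside the container actually sees the GPU. A minimal sketch, saved as a hypothetical check_gpu.py and run with the same docker command (replacing `python mnist.py` with `python check_gpu.py`):

import torch

# Print whether CUDA is visible to PyTorch and which device would be used
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device count:", torch.cuda.device_count())
    print("Device name:", torch.cuda.get_device_name(0))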

The training run prints logs like the following:


Train Epoch: 1 [14848/60000 (25%)] Loss: 0.628275
Train Epoch: 1 [30208/60000 (50%)] Loss: 0.372206
Train Epoch: 1 [45568/60000 (75%)] Loss: 0.319735

Test set: Average loss: 0.0831, Accuracy: 9726/10000 (97.26%)

Train Epoch: 2 [14848/60000 (25%)] Loss: 0.165549
Train Epoch: 2 [30208/60000 (50%)] Loss: 0.184633
Train Epoch: 2 [45568/60000 (75%)] Loss: 0.112471

Test set: Average loss: 0.0579, Accuracy: 9805/10000 (98.05%)

Train Epoch: 3 [14848/60000 (25%)] Loss: 0.137946
Train Epoch: 3 [30208/60000 (50%)] Loss: 0.154524
Train Epoch: 3 [45568/60000 (75%)] Loss: 0.159434
...
Test set: Average loss: 0.0206, Accuracy: 9926/10000 (99.26%)

Train Epoch: 18 [14848/60000 (25%)] Loss: 0.071504
Train Epoch: 18 [30208/60000 (50%)] Loss: 0.050344
Train Epoch: 18 [45568/60000 (75%)] Loss: 0.070272

Test set: Average loss: 0.0198, Accuracy: 9929/10000 (99.29%)

Train Epoch: 19 [14848/60000 (25%)] Loss: 0.039598
Train Epoch: 19 [30208/60000 (50%)] Loss: 0.067286
Train Epoch: 19 [45568/60000 (75%)] Loss: 0.048905

Test set: Average loss: 0.0191, Accuracy: 9929/10000 (99.29%)

Train Epoch: 20 [14848/60000 (25%)] Loss: 0.060975
Train Epoch: 20 [30208/60000 (50%)] Loss: 0.087753
Train Epoch: 20 [45568/60000 (75%)] Loss: 0.067398

Test set: Average loss: 0.0189, Accuracy: 9936/10000 (99.36%)

As the log shows, the training loss drops from an initial 0.628275 to around 0.07 after a number of epochs: the model's error keeps shrinking while its test accuracy keeps improving.
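To look at that trend rather than eyeball the log, one option is to capture the output to a file (for example `python mnist.py > train.log`; the file name is just an example) and plot the recorded values afterwards. A minimal sketch, assuming the log format printed by train() and test() above and that matplotlib is available:

import re
import matplotlib.pyplot as plt

train_losses, test_accuracies = [], []
with open("train.log") as f:  # hypothetical file: the captured stdout of mnist.py
    for line in f:
        loss_match = re.search(r"Loss: ([\d.]+)", line)
        if loss_match:
            train_losses.append(float(loss_match.group(1)))
        acc_match = re.search(r"Accuracy: \d+/\d+ \(([\d.]+)%\)", line)
        if acc_match:
            test_accuracies.append(float(acc_match.group(1)))

# Plot the logged training loss and the per-epoch test accuracy side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(train_losses)
ax1.set_xlabel("logged training step")
ax1.set_ylabel("training loss")
ax2.plot(range(1, len(test_accuracies) + 1), test_accuracies)
ax2.set_xlabel("epoch")
ax2.set_ylabel("test accuracy (%)")
plt.tight_layout()
plt.savefig("training_curves.png")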

3. Create the inference script. To check how the model behaves on real-world images, create a script named test.py with the following content:


import sys

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

# Same network structure as used for training
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.dropout2 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = x.view(-1, 64 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Load the model
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(DEVICE)
model.load_state_dict(torch.load("mnist_cnn.pth", map_location=DEVICE))
model.eval()

# Image preprocessing
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # ensure a single (grayscale) channel
        transforms.Resize((28, 28)),                  # resize to 28x28
        transforms.ToTensor(),                        # convert to a tensor
        transforms.Normalize((0.1307,), (0.3081,))    # normalize with the MNIST statistics
    ])
    image = Image.open(image_path).convert('RGB')     # force RGB mode to avoid errors on odd inputs
    return transform(image).unsqueeze(0)              # add the batch dimension

# Prediction function
def predict(image_path):
    try:
        image_tensor = preprocess_image(image_path).to(DEVICE)
        with torch.no_grad():
            output = model(image_tensor)
            pred = output.argmax(dim=1, keepdim=True)
            print(f"Predicted Digit: {pred.item()}")
    except Exception as e:
        print(f"Error processing image '{image_path}': {e}")

# Script entry point
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python test.py <image_path>")
        sys.exit(1)
    image_path = sys.argv[1]
    predict(image_path)


To use it, just pass the path of an image file:


python test.py 4.jpg

4. Run the inference task. Download images to prepare the test data: the following handwritten-digit images, found on the web, are used to check the model's accuracy.

wget https://img.redocn.com/sheji/20211030/shouxieshuzi0yishuzi_11822055.jpg -O 0.jpg
wget https://img-blog.csdn.net/20170413123610276?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvQ2FsbE1lR29EZW5n/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast -O 3.jpg
wget https://img.redocn.com/sheji/20211030/shouxieshuzi4yishuzi_11822063.jpg.400.jpg -O 4.jpg
wget https://img.redocn.com/sheji/20211030/shouxieshuzi6yishuzi_11822067.jpg.400.jpg -O 6.jpg
wget https://code.lardcave.net/2015/12/06/1/67CDB2E1-C19C-4CE7-94A3-32AE4B98F4C8@local -O 7.jpg

Test the model:


for i in 0 3 4 6 7; do
  docker run --gpus '"device=5"' \
    -v "$PWD":/workspace \
    -it --rm \
    pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime \
    python test.py $i.jpg
done


Predicted Digit: 0
Predicted Digit: 3
Predicted Digit: 4
Predicted Digit: 6
Predicted Digit: 0

5. Compute the accuracy.

Image file    Actual label    Predicted label    Correct?
0.jpg         0               0                  Yes
3.jpg         3               3                  Yes
4.jpg         4               4                  Yes
6.jpg         6               6                  Yes
7.jpg         7               0                  No

Accuracy = correct predictions / total predictions = 4 / 5 = 80%
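Counting by hand works for five images, but the same check can be scripted. A minimal sketch (a hypothetical evaluate.py, assuming test.py from step 3 sits in the same directory so its model, preprocess_image, and DEVICE can be imported, with the true labels taken from the table above):

import torch

# Importing test runs its module-level code (model definition and checkpoint
# loading) but not its __main__ block, so its pieces can be reused here.
from test import model, preprocess_image, DEVICE

# True labels for the downloaded images
labels = {"0.jpg": 0, "3.jpg": 3, "4.jpg": 4, "6.jpg": 6, "7.jpg": 7}

correct = 0
for path, true_label in labels.items():
    with torch.no_grad():
        output = model(preprocess_image(path).to(DEVICE))
    pred = output.argmax(dim=1).item()
    ok = pred == true_label
    correct += ok
    print(f"{path}: true={true_label} pred={pred} {'correct' if ok else 'wrong'}")

print(f"Accuracy: {correct}/{len(labels)} = {100.0 * correct / len(labels):.0f}%")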
