Compare commits

...

3 Commits

Author / SHA1 / Message / Date
kemna / d3e281700e / A pile of bugs (Some checks failed: job1 (push) failing after 4m5s) / 2024-10-29 20:05:56 +08:00
kemna / 96406275f5 / Hit a bug / 2024-10-28 19:58:07 +08:00
kemna / c609eee84d / Needs to run many times / 2024-10-26 10:49:47 +08:00
6 changed files with 100 additions and 218 deletions

View File

@@ -1,30 +0,0 @@
import os
import re
import time

import requests

def image_set(save_path, word, epoch):
    # Download `epoch` pages of Baidu image-search results for `word`.
    q = 0  # result offset, advances one page (20 images) per loop
    a = 0  # running image counter, used for file names
    while True:
        time.sleep(1)
        url = "https://images.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111110&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={}&oq={}&rsp=-1".format(word, word)
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E; rv:11.0) like Gecko"}
        response = requests.get(url, headers=headers)  # pass the headers as headers=, not as query params
        html = response.text
        urls = re.findall('"objURL":"(.*?)"', html)
        for url in urls:
            print(a)
            response = requests.get(url, headers=headers)
            image = response.content
            # open in binary mode, since response.content is bytes
            with open(os.path.join(save_path, "{}.jpg".format(a)), "wb") as f:
                f.write(image)
            a = a + 1
        q = q + 20
        if (q / 20) >= int(epoch):
            break

if __name__ == "__main__":
    save_path = "/kemna"
    word = input("Enter the images you want: ")
    epoch = input("How many rounds of images to download? ")
    image_set(save_path, word, epoch)

185
main.py
View File

@@ -1,107 +1,122 @@
 # Import modules
 import torch
 import torch.utils
 import torch.utils.data
 import torch.utils.data.dataloader
 import torchvision
 from tqdm import tqdm
 import matplotlib
 # Data preprocessing
 # Initialize a dict that records train/test loss and accuracy
 history = {
     'Train Loss': [],
     'Train Accuracy': [],
     'Test Loss': [],
     'Test Accuracy': []
 }
 # Use the GPU as the training device when available
+import random
+from enum import Enum
+import matplotlib.pyplot as plt  # needed for plt.plot below
+import torch.nn.functional as F
 device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
 # Use torchvision.transforms to convert the images to tensors
 transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), torchvision.transforms.Normalize(mean=[0.5], std=[0.5])])
 # Build the dataset, using MNIST
 path = './MNIST'
+EPOCH = 10
+Batch_Size = 64
+# Create the dataset (unused stub)
+class DataSet:
+    def __init__(self, floder, data_name, label_name, transform=None):
+        self.floder = floder
+        self.transform = transform
+# Create the dataloader (unused stub)
+class DataLoader:
+    def __init__(self, type, batch_size, is_shuffle):
+        data_name = 'train-images-idx3-ubyte.gz' if type == 'train' else 't10k-images-idx3-ubyte.gz'
+        label_name = 'train-labels-idx1-ubyte.gz' if type == 'train' else 't10k-labels-idx1-ubyte.gz'
 # Download the datasets
 # Training set
 trainData = torchvision.datasets.MNIST(path, train=True, transform=transform, download=True)
 # Test set
 testData = torchvision.datasets.MNIST(path, train=False, transform=transform, download=False)
-# Start training through the dataloaders
-# Set the batch size
-BATCH_SIZE = 1000
-# Build the dataloaders
-TrainDataLoader = torch.utils.data.DataLoader(dataset=trainData, batch_size=BATCH_SIZE)
-TestDataLoader = torch.utils.data.DataLoader(dataset=testData, batch_size=BATCH_SIZE)
 # Build the network
+# shuffle=True shuffles the dataset
+train_Dataloader = torch.utils.data.DataLoader(trainData, batch_size=Batch_Size, shuffle=True)
+test_DataLoader = torch.utils.data.DataLoader(testData, batch_size=Batch_Size, shuffle=False)
 class Net(torch.nn.Module):
     # Constructor
     def __init__(self):
         # Call the parent constructor
         super(Net, self).__init__()
-        self.model = torch.nn.Sequential(
-            torch.nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1),
-            torch.nn.ReLU(),
-            torch.nn.MaxPool2d(kernel_size=2, stride=2),
-            # The size of the picture is 14x14
-            torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
-            torch.nn.ReLU(),
-            torch.nn.MaxPool2d(kernel_size=2, stride=2),
-            # The size of the picture is 7x7
-            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
-            torch.nn.ReLU(),
-            torch.nn.Flatten(),
-            torch.nn.Linear(in_features=7 * 7 * 64, out_features=128),
-            torch.nn.ReLU(),
-            torch.nn.Linear(in_features=128, out_features=10),
-            torch.nn.Softmax(dim=1)
-        )
-    def forward(self, input):
-        output = self.model(input)
-        return output
-net = Net().to(device)
-# Build the optimizer and the loss function
-# For a simple multi-class task, cross-entropy works as the loss function, and Adam as the optimizer
-lossF = torch.nn.CrossEntropyLoss()
-optimizer = torch.optim.Adam(net.parameters())
-# Training loop
-Epochs = 100
-for epochs in range(0, Epochs):
-    # Training step
-    processBar = tqdm(TrainDataLoader, unit='step')
-    net.train(True)
-    for step, (trainImgs, labels) in enumerate(processBar):
-        trainImgs = trainImgs.to(device)
-        labels = labels.to(device)
-        net.zero_grad()
-        outputs = net(trainImgs)
-        loss = lossF(outputs, labels)
-        predictions = torch.argmax(outputs, dim=1)
-        accuracy = torch.sum(predictions == labels) / labels.shape[0]
-        processBar.set_description("[%d/%d] Loss: %.4f, Acc: %.4f" %
-                                   (epochs, Epochs, loss.item(), accuracy.item()))
-        if step == len(processBar) - 1:
-            correct, totalLoss = 0, 0
-            net.train(False)
-            for testImgs, labels in TestDataLoader:
-                testImgs = testImgs.to(device)
-                labels = labels.to(device)
-                outputs = net(testImgs)
-                loss = lossF(outputs, labels)
-                predictions = torch.argmax(outputs, dim=1)
-                totalLoss += loss
-                correct += torch.sum(predictions == labels)
-            testAccuracy = correct / (BATCH_SIZE * len(TestDataLoader))
-            testLoss = totalLoss / len(TestDataLoader)
-            history['Test Loss'].append(testLoss.item())
-            history['Test Accuracy'].append(testAccuracy.item())
-            processBar.set_description("[%d/%d] Loss: %.4f, Acc: %.4f, Test Loss: %.4f, Test Acc: %.4f" %
-                                       (epochs, Epochs, loss.item(), accuracy.item(), testLoss.item(), testAccuracy.item()))
-    processBar.close()
+        self.conv1 = torch.nn.Sequential(
+            torch.nn.Conv2d(1, 10, kernel_size=5),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(kernel_size=2),
+        )
+        self.conv2 = torch.nn.Sequential(
+            torch.nn.Conv2d(10, 20, kernel_size=5),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(kernel_size=2),
+        )
+        self.fc = torch.nn.Sequential(
+            torch.nn.Linear(320, 50),
+            torch.nn.Linear(50, 10),
+        )
+    def forward(self, x):
+        batch_size = x.size(0)
+        x = self.conv1(x)  # one conv layer, one activation, one pooling layer (conv, then activation, then pooling; the exact order barely matters)
+        x = self.conv2(x)  # and once more
+        x = x.view(batch_size, -1)  # flatten (batch, 20, 4, 4) into (batch, 320) for the fully connected layers; -1 infers the 320
+        x = self.fc(x)
+        return x
+model = Net().to(device)
+# Use cross-entropy as the loss function
+sunshi = torch.nn.CrossEntropyLoss()
+# Optimizer: stochastic gradient descent
+# lr = learning rate, momentum = momentum factor
+optimizer = torch.optim.SGD(model.parameters(), lr=0.25, momentum=0.25)
+# Training
+def train(epoch):
+    running_loss = 0.0
+    running_total = 0
+    running_correct = 0
+    for batch_idx, data in enumerate(train_Dataloader, 0):
+        inputs, target = data
+        inputs = inputs.to(device)
+        target = target.to(device)
+        # Zero the gradients
+        optimizer.zero_grad()
+        outputs = model(inputs)
+        loss = sunshi(outputs, target)
+        # Backpropagation
+        loss.backward()
+        optimizer.step()
+        running_loss += loss.item()
+        # Accuracy
+        _, predicted = torch.max(outputs, dim=1)
+        running_total += inputs.shape[0]
+        running_correct += (predicted == target).sum().item()
+    print('[%d, %5d]: loss: %.3f, acc: %.2f' % (epoch + 1, batch_idx + 1, running_loss, running_correct / running_total))
+# Testing
+def test():
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for data in test_DataLoader:
+            images, labels = data
+            # Move the test batch to the same device as the model
+            images = images.to(device)
+            labels = labels.to(device)
+            outputs = model(images)
+            _, predicted = torch.max(outputs.data, dim=1)
+            total += labels.size(0)
+            correct += (predicted == labels).sum().item()
+    accuracy = correct / total
+    print('[%d/%d] Accuracy: %.1f %%' % (epoch + 1, EPOCH, 100 * accuracy))
+    return accuracy
+if __name__ == '__main__':
+    acc_list_test = []
+    for epoch in range(EPOCH):
+        train(epoch)
+        # Test once every 10 training epochs
+        if epoch % 10 == 9:
+            acc_test = test()
+            acc_list_test.append(acc_test)
+    plt.plot(acc_list_test)
+    plt.xlabel('Epoch')
+    plt.ylabel('Accuracy On TestSet')
+    plt.show()

103
test.py
View File

@@ -1,103 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Hyperparameters
BATCH_SIZE = 64               # samples per batch
DEVICE = torch.device('cpu')  # run on CPU or GPU
EPOCHS = 300                  # number of training epochs

# Build the transform pipeline applied to each image
pipeline = transforms.Compose([
    transforms.ToTensor(),                      # convert the image to a tensor
    transforms.Normalize((0.1307,), (0.3081,))  # normalize with the MNIST mean and std
])

# Download the datasets
train_set = datasets.MNIST('data', train=True, download=True, transform=pipeline)
test_set = datasets.MNIST('data', train=False, download=False, transform=pipeline)

# Load the data
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

# Build the model
class Dight(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.conv2 = nn.Conv2d(10, 20, 3)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        input_size = x.size(0)      # batch_size x 1 x 28 x 28
        x = self.conv1(x)           # input batch x 1 x 28 x 28, output batch x 10 x 24 x 24 (28 - 5 + 1 = 24)
        x = F.relu(x)               # shape unchanged
        x = F.max_pool2d(x, 2, 2)   # input batch x 10 x 24 x 24, output batch x 10 x 12 x 12
        x = self.conv2(x)           # 10 x 12 x 12 -> 20 x 10 x 10
        x = F.relu(x)
        x = x.view(input_size, -1)  # flatten; the inferred dimension is 20 x 10 x 10 = 2000
        x = self.fc1(x)             # 2000 -> 500
        x = F.relu(x)
        x = self.fc2(x)             # 500 -> 10
        output = F.log_softmax(x, dim=1)  # log-probabilities; the prediction is the argmax
        return output

# Define the optimizer
model = Dight().to(DEVICE)
optimizer = optim.Adam(model.parameters())

# Training routine
def train_model(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_index, (data, target) in enumerate(train_loader):
        # Move the batch to the device
        data = data.to(device)
        target = target.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        output = model(data)
        # Compute the loss
        loss = F.cross_entropy(output, target)
        # Backpropagate
        loss.backward()
        # Update the parameters (a single step; the original called optimizer.step() twice)
        optimizer.step()
        if batch_index % 3000 == 0:
            print('Train Epoch : {} \t Loss : {:.6f}'.format(epoch, loss.item()))

# Testing routine
def test_model(model, device, test_loader):
    model.eval()
    correct = 0.0    # number of correct predictions
    test_loss = 0.0  # accumulated test loss
    with torch.no_grad():
        for data, target in test_loader:
            # Move the batch to the device
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # Accumulate the test loss
            test_loss += F.cross_entropy(output, target).item()
            # Take the highest-scoring class as the prediction
            _, pred = output.max(1, keepdim=True)
            # Count correct predictions
            correct += (pred == target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('Test - Average loss : {:.4f}, Accuracy : {:.3f}\n'.format(
        test_loss, 100.0 * correct / len(test_loader.dataset)))

# Run training and evaluation
for epoch in range(1, EPOCHS + 1):
    train_model(model, DEVICE, train_loader, optimizer, epoch)
    test_model(model, DEVICE, test_loader)

Binary file not shown.

Binary file not shown.

Binary file not shown.