假网站的域名,京东云免费建wordpress,开源社区的发展前景,泉州西街
为了使0.06代码能够有效运行并输出项目目录及所有文件，我们在代码中添加一些额外的功能。
项目目录结构 项目目录结构如下：
text_to_image_project/ │ ├── config.yaml ├── data/ │ ├── train_data.csv │ └── test_data.txt ├── mod…为了使0.06代码能够有效运行并输出项目目录及所有文件我们在代码中添加一些额外的功能。
项目目录结构 项目目录结构如下
text_to_image_project/
│
├── config.yaml
├── data/
│   ├── train_data.csv
│   └── test_data.txt
├── models/
│   └── text_to_image_model.pth
├── main.py
└── README.md
示例配置文件 config.yaml
model:
  path: models/text_to_image_model.pth
  text_encoder_model_name: bert-base-uncased
data:
  input_file: data/test_data.txt
  output_dir: data/generated_images
  dataset_path: data/train_data.csv
training:
  batch_size: 64
  learning_rate: 0.0002
  epochs: 100
示例训练数据文件 data/train_data.csv
text,image_path
a beautiful sunset,data/images/sunset.jpg
a cute puppy,data/images/puppy.jpg
a red rose,data/images/rose.jpg
示例测试数据文件 data/test_data.txt
a beautiful sunset
a cute puppy
a red rose
完善后的代码 main.py
import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageTk
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision.transforms as transforms
import yaml
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
import random
import numpy as np# 配置文件加载
def load_config(config_path):
    """Load a YAML configuration file and return its parsed contents.

    Args:
        config_path: Path to the YAML file; it is opened as UTF-8 text.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.
    """
    with open(config_path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    return config


# Data loading
def load_text_data(file_path):
    """Read text prompts from a UTF-8 file, one prompt per line.

    Surrounding whitespace is stripped from every line. Lines that are
    empty after stripping are skipped, because each returned entry is later
    used both as a model prompt and as an output file name.

    Args:
        file_path: Path to the text file.

    Returns:
        A list of non-empty, stripped lines in file order.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]


# Data cleaning
def clean_data(data):
    """Return *data* with incomplete rows and repeated rows removed.

    Rows containing any missing value are dropped first, then duplicate
    rows are dropped. The index of the surviving rows is left as-is.
    """
    complete_rows = data.dropna()
    unique_rows = complete_rows.drop_duplicates()
    return unique_rows


# Data augmentation
def augment_data(image, mode):
    """Convert an image to a normalized tensor, with augmentation in training.

    Args:
        image: The image to transform (passed straight to the torchvision
            transform pipeline).
        mode: ``"train"`` selects the randomized augmentation pipeline; any
            other value selects the deterministic resize-only pipeline.

    Returns:
        The transformed image. Both pipelines end with ``ToTensor`` followed
        by ``Normalize`` using mean 0.5 and std 0.5 for each of 3 channels.
    """
    if mode == "train":
        transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomResizedCrop(64, scale=(0.8, 1.0)),
            transforms.ColorJitter(
                brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
            ),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
    else:
        transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
    return transform(image)


# Text encoder
class TextEncoder(nn.Module):
    """Encode text into one feature vector per input string.

    Wraps a pretrained tokenizer and model loaded by name through
    ``AutoTokenizer`` / ``AutoModel``.
    """

    def __init__(self, model_name):
        """Load the tokenizer and model identified by *model_name*."""
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)

    def forward(self, text):
        """Tokenize *text* and return the mean of the last hidden states.

        Args:
            text: The string(s) to encode; passed directly to the tokenizer.

        Returns:
            ``last_hidden_state`` averaged over dimension 1.
        """
        inputs = self.tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        outputs = self.model(**inputs)
        # NOTE(review): the mean runs over every position, so with
        # padding=True the padded positions of shorter texts in a batch are
        # averaged in as well -- confirm whether a mask-weighted mean is wanted.
        return outputs.last_hidden_state.mean(dim=1)


# Image generator
class ImageGenerator(nn.Module):
    """Transposed-convolution decoder that turns a feature vector into an image.

    The input is treated as a 1x1 feature map with ``in_channels`` channels.
    The first layer expands it to 4x4 and each of the four following
    stride-2 layers doubles the spatial size, giving a 3-channel 64x64
    output squashed into [-1, 1] by ``Tanh``.
    """

    def __init__(self, in_channels):
        """Build the decoder for feature vectors of length *in_channels*."""
        super().__init__()
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(in_channels, 512, kernel_size=4, stride=1, padding=0),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Decode *x* into images.

        Args:
            x: Tensor whose dimension 1 holds the features; it is reshaped
                to ``(-1, x.size(1), 1, 1)`` before decoding.

        Returns:
            The decoder output for the reshaped input.
        """
        x = x.view(-1, x.size(1), 1, 1)
        return self.decoder(x)


# Discriminator
class Discriminator(nn.Module):
    """Convolutional classifier that scores 3-channel images.

    Four stride-2 convolutions each halve the spatial size, and a final
    4x4 convolution with a ``Sigmoid`` produces one value in [0, 1] per
    4x4 window. For a 64x64 input the output shape is ``(N, 1, 1, 1)``.
    """

    def __init__(self):
        """Build the convolutional stack."""
        super().__init__()
        self.main = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the score map for the image batch *x*."""
        return self.main(x)


# Model definition
class TextToImageModel(nn.Module):
    """Text-to-image model: a text encoder followed by an image generator."""

    def __init__(self, text_encoder_model_name):
        """Create the encoder named *text_encoder_model_name* and a generator."""
        super().__init__()
        self.text_encoder = TextEncoder(text_encoder_model_name)
        self.image_generator = ImageGenerator(768)  # 768 is the hidden size of BERT

    def forward(self, text):
        """Encode *text* and return the images generated from its features."""
        text_features = self.text_encoder(text)
        return self.image_generator(text_features)


# Model loading
def load_model(model_path, text_encoder_model_name):
    """Build a ``TextToImageModel`` and load saved weights if they exist.

    Args:
        model_path: Path to a saved state dict. When the file does not
            exist the freshly constructed model is returned unchanged.
        text_encoder_model_name: Name passed to ``TextToImageModel``.

    Returns:
        The model, switched to evaluation mode.
    """
    model = TextToImageModel(text_encoder_model_name)
    if os.path.exists(model_path):
        # map_location="cpu" lets a checkpoint that was saved from GPU
        # tensors be loaded on a machine without CUDA.
        state_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state_dict)
    model.eval()
    return model


# Image saving
def save_image(image, path):
    """Save *image* to *path*, creating the parent directory when needed.

    Args:
        image: Object exposing ``save(path)``.
        path: Destination file path. A bare file name (no directory part)
            is saved relative to the current working directory.
    """
    directory = os.path.dirname(path)
    # dirname is "" for a bare file name, and os.makedirs("") raises, so
    # only create the directory when there is one. exist_ok avoids the
    # check-then-create race of a separate os.path.exists test.
    if directory:
        os.makedirs(directory, exist_ok=True)
    image.save(path)


# Dataset class
class TextToImageDataset(Dataset):
    """Dataset of (text, image) pairs described by a CSV file.

    The CSV is read with pandas, cleaned with ``clean_data``, and must
    provide the columns ``text`` and ``image_path``.
    """

    def __init__(self, csv_file, transform=None, mode="train"):
        """Read and clean the CSV.

        Args:
            csv_file: Path to the CSV file.
            transform: Optional callable invoked as ``transform(image, mode)``.
            mode: Value forwarded to *transform* as its second argument.
        """
        self.data = pd.read_csv(csv_file)
        self.data = clean_data(self.data)
        self.transform = transform
        self.mode = mode

    def __len__(self):
        """Return the number of rows left after cleaning."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(text, image)`` for the row at position *idx*.

        The image is opened from the row's ``image_path`` and converted to
        RGB; the transform is applied only when one was supplied.
        """
        row = self.data.iloc[idx]
        text = row["text"]
        image_path = row["image_path"]
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image, self.mode)
        return text, image


# Model training
def train_model(config):
    """Train the text-to-image generator against a discriminator and save it.

    Args:
        config: Parsed configuration mapping. Reads
            ``model.text_encoder_model_name``, ``model.path``,
            ``training.batch_size``, ``training.learning_rate``,
            ``training.epochs`` and the training CSV path, which is taken
            from ``data.dataset_path`` and, if absent there, from
            ``training.dataset_path``.

    Side effects:
        Prints the mean generator and discriminator loss after each epoch
        and writes the generator's state dict to ``model.path``.
    """
    train_cfg = config["training"]
    # The sample config.yaml keeps dataset_path under "data"; the
    # "training" section is still consulted for existing configs.
    dataset_path = config.get("data", {}).get("dataset_path") or train_cfg["dataset_path"]

    dataset = TextToImageDataset(dataset_path, transform=augment_data, mode="train")
    dataloader = DataLoader(dataset, batch_size=train_cfg["batch_size"], shuffle=True)

    model = TextToImageModel(config["model"]["text_encoder_model_name"])
    discriminator = Discriminator()

    optimizer_g = optim.Adam(model.parameters(), lr=train_cfg["learning_rate"])
    optimizer_d = optim.Adam(discriminator.parameters(), lr=train_cfg["learning_rate"])

    criterion_gan = nn.BCELoss()
    criterion_l1 = nn.L1Loss()

    # Guard the per-epoch average against an empty data loader.
    num_batches = max(len(dataloader), 1)

    for epoch in range(train_cfg["epochs"]):
        model.train()
        discriminator.train()
        running_loss_g = 0.0
        running_loss_d = 0.0

        for text, images in dataloader:
            text = list(text)
            real_labels = torch.ones(images.size(0), 1)
            fake_labels = torch.zeros(images.size(0), 1)

            # Train Discriminator
            optimizer_d.zero_grad()
            # The discriminator emits (N, 1, 1, 1) for 64x64 images while
            # the labels are (N, 1); BCELoss needs matching shapes.
            real_outputs = discriminator(images).view(-1, 1)
            d_loss_real = criterion_gan(real_outputs, real_labels)
            generated_images = model(text)
            # detach() keeps the discriminator step from updating the generator.
            fake_outputs = discriminator(generated_images.detach()).view(-1, 1)
            d_loss_fake = criterion_gan(fake_outputs, fake_labels)
            d_loss = (d_loss_real + d_loss_fake) / 2
            d_loss.backward()
            optimizer_d.step()

            # Train Generator
            optimizer_g.zero_grad()
            generated_images = model(text)
            g_outputs = discriminator(generated_images).view(-1, 1)
            g_loss_gan = criterion_gan(g_outputs, real_labels)
            g_loss_l1 = criterion_l1(generated_images, images)
            g_loss = g_loss_gan + 100 * g_loss_l1  # Weighted sum of GAN loss and L1 loss
            g_loss.backward()
            optimizer_g.step()

            running_loss_g += g_loss.item()
            running_loss_d += d_loss.item()

        print(
            f"Epoch {epoch + 1}, "
            f"Generator Loss: {running_loss_g / num_batches}, "
            f"Discriminator Loss: {running_loss_d / num_batches}"
        )

    # Save the trained model, creating its directory first if necessary.
    model_path = config["model"]["path"]
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    torch.save(model.state_dict(), model_path)


# Image generation
def generate_images(model, text_data, output_dir):
    """Generate one image per text entry and save it as a PNG.

    Args:
        model: Object exposing ``text_encoder`` and ``image_generator``.
        text_data: Iterable of text prompts.
        output_dir: Directory for the images; each file is named
            ``<text>.png``.
    """
    for text in text_data:
        # Inference only, so skip building the autograd graph.
        with torch.no_grad():
            input_tensor = model.text_encoder([text])
            image = model.image_generator(input_tensor)
        image = image.squeeze(0).detach().cpu().numpy()
        # Map the generator's [-1, 1] output range onto 0..255 pixel values.
        image = (image * 127.5 + 127.5).astype("uint8")
        # Channels-first to channels-last for PIL.
        image = Image.fromarray(image.transpose(1, 2, 0))
        # Save the image
        save_image(image, f"{output_dir}/{text}.png")


# Graphical user interface
class TextToImageGUI:
    """Tkinter window with a text box, a train button and a generate button."""

    def __init__(self, root):
        """Load config and model from ``config.yaml`` and build the widgets.

        Args:
            root: The Tk root window that hosts the widgets.
        """
        self.root = root
        self.root.title("文本生成图像")
        self.config = load_config("config.yaml")
        self.model = load_model(
            self.config["model"]["path"],
            self.config["model"]["text_encoder_model_name"],
        )

        self.text_input = tk.Text(root, height=10, width=50)
        self.text_input.pack(pady=10)

        self.train_button = tk.Button(root, text="训练模型", command=self.train_model)
        self.train_button.pack(pady=10)

        self.generate_button = tk.Button(root, text="生成图像", command=self.generate_image)
        self.generate_button.pack(pady=10)

        self.image_label = tk.Label(root)
        self.image_label.pack(pady=10)

    def train_model(self):
        """Train with the loaded config, reload the saved model, and notify."""
        # NOTE(review): training runs inside the button callback, so the
        # window presumably stays unresponsive until it finishes -- confirm
        # whether a background thread is wanted.
        train_model(self.config)
        self.model = load_model(
            self.config["model"]["path"],
            self.config["model"]["text_encoder_model_name"],
        )
        messagebox.showinfo("成功", "模型训练完成")

    def generate_image(self):
        """Generate an image from the text box, display it, and save it."""
        text = self.text_input.get("1.0", tk.END).strip()
        if not text:
            messagebox.showwarning("警告", "请输入文本")
            return

        # Inference only, so skip building the autograd graph.
        with torch.no_grad():
            input_tensor = self.model.text_encoder([text])
            image = self.model.image_generator(input_tensor)
        image = image.squeeze(0).detach().cpu().numpy()
        # Map the generator's [-1, 1] output range onto 0..255 pixel values.
        image = (image * 127.5 + 127.5).astype("uint8")
        image = Image.fromarray(image.transpose(1, 2, 0))

        # Display the image
        img_tk = ImageTk.PhotoImage(image)
        self.image_label.config(image=img_tk)
        # Keep a reference on the widget so the photo is not garbage-collected.
        self.image_label.image = img_tk

        # Save the image
        save_image(image, f"{self.config['data']['output_dir']}/{text}.png")
        messagebox.showinfo("成功", "图像已生成并保存")


# Print the project directory and all files
def list_files(startpath):
    """Print the directory tree under *startpath*, indented 4 spaces per level.

    Directories are printed with a trailing ``/`` and their files are
    listed one level deeper. Entries are visited in sorted order.

    Args:
        startpath: Root directory to walk. A trailing path separator is
            tolerated.
    """
    # Normalize first: with a trailing separator basename() would be empty
    # and the depth of every subdirectory would be off by one.
    startpath = os.path.normpath(startpath)
    for root, dirs, files in os.walk(startpath):
        dirs.sort()  # in-place so os.walk descends in sorted order
        relative = os.path.relpath(root, startpath)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1
        indent = " " * 4 * level
        print("{}{}/".format(indent, os.path.basename(root)))
        subindent = " " * 4 * (level + 1)
        for f in sorted(files):
            print("{}{}".format(subindent, f))


if __name__ == "__main__":
    config = load_config("config.yaml")

    # Print the project directory and all files
    project_root = os.path.dirname(os.path.abspath(__file__))
    print("项目目录及所有文件:")
    list_files(project_root)

    # Load the model
    model = load_model(config["model"]["path"], config["model"]["text_encoder_model_name"])

    # Load the text data
    text_data = load_text_data(config["data"]["input_file"])

    # Generate the images
    generate_images(model, text_data, config["data"]["output_dir"])

    # Start the graphical user interface
    root = tk.Tk()
    app = TextToImageGUI(root)
    root.mainloop()

# --- Article text that followed the code on this line ---
# Project directory and file listing: main.py gains a list_files function
# that prints the project directory and all files. It is called
# automatically at program start-up and prints the current project's
# directory structure and every file in it.
# Documentation, README.md: create a README.md file in the project root
# with the following content:
# 文本生成图像项目

## 目录结构
text_to_image_project/
│
├── config.yaml
├── data/
│ ├── train_data.csv
│ └── test_data.txt
├── models/
│ └── text_to_image_model.pth
├── main.py
└── README.md

## 配置文件 config.yaml
yaml
model:
  path: models/text_to_image_model.pth
  text_encoder_model_name: bert-base-uncased
data:
  input_file: data/test_data.txt
  output_dir: data/generated_images
  dataset_path: data/train_data.csv
training:
  batch_size: 64
  learning_rate: 0.0002
  epochs: 100
训练数据文件
data/train_data.csv
csv
text,image_path
a beautiful sunset,data/images/sunset.jpg
a cute puppy,data/images/puppy.jpg
a red rose,data/images/rose.jpg
测试数据文件
data/test_data.txt
code
a beautiful sunset
a cute puppy
a red rose
运行项目
确保安装了所需的依赖库（tkinter 随 Python 标准库提供，无法通过 pip 安装）：
pip install torch torchvision transformers pillow pyyaml pandas numpy
运行 main.py：
python main.py
功能
训练模型：点击“训练模型”按钮开始训练模型。
生成图像：在文本框中输入文本，点击“生成图像”按钮，生成相应的图像并显示在界面上，同时保存到指定目录。
项目目录及所有文件
项目启动时会自动输出项目目录及所有文件。
希望这些改进和示例文件能帮助你更好地理解和运行项目。如果有任何问题或需要进一步的帮助，请随时告诉我。