当前位置: 首页 > news >正文

自己做剧本网站网络营销推广的优势

自己做剧本网站,网络营销推广的优势,做网站哪家便宜,网站建设企业模板我们需要在现有的代码基础上增加网络搜索功能,并在大模型无法提供满意答案时调用网络搜索。以下是完整的代码和文件结构说明,我们创建一个完整的项目结构,包括多个文件和目录。这个项目将包含以下部分: 主文件 (main.py)&#xf…

我们需要在现有的代码基础上增加网络搜索功能,并在大模型无法提供满意答案时调用网络搜索。以下是完整的代码和文件结构说明,我们创建一个完整的项目结构,包括多个文件和目录。这个项目将包含以下部分:

主文件 (main.py):包含GUI界面和模型加载、训练、评估等功能。
网络请求模块 (web_search.py):用于从互联网获取信息。
日志配置文件 (logging.conf):用于配置日志记录。
模型文件 (xihua_model.pth):训练好的模型权重文件。
数据文件 (train_data.jsonl, test_data.jsonl):训练和测试数据文件。
项目结构:包括上述文件和目录。
项目结构

project_root/
├── data/
│   ├── train_data.jsonl
│   └── test_data.jsonl
├── logs/
│   └── (log files will be generated here)
├── models/
│   └── xihua_model.pth
├── main.py
├── web_search.py
└── logging.conf

文件内容
main.py

import os
import json
import jsonlines
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import logging
from difflib import SequenceMatcher
from datetime import datetime
from web_search import search_web# 获取项目根目录
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))# 配置日志
LOGS_DIR = os.path.join(PROJECT_ROOT, 'logs')
os.makedirs(LOGS_DIR, exist_ok=True)def setup_logging():log_file = os.path.join(LOGS_DIR, datetime.now().strftime('%Y-%m-%d_%H-%M-%S_羲和.txt'))logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(levelname)s - %(message)s',handlers=[logging.FileHandler(log_file),logging.StreamHandler()])setup_logging()# 数据集类
class XihuaDataset(Dataset):def __init__(self, file_path, tokenizer, max_length=128):self.tokenizer = tokenizerself.max_length = max_lengthself.data = self.load_data(file_path)def load_data(self, file_path):data = []if file_path.endswith('.jsonl'):with jsonlines.open(file_path) as reader:for i, item in enumerate(reader):try:data.append(item)except jsonlines.jsonlines.InvalidLineError as e:logging.warning(f"跳过无效行 {i + 1}: {e}")elif file_path.endswith('.json'):with open(file_path, 'r') as f:try:data = json.load(f)except json.JSONDecodeError as e:logging.warning(f"跳过无效文件 {file_path}: {e}")return datadef __len__(self):return len(self.data)def __getitem__(self, idx):item = self.data[idx]question = item['question']human_answer = item['human_answers'][0]chatgpt_answer = item['chatgpt_answers'][0]try:inputs = self.tokenizer(question, return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)human_inputs = self.tokenizer(human_answer, return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)chatgpt_inputs = self.tokenizer(chatgpt_answer, return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)except Exception as e:logging.warning(f"跳过无效项 {idx}: {e}")return self.__getitem__((idx + 1) % len(self.data))return {'input_ids': inputs['input_ids'].squeeze(),'attention_mask': inputs['attention_mask'].squeeze(),'human_input_ids': human_inputs['input_ids'].squeeze(),'human_attention_mask': human_inputs['attention_mask'].squeeze(),'chatgpt_input_ids': chatgpt_inputs['input_ids'].squeeze(),'chatgpt_attention_mask': chatgpt_inputs['attention_mask'].squeeze(),'human_answer': human_answer,'chatgpt_answer': chatgpt_answer}# 获取数据加载器
def get_data_loader(file_path, tokenizer, batch_size=8, max_length=128):dataset = XihuaDataset(file_path, tokenizer, max_length)return DataLoader(dataset, batch_size=batch_size, shuffle=True)# 模型定义
class XihuaModel(torch.nn.Module):def __init__(self, pretrained_model_name='F:/models/bert-base-chinese'):super(XihuaModel, self).__init__()self.bert = BertModel.from_pretrained(pretrained_model_name)self.classifier = torch.nn.Linear(self.bert.config.hidden_size, 1)def forward(self, input_ids, attention_mask):outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)pooled_output = outputs.pooler_outputlogits = self.classifier(pooled_output)return logits# 训练函数
def train(model, data_loader, optimizer, criterion, device, progress_var=None):model.train()total_loss = 0.0num_batches = len(data_loader)for batch_idx, batch in enumerate(data_loader):try:input_ids = batch['input_ids'].to(device)attention_mask = batch['attention_mask'].to(device)human_input_ids = batch['human_input_ids'].to(device)human_attention_mask = batch['human_attention_mask'].to(device)chatgpt_input_ids = batch['chatgpt_input_ids'].to(device)chatgpt_attention_mask = batch['chatgpt_attention_mask'].to(device)optimizer.zero_grad()human_logits = model(human_input_ids, human_attention_mask)chatgpt_logits = model(chatgpt_input_ids, chatgpt_attention_mask)human_labels = torch.ones(human_logits.size(0), 1).to(device)chatgpt_labels = torch.zeros(chatgpt_logits.size(0), 1).to(device)loss = criterion(human_logits, human_labels) + criterion(chatgpt_logits, chatgpt_labels)loss.backward()optimizer.step()total_loss += loss.item()if progress_var:progress_var.set((batch_idx + 1) / num_batches * 100)except Exception as e:logging.warning(f"跳过无效批次: {e}")return total_loss / len(data_loader)# 评估函数
def evaluate(model, data_loader, device):model.eval()correct_predictions = 0total_predictions = 0with torch.no_grad():for batch in data_loader:input_ids = batch['input_ids'].to(device)attention_mask = batch['attention_mask'].to(device)human_input_ids = batch['human_input_ids'].to(device)human_attention_mask = batch['human_attention_mask'].to(device)chatgpt_input_ids = batch['chatgpt_input_ids'].to(device)chatgpt_attention_mask = batch['chatgpt_attention_mask'].to(device)human_logits = model(human_input_ids, human_attention_mask)chatgpt_logits = model(chatgpt_input_ids, chatgpt_attention_mask)human_labels = torch.ones(human_logits.size(0), 1).to(device)chatgpt_labels = torch.zeros(chatgpt_logits.size(0), 1).to(device)human_preds = (torch.sigmoid(human_logits) > 0.5).float()chatgpt_preds = (torch.sigmoid(chatgpt_logits) > 0.5).float()correct_predictions += (human_preds == human_labels).sum().item()correct_predictions += (chatgpt_preds == chatgpt_labels).sum().item()total_predictions += human_labels.size(0) + chatgpt_labels.size(0)accuracy = correct_predictions / total_predictionsreturn accuracy# 主训练函数
def main_train(retrain=False):device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')logging.info(f'Using device: {device}')tokenizer = BertTokenizer.from_pretrained('F:/models/bert-base-chinese')model = XihuaModel(pretrained_model_name='F:/models/bert-base-chinese').to(device)if retrain:model_path = os.path.join(PROJECT_ROOT, 'models/xihua_model.pth')if os.path.exists(model_path):model.load_state_dict(torch.load(model_path, map_location=device))logging.info("加载现有模型")else:logging.info("没有找到现有模型,将使用预训练模型")optimizer = optim.Adam(model.parameters(), lr=1e-5)criterion = torch.nn.BCEWithLogitsLoss()train_data_loader = get_data_loader(os.path.join(PROJECT_ROOT, 'data/train_data.jsonl'), tokenizer, batch_size=8, max_length=128)num_epochs = 30for epoch in range(num_epochs):train_loss = train(model, train_data_loader, optimizer, criterion, device)logging.info(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.8f}')torch.save(model.state_dict(), os.path.join(PROJECT_ROOT, 'models/xihua_model.pth'))logging.info("模型训练完成并保存")# GUI界面
class XihuaChatbotGUI:def __init__(self, root):self.root = rootself.root.title("羲和聊天机器人")self.tokenizer = BertTokenizer.from_pretrained('F:/models/bert-base-chinese')self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')self.model = XihuaModel(pretrained_model_name='F:/models/bert-base-chinese').to(self.device)self.load_model()self.model.eval()# 加载训练数据集以便在获取答案时使用self.data = self.load_data(os.path.join(PROJECT_ROOT, 'data/train_data.jsonl'))# 历史记录self.history = []self.create_widgets()def create_widgets(self):# 顶部框架top_frame = tk.Frame(self.root)top_frame.pack(pady=10)self.question_label = tk.Label(top_frame, text="问题:", font=("Arial", 12))self.question_label.grid(row=0, column=0, padx=10)self.question_entry = tk.Entry(top_frame, width=50, font=("Arial", 12))self.question_entry.grid(row=0, column=1, padx=10)self.answer_button = tk.Button(top_frame, text="获取回答", command=self.get_answer, font=("Arial", 12))self.answer_button.grid(row=0, column=2, padx=10)# 中部框架middle_frame = tk.Frame(self.root)middle_frame.pack(pady=10)self.answer_label = tk.Label(middle_frame, text="回答:", font=("Arial", 12))self.answer_label.grid(row=0, column=0, padx=10)self.answer_text = tk.Text(middle_frame, height=10, width=70, font=("Arial", 12))self.answer_text.grid(row=1, column=0, padx=10)# 底部框架bottom_frame = tk.Frame(self.root)bottom_frame.pack(pady=10)self.correct_button = tk.Button(bottom_frame, text="准确", command=self.mark_correct, font=("Arial", 12))self.correct_button.grid(row=0, column=0, padx=10)self.incorrect_button = tk.Button(bottom_frame, text="不准确", command=self.mark_incorrect, font=("Arial", 12))self.incorrect_button.grid(row=0, column=1, padx=10)self.train_button = tk.Button(bottom_frame, text="训练模型", command=self.train_model, font=("Arial", 12))self.train_button.grid(row=0, column=2, padx=10)self.retrain_button = tk.Button(bottom_frame, text="重新训练模型", command=lambda: self.train_model(retrain=True), font=("Arial", 12))self.retrain_button.grid(row=0, column=3, padx=10)self.progress_var = tk.DoubleVar()self.progress_bar = ttk.Progressbar(bottom_frame, variable=self.progress_var, maximum=100, length=200)self.progress_bar.grid(row=1, column=0, columnspan=4, pady=10)self.log_text = tk.Text(bottom_frame, height=10, width=70, font=("Arial", 12))self.log_text.grid(row=2, column=0, columnspan=4, pady=10)self.evaluate_button = tk.Button(bottom_frame, text="评估模型", command=self.evaluate_model, font=("Arial", 12))self.evaluate_button.grid(row=3, column=0, padx=10, pady=10)self.history_button = tk.Button(bottom_frame, text="查看历史记录", command=self.view_history, font=("Arial", 12))self.history_button.grid(row=3, column=1, padx=10, pady=10)self.save_history_button = tk.Button(bottom_frame, text="保存历史记录", command=self.save_history, font=("Arial", 12))self.save_history_button.grid(row=3, column=2, padx=10, pady=10)def get_answer(self):question = self.question_entry.get()if not question:messagebox.showwarning("输入错误", "请输入问题")returninputs = self.tokenizer(question, return_tensors='pt', padding='max_length', truncation=True, max_length=128)with torch.no_grad():input_ids = inputs['input_ids'].to(self.device)attention_mask = inputs['attention_mask'].to(self.device)logits = self.model(input_ids, attention_mask)if logits.item() > 0:answer_type = "羲和回答"else:answer_type = "零回答"specific_answer = self.get_specific_answer(question, answer_type)if specific_answer == "这个我也不清楚,你问问零吧":specific_answer = search_web(question)self.answer_text.delete(1.0, tk.END)self.answer_text.insert(tk.END, f"{answer_type}\n{specific_answer}")# 添加到历史记录self.history.append({'question': question,'answer_type': answer_type,'specific_answer': specific_answer,'accuracy': None  # 初始状态为未评价})def get_specific_answer(self, question, answer_type):# 使用模糊匹配查找最相似的问题best_match = Nonebest_ratio = 0.0for item in self.data:ratio = SequenceMatcher(None, question, item['question']).ratio()if ratio > best_ratio:best_ratio = ratiobest_match = itemif best_match:if answer_type == "羲和回答":return best_match['human_answers'][0]else:return best_match['chatgpt_answers'][0]return "这个我也不清楚,你问问零吧"def load_data(self, file_path):data = []if file_path.endswith('.jsonl'):with jsonlines.open(file_path) as reader:for i, item in enumerate(reader):try:data.append(item)except jsonlines.jsonlines.InvalidLineError as e:logging.warning(f"跳过无效行 {i + 1}: {e}")elif file_path.endswith('.json'):with open(file_path, 'r') as f:try:data = json.load(f)except json.JSONDecodeError as e:logging.warning(f"跳过无效文件 {file_path}: {e}")return datadef load_model(self):model_path = os.path.join(PROJECT_ROOT, 'models/xihua_model.pth')if os.path.exists(model_path):self.model.load_state_dict(torch.load(model_path, map_location=self.device))logging.info("加载现有模型")else:logging.info("没有找到现有模型,将使用预训练模型")def train_model(self, retrain=False):file_path = filedialog.askopenfilename(filetypes=[("JSONL files", "*.jsonl"), ("JSON files", "*.json")])if not file_path:messagebox.showwarning("文件选择错误", "请选择一个有效的数据文件")returntry:dataset = XihuaDataset(file_path, self.tokenizer)data_loader = DataLoader(dataset, batch_size=8, shuffle=True)# 加载已训练的模型权重if retrain:self.model.load_state_dict(torch.load(os.path.join(PROJECT_ROOT, 'models/xihua_model.pth'), map_location=self.device))self.model.to(self.device)self.model.train()optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-5)criterion = torch.nn.BCEWithLogitsLoss()num_epochs = 30for epoch in range(num_epochs):train_loss = train(self.model, data_loader, optimizer, criterion, self.device, self.progress_var)logging.info(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')self.log_text.insert(tk.END, f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}\n')self.log_text.see(tk.END)torch.save(self.model.state_dict(), os.path.join(PROJECT_ROOT, 'models/xihua_model.pth'))logging.info("模型训练完成并保存")self.log_text.insert(tk.END, "模型训练完成并保存\n")self.log_text.see(tk.END)messagebox.showinfo("训练完成", "模型训练完成并保存")except Exception as e:logging.error(f"模型训练失败: {e}")self.log_text.insert(tk.END, f"模型训练失败: {e}\n")self.log_text.see(tk.END)messagebox.showerror("训练失败", f"模型训练失败: {e}")def evaluate_model(self):test_data_loader = get_data_loader(os.path.join(PROJECT_ROOT, 'data/test_data.jsonl'), self.tokenizer, batch_size=8, max_length=128)accuracy = evaluate(self.model, test_data_loader, self.device)logging.info(f"模型评估准确率: {accuracy:.4f}")self.log_text.insert(tk.END, f"模型评估准确率: {accuracy:.4f}\n")self.log_text.see(tk.END)messagebox.showinfo("评估结果", f"模型评估准确率: {accuracy:.4f}")def mark_correct(self):if self.history:self.history[-1]['accuracy'] = Truemessagebox.showinfo("评价成功", "您认为这次回答是准确的")def mark_incorrect(self):if self.history:self.history[-1]['accuracy'] = Falsemessagebox.showinfo("评价成功", "您认为这次回答是不准确的")def view_history(self):history_window = tk.Toplevel(self.root)history_window.title("历史记录")history_text = tk.Text(history_window, height=20, width=80, font=("Arial", 12))history_text.pack(padx=10, pady=10)for entry in self.history:history_text.insert(tk.END, f"问题: {entry['question']}\n")history_text.insert(tk.END, f"回答类型: {entry['answer_type']}\n")history_text.insert(tk.END, f"具体回答: {entry['specific_answer']}\n")if entry['accuracy'] is None:history_text.insert(tk.END, "评价: 未评价\n")elif entry['accuracy']:history_text.insert(tk.END, "评价: 准确\n")else:history_text.insert(tk.END, "评价: 不准确\n")history_text.insert(tk.END, "-" * 50 + "\n")def save_history(self):file_path = filedialog.asksaveasfilename(defaultextension=".json", filetypes=[("JSON files", "*.json")])if not file_path:returnwith open(file_path, 'w') as f:json.dump(self.history, f, ensure_ascii=False, indent=4)messagebox.showinfo("保存成功", "历史记录已保存到文件")# 主函数
if __name__ == "__main__":# 启动GUIroot = tk.Tk()app = XihuaChatbotGUI(root)root.mainloop()

web_search.py

import requests
from bs4 import BeautifulSoupdef search_web(query):url = f"https://www.baidu.com/s?wd={query}"headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}response = requests.get(url, headers=headers)soup = BeautifulSoup(response.text, 'html.parser')results = []for result in soup.find_all('div', class_='c-container'):title = result.find('h3').get_text()snippet = result.find('div', class_='c-abstract')if snippet:snippet = snippet.get_text()results.append(f"{title}\n{snippet}\n")if results:return '\n'.join(results[:3])  # 返回前三个结果else:return "没有找到相关信息"

logging.conf

[loggers]
keys=root[handlers]
keys=consoleHandler,fileHandler[formatters]
keys=simpleFormatter[logger_root]
level=INFO
handlers=consoleHandler,fileHandler[handler_consoleHandler]
class=StreamHandler
level=INFO
formatter=simpleFormatter
args=(sys.stdout,)[handler_fileHandler]
class=FileHandler
level=INFO
formatter=simpleFormatter
args=('logs/羲和.log', 'a')[formatter_simpleFormatter]
format=%(asctime)s - %(levelname)s - %(message)s
datefmt=%Y-%m-%d %H:%M:%S

目录结构

project_root/
├── data/
│   ├── train_data.jsonl
│   └── test_data.jsonl
├── logs/
│   └── (log files will be generated here)
├── models/
│   └── xihua_model.pth
├── main.py
├── web_search.py
└── logging.conf

说明
main.py:主文件,包含GUI界面和模型加载、训练、评估等功能。
web_search.py:用于从百度搜索信息的模块。
logging.conf:日志配置文件,用于配置日志记录。
data/:存放训练和测试数据文件。
logs/:存放日志文件。
models/:存放训练好的模型权重文件。
通过以上结构和代码,你可以实现一个具有GUI界面的聊天机器人,该机器人可以在本地使用训练好的模型回答问题,如果模型中没有相关内容,则会联网搜索并返回相关信息。

http://www.hkea.cn/news/542748/

相关文章:

  • wordpress如何关闭网站下载app
  • WordPress小程序二次修改石家庄seo排名外包
  • 做百度关键词网站厦门seo外包
  • 泉州seo-泉州网站建设公司谷歌关键词搜索工具
  • 组织部网站建设方案行业关键词分类
  • 上海黄浦 网站制作中国搜索引擎排名2021
  • 手机网站建设 cms营销技巧和营销方法
  • 平顶山做网站优化微博搜索引擎优化
  • 网站如何做品牌宣传海报每日舆情信息报送
  • 做论坛网站需要多大空间seo推广招聘
  • 中国建设银行网站软件不限次数观看视频的app
  • 网站开发建设的步骤win11优化大师
  • 在线做数据图的网站樱桃bt磁力天堂
  • 网站建设费的税率东莞公司网上推广
  • 上海设计公司排名前十宁波seo搜索优化费用
  • 如皋做网站公司com域名
  • 织梦做企业网站教程网络营销推广方案论文
  • 微信如何添加小程序二十条优化措施全文
  • 网站制作费可以做业务宣传费河北百度推广电话
  • wordpress日主题破解网站排名优化软件有哪些
  • 做公众号app 网站 app济南网站设计
  • 单位网站 单位网页 区别吗福州seo顾问
  • 专业做网站制作的公司百度地图网页版进入
  • 买卖网站域名骗局百度推广登陆
  • 石家庄大型网站设计公司手机怎么建网站
  • 政府网站图解怎么做百度关键词排名靠前
  • 天津做网站印标东莞网络推广排名
  • 设计一个外贸网站需要多少钱沈阳网站推广优化
  • 洗化行业做网站福州百度seo排名
  • 西安app网站开发项目腾讯域名注册官网