当前位置：首页 > news > 正文

小学生家长网站建设需求wordpress实例网址

news 2026/5/5 8:39:24

小学生家长网站建设需求,wordpress实例网址,wordpress建站过程,设计作品集模板 先放参考的原文链接：大语言模型实战——搭建纯本地迷你版RAG_本地rag-CSDN博客 一、大模型选择 在我之前的文章中有讲到，我用的是ollama中的llama3.1：Ollama在Windows安装，使用，简单调用API_ollama如何对外提供api-CSDN博客 二、嵌入模型 … 先放参考的原文链接：大语言模型实战——搭建纯本地迷你版RAG_本地rag-CSDN博客 一、大模型选择 在我之前的文章中有讲到，我用的是ollama中的llama3.1：Ollama在Windows安装，使用，简单调用API_ollama如何对外提供api-CSDN博客 二、嵌入模型 与原文的嵌入模型不同，我选取的是这个，读者也可以尽可能地多尝试。给出下载链接： git clone https://www.modelscope.cn/maidalun/bce-embedding-base_v1.git 三、业务模块 业务模块完全根据参考文章，我只是记录我实现的过程。 1、整体结构解释 data文件夹：我是随便放的一个文档；LRAG文件夹：放的是主要py文件；maidalun1020文件夹：是拉取的嵌入模型。下面我将逐一给出各个代码。 2、文档读取模块 import os from docx import Documentclass ReadFiles:def __init__(self, path: str):self.path pathdef list_files(self):file_list []for filepath, _, filenames in os.walk(self.path):for filename in filenames:file_list.append(os.path.join(filepath, filename))return file_listdef read_file_content(self, file_path: str):# 根据文件扩展名选择读取方法if file_path.endswith(.txt) or file_path.endswith(.stt):return self.read_text(file_path)elif file_path.endswith(.docx):return self.read_docx(file_path)else:print(fUnsupported file type: {file_path})return Nonedef read_text(self, file_path: str):with open(file_path, r, encodingutf-8) as file:return file.read()def read_docx(self, file_path: str):doc Document(file_path)contents [para.text for para in doc.paragraphs]return \n\n.join(contents)def split_chunks(self, text: str):return text.split(\n\n)def load_content(self):docs []file_list self.list_files()for file_path in file_list:# 读取文件内容content self.read_file_content(file_path)if content is None:continuedocs.extend(self.split_chunks(content))return docs# 使用示例 if __name__ __main__:path_to_files ollama-python-main/zdf/data/深度学习.txtreader ReadFiles(path_to_files)content reader.load_content()for doc in content:print(doc) 3、嵌入模块 from typing import Listimport numpy as npclass LocalEmbedding:def __init__(self, path: str) - None:self.path pathself._model self.load_model()def load_model(self):import torchfrom sentence_transformers import 
SentenceTransformerif torch.cuda.is_available():device torch.device(cuda)else:device torch.device(cpu)model SentenceTransformer(self.path, devicedevice, trust_remote_codeTrue)return modeldef get_embedding(self, text: str) - List[float]:return self._model.encode([text])[0].tolist()def cosine_similarity(cls, vector1: List[float], vector2: List[float]) - float:calculate cosine similarity between two vectorsdot_product np.dot(vector1, vector2)magnitude np.linalg.norm(vector1) * np.linalg.norm(vector2)if not magnitude:return 0return dot_product / magnitude 4、向量库模块 from typing import List from Embedding import LocalEmbedding import numpy as np from tqdm import tqdmclass VectorStore:def __init__(self, embedding_model: LocalEmbedding) - None:self._embedding_model embedding_modeldef embedding(self, documents: List[str] []) - List[List[float]]:self._documents documentsself._vectors []for doc in tqdm(self._documents, descCalculating embeddings):self._vectors.append(self._embedding_model.get_embedding(doc))return self._vectorsdef query(self, query: str, k: int 1) - List[str]:query_vector self._embedding_model.get_embedding(query)result np.array([self._embedding_model.cosine_similarity(query_vector, vector)for vector in self._vectors])return np.array(self._documents)[result.argsort()[-k:][::-1]].tolist() 5、大模型导入模块 from typing import List, DictRAG_PROMPT_TEMPLATE 先对上下文进行内容总结,再使用上下文来回答用户的问题。如果你不知道答案就说你不知道。总是使用中文回答。问题: {question} 可参考的上下文 ··· {context} ··· 如果给定的上下文无法让你做出回答请回答数据库中没有这个内容你不知道。有用的回答: class OllamaChat:def __init__(self, model: str llama3.1) - None:self.model modeldef _build_messages(self, prompt: str, content: str):prompt_message RAG_PROMPT_TEMPLATE.format(questionprompt, contextcontent)messages [{role: system, content: You are a helpful assistant.},{role: user, content: prompt_message}]return messagesdef chat(self, prompt: str, history: List[Dict], content: str) - str:import ollama# 给语言模型发送请求response ollama.chat(modelself.model,messagesself._build_messages(prompt, 
content),streamTrue)# 解析并组装响应结果final_response for chunk in response:if isinstance(chunk, str):final_response chunkelif content in chunk.get(message, {}):final_response chunk[message][content]return final_response 6、主程序调用模块 from utils import ReadFiles from Embedding import LocalEmbedding from VertorStore import VectorStore from LLM import OllamaChat# 获得data目录下的所有文件内容并分割 docs ReadFiles(ollama-python-main/zdf/data).load_content() print(fdocs count:{len(docs)} \n first doc: {docs[0]})embedding LocalEmbedding(pathollama-python-main/zdf/maidalun1020/bce-embedding-base_v1) print(fmodel: {embedding})vector VectorStore(embedding_modelembedding) embeddings vector.embedding(docs) print(fembeddings count: {len(embeddings)} \n dimentions: {len(embeddings[0])} \n embedding content: {embeddings[0][:10]})question 深度学习的应用场景有哪些 content vector.query(question, k1)[0] print(f这是\n{content})model OllamaChat(llama3.1) print(f这是大模型回答的\n{model.chat(question, [], content)}) 7、实现结果 实现结果可谓是一塌糊涂，哈哈哈哈哈，不过没关系，刚尝试，已经很开心了。可以细心看下三个图，因为是照着写的，所以输出比较多。为了区分，我特意将大模型回答的和查找到的做了换行区分，如上图。做得很简陋……

查看全文

http://www.hkea.cn/news/14539353/