微信扫码
与创始人交个朋友
我要投稿
今天要介绍的是用DuckDB把向量保存到数据库,并增加一个UI,让它成为一个真正可以使用的RAG应用(当然还是雏形)。
向量数据库的选择很多,这里暂且不讨论它们的优劣性。
pip install duckdb llama-index-vector-stores-duckdb
因为llamaindex已经帮你封装好了,引入DuckDB只需要增加两行代码即可。
# Minimal RAG script: local Ollama models for LLM + embeddings, vectors
# persisted in DuckDB via llamaindex's DuckDBVectorStore wrapper.
from llama_index.core import VectorStoreIndex, Document, SimpleDirectoryReader, Settings, StorageContext
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.duckdb import DuckDBVectorStore

# LLM used for answering; generous timeout because local generation is slow.
Settings.llm = Ollama(model="wizardlm2:7b-q5_K_M", request_timeout=60.0)
# Embedding model used to vectorize documents and queries.
Settings.embed_model = OllamaEmbedding(model_name="snowflake-arctic-embed:latest")

# The "two extra lines": route vectors into DuckDB instead of the default
# in-memory store. (The original snippet imported DuckDBVectorStore but
# never used it, so nothing was actually persisted.)
vector_store = DuckDBVectorStore(database_name="rag.db", persist_dir="duckdb")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
print(chat_engine.chat("DuckDB的VSS扩展主要功能, reply in Chinese"))
可选的UI框架很多,如streamlit, gradio, nicegui等等,今天介绍个streamlit的实现。
• 模型相关的部分封装在rag.py
中
import os
import streamlit as st
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader,Settings
from llama_index.vector_stores.duckdb import DuckDBVectorStore
from llama_index.core import StorageContext
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
@st.cache_resource
def init_model():
    """Register the Ollama LLM and embedding model globally; return the embedding dimension.

    Cached by Streamlit so the models are configured once per process.
    """
    Settings.llm = Ollama(model="wizardlm2:7b-q5_K_M", request_timeout=300.0)
    Settings.embed_model = OllamaEmbedding(model_name="snowflake-arctic-embed:latest")
    # Probe the embedding model once to discover the vector size DuckDB must store.
    probe = Settings.embed_model.get_query_embedding('hello')
    return len(probe)
@st.cache_resource
def init_index(rebuild=False):
    """Open (or rebuild) the DuckDB-backed vector index.

    Args:
        rebuild: when True, re-read ./data, delete any existing DuckDB
            store, and re-embed everything; when False, reuse the
            previously persisted store.

    Returns:
        A VectorStoreIndex backed by duckdb/rag.db.
    """
    embed_dim = init_model()
    db_path = os.path.join("duckdb", "rag.db")
    if rebuild:
        # Load documents BEFORE deleting the old store, so a read failure
        # does not destroy the existing index.
        documents = SimpleDirectoryReader("./data").load_data()
        # The original code called os.remove/os.removedirs unconditionally,
        # which raised FileNotFoundError on the very first run.
        if os.path.exists(db_path):
            os.remove(db_path)
        if os.path.isdir("duckdb"):
            os.removedirs("duckdb")
        vector_store = DuckDBVectorStore(embed_dim=embed_dim, database_name="rag.db", persist_dir="duckdb")
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    else:
        vector_store = DuckDBVectorStore(embed_dim=embed_dim, database_name="rag.db", persist_dir="duckdb")
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    return index
@st.cache_resource
def init_engine():
    """Return a cached chat engine built on a freshly rebuilt vector index."""
    engine = init_index(rebuild=True).as_chat_engine(
        chat_mode="condense_question", verbose=True
    )
    return engine
• UI 部分main.py
import streamlit as st
from rag import init_engine
def main():
    """Streamlit chat UI: keeps the transcript in session_state and answers via the RAG engine."""
    # Seed the conversation with a greeting on first load.
    if "messages" not in st.session_state.keys():
        st.session_state.messages = [
            {"role": "assistant", "content": "I am rag bot!"}
        ]
    # Build the chat engine once per session (init_engine is cached).
    if "chat_engine" not in st.session_state.keys():
        st.session_state.chat_engine = init_engine()
    # Record any new user question before rendering the transcript.
    prompt = st.chat_input("Your question")
    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})
    # Replay the whole conversation so far.
    for entry in st.session_state.messages:
        with st.chat_message(entry["role"]):
            st.write(entry["content"])
    # A trailing user message means we still owe an answer.
    if st.session_state.messages[-1]["role"] != "assistant":
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                answer = st.session_state.chat_engine.chat(prompt)
                st.write(answer.response)
                # Persist the reply so the next rerun displays it.
                st.session_state.messages.append(
                    {"role": "assistant", "content": answer.response}
                )


if __name__ == "__main__":
    main()
效果图已经附上了。
安装所需的包:
pip install llama-index-embeddings-ollama llama-index-llms-ollama llama-index-readers-file llama-index-vector-stores-duckdb duckdb streamlit
或者使用requirements.txt安装,代码在GitHub[1]。
目前只是出了个雏形,接下来还有很多工作要做,如:
• 配置模型选择,判断模型是否下载
• 指定chunksize
• 允许增量增加文档
• 解决streamlit 刷新聊天记录丢失的问题
• 聊天的历史导出
• Windows下基于embed版本Python的懒人包,类似我之前提供的Apache Superset 懒人包
[1]
GitHub: https://github.com/alitrack/rag
53AI,企业落地应用大模型首选服务商
产品:大模型应用平台+智能体定制开发+落地咨询服务
承诺:先做场景POC验证,看到效果再签署服务协议。零风险落地应用大模型,已交付160+中大型企业
2024-03-30
2024-04-26
2024-05-10
2024-04-12
2024-05-28
2024-05-14
2024-04-25
2024-07-18
2024-04-26
2024-05-06
2024-12-22
2024-12-21
2024-12-21
2024-12-21
2024-12-21
2024-12-20
2024-12-20
2024-12-19