42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
from langchain_community.vectorstores import Chroma
|
|
from langchain_community.vectorstores import Chroma
|
|
from langchain_openai import OpenAIEmbeddings
|
|
from dotenv import load_dotenv
|
|
|
|
"""
|
|
A search utility for the loaded documents, for testing and debugging
|
|
Adapted from https://github.com/wu4f/cs410g-src/blob/main/03_RAG/07_rag_docsearch.py with small changes for env loading and OpenAI embeddings
|
|
"""
|
|
|
|
|
|
# Load settings from a .env file before any client object is constructed.
# NOTE(review): presumably this supplies the OpenAI credentials used by
# OpenAIEmbeddings -- confirm against the project's .env.
load_dotenv()

# Module-level Chroma store used by everything below: persisted under
# ./rag_data/.chromadb and configured with OpenAI embeddings.
vectorstore = Chroma(
    persist_directory="./rag_data/.chromadb",
    embedding_function=OpenAIEmbeddings(),
)
|
|
|
|
def search_db(query):
    """Print the source URL of the first document matching ``query``.

    Runs ``similarity_search`` on the module-level ``vectorstore``, echoes
    the query, and then prints the ``sourceURL`` metadata entry of the first
    returned document, or a notice when nothing was returned.

    Args:
        query: Text to search the vector store for.

    Returns:
        None. All results are written to standard output.
    """
    matches = vectorstore.similarity_search(query)
    print(f"Query database for: {query}")

    if not matches:
        print("No matching documents")
        return

    # Only the first returned document is reported.
    first_match = matches[0]
    print(f"Closest document match in database: {first_match.metadata['sourceURL']}")
|
|
|
|
print("RAG database initialized.")
retriever = vectorstore.as_retriever()

# Gather the distinct 'sourceURL' values across all stored metadata entries;
# a set collapses entries that share the same source.
document_data_sources = set()
stored_metadatas = retriever.vectorstore.get()['metadatas']
for entry_metadata in stored_metadatas:
    # Debug dump of each raw metadata record.
    print(f"docm {entry_metadata}")
    document_data_sources.add(entry_metadata['sourceURL'])

# List every distinct source once.
for source_url in document_data_sources:
    print(f" {source_url}")
|
|
|
|
print("This program queries documents in the RAG database that are similar to whatever is entered.")

# Interactive prompt: each non-empty line is passed to search_db(); an empty
# line ends the session.
while True:
    try:
        line = input(">> ")
    except EOFError:
        # input() raises EOFError on Ctrl-D or when piped stdin is exhausted.
        # Treat that like an empty line and exit cleanly rather than ending
        # the session with a traceback.
        print()
        break
    if not line:
        break
    search_db(line)
|