"""
A search utility for the loaded documents, for testing and debugging.

Adapted from https://github.com/wu4f/cs410g-src/blob/main/03_RAG/07_rag_docsearch.py
with small change for env loading and OpenAI embedding.

On start-up the script opens the persisted Chroma database, prints the
metadata of every stored document plus the distinct ``sourceURL`` values,
and then prompts for queries until an empty line (or end of input) is
entered.
"""
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

# Load variables from a .env file before the embedding client is created.
# NOTE(review): presumably this supplies the OpenAI API key -- confirm.
load_dotenv()

vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./rag_data/.chromadb",
)


def search_db(query):
    """Print the source URL of the stored document most similar to *query*.

    Args:
        query: Text to run a similarity search for in the vector store.

    Returns:
        None. The result is written to stdout.
    """
    docs = vectorstore.similarity_search(query)
    print(f"Query database for: {query}")
    if docs:
        # Fall back to a placeholder instead of raising KeyError when the
        # best match was stored without a "sourceURL" metadata entry.
        source_url = docs[0].metadata.get("sourceURL", "<unknown source>")
        print(f"Closest document match in database: {source_url}")
    else:
        print("No matching documents")


print("RAG database initialized.")
retriever = vectorstore.as_retriever()

# Collect the distinct source URLs across all stored documents.
document_data_sources = set()
for doc_metadata in retriever.vectorstore.get()["metadatas"] or []:
    print(f"docm {doc_metadata}")
    # Skip entries with no metadata or no "sourceURL" key rather than
    # aborting the whole listing.
    source_url = (doc_metadata or {}).get("sourceURL")
    if source_url is not None:
        document_data_sources.add(source_url)

for doc in document_data_sources:
    print(f" {doc}")

print(
    "This program queries documents in the RAG database that are similar to whatever is entered."
)

while True:
    try:
        line = input(">> ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D, exhausted piped stdin, or Ctrl-C ends the session like an
        # empty line does, without a traceback.
        break
    if line:
        search_db(line)
    else:
        break