How to use ChromaDB RAG with OpenAI
import os

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import win32com.client
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

# Assumes the OpenAI API key is available in the environment
key = os.environ["OPENAI_API_KEY"]

# Persistent ChromaDB store on local disk
client_chromadb = chromadb.PersistentClient(r"C:\Users\dnishimoto\python_files\python-deep-learning-master")
collection_name = "emails"

# Reuse the collection if it already exists, otherwise create it
has_collection = [c for c in client_chromadb.list_collections() if c.name == collection_name]
if has_collection:
    collection = client_chromadb.get_collection(
        collection_name,
        embedding_function=OpenAIEmbeddingFunction(api_key=key),
    )
else:
    collection = client_chromadb.create_collection(
        name=collection_name,
        embedding_function=OpenAIEmbeddingFunction(api_key=key),
    )
# Connect to the local Outlook client and open the inbox (6 = olFolderInbox)
session = win32com.client.gencache.EnsureDispatch("Outlook.Application")
namespace = session.GetNamespace("MAPI")
inbox = namespace.GetDefaultFolder(6)
messages = inbox.Items

subjectLines = []
for message in messages:
    print("Subject:", message.Subject)
    subjectLines.append(message.Subject)
    # Upsert each email body (truncated to 7,000 characters), keyed by its Outlook EntryID
    collection.upsert(ids=[message.EntryID], documents=[message.Body[:7000]])
instructions = f"summarize in detail {', '.join(subjectLines)}"

result = collection.query(
    query_texts=[instructions],
    n_results=10,
)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7, openai_api_key=key)

# Concatenate the retrieved email bodies into a single context string
context = ""
for documents in result["documents"]:
    for document in documents:
        context += document

# Use real template variables instead of f-string interpolation so that braces
# inside the email text cannot break the prompt template
prompt = ChatPromptTemplate.from_template(
    "Perform the following user task instruction: {instructions} from the following input: {input}"
)
chain = prompt | llm
response = chain.invoke({"instructions": instructions, "input": context})
output = response.content
print(output)
The emails are stored as embedded documents in ChromaDB's SQLite-backed persistent store so they can be queried by keyword later. LangChain then summarizes the 10 most similar documents returned by the query.
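As a minimal sketch of the "query by keyword later" step, the snippet below reopens the persisted collection and searches it; it assumes the same path, collection name, and key variable as the script above, and "invoice" is only a hypothetical keyword.

import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

# Reopen the same persistent ChromaDB store used by the ingest script
client_chromadb = chromadb.PersistentClient(r"C:\Users\dnishimoto\python_files\python-deep-learning-master")
collection = client_chromadb.get_collection(
    "emails",
    embedding_function=OpenAIEmbeddingFunction(api_key=key),
)

# query() embeds the keyword and returns the closest email bodies by vector similarity
hits = collection.query(query_texts=["invoice"], n_results=5)
for doc_id, doc in zip(hits["ids"][0], hits["documents"][0]):
    print(doc_id, doc[:200])

Because the client is persistent, this can run in a separate session without re-ingesting the inbox.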