How to Build a ChatGPT-like Custom Chatbot in 3 Simple Steps!
Hasan Javaid Malik
Top Data Engineering Voice | CX Analytics & VOC @ Jazz | Databricks & Microsoft Certified | xMatas Group
Hi there!
Today, we are going to learn how to build a custom ChatGPT-like chatbot in three simple steps. You can chat with it generally, just like ChatGPT, and you can also give it your own data in CSV and PDF files and chat with your files!
Step 1
Get Open-AI API Key
First and foremost, you need an OpenAI API Key.
To do so, sign in at https://platform.openai.com/, open the API keys page, and create a new secret key.
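Once you have your OpenAI API key, set it as a system environment variable (on Windows, from a Command Prompt run as administrator, because the /M switch writes a machine-wide variable):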
$setx OPENAI_API_KEY "your_openai_api_key" /M
Download and Install Python & Required Packages
Go to https://www.python.org/downloads/ and download the latest version of python
Once Python is installed, open your command prompt and run these commands to install Chainlit, OpenAI, and LangChain. These are the libraries we will use to build our own chatbot.
$pip install chainlit
$pip install chainlit openai
$pip install langchain
Once these packages are installed, run the following command to verify Chainlit's installation:
领英推荐
$chainlit hello
Step 2
Create a new file app.py and paste the following code into it
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import Runnable
from langchain.schema.runnable.config import RunnableConfig
import chainlit as cl
@cl.on_chat_start
async def on_chat_start():
    """Assemble the prompt -> model -> parser pipeline and keep it in the session.

    The pipeline is stored under the "runnable" key of the Chainlit user
    session so the message handler can retrieve it later.
    """
    llm = ChatOpenAI(streaming=True)

    # Two-turn template: a fixed system instruction plus the user's question.
    system_turn = (
        "system",
        "You're an AI chatbot who provides accurate answers to questions.",
    )
    human_turn = ("human", "{question}")
    chat_prompt = ChatPromptTemplate.from_messages([system_turn, human_turn])

    pipeline = chat_prompt | llm | StrOutputParser()
    cl.user_session.set("runnable", pipeline)
@cl.on_message
async def on_message(message: cl.Message):
    """Stream the session pipeline's answer to the incoming message.

    Reads the pipeline stored under "runnable", feeds it the message text as
    the "question" input, and forwards each streamed chunk to a reply message
    before sending it.
    """
    pipeline = cl.user_session.get("runnable")  # type: Runnable
    reply = cl.Message(content="")

    run_config = RunnableConfig(callbacks=[cl.LangchainCallbackHandler()])
    token_stream = pipeline.astream({"question": message.content}, config=run_config)

    async for token in token_stream:
        await reply.stream_token(token)

    await reply.send()
Step 3
Once you have created that file, go to its directory and run the following command:
$chainlit run app.py
Voila! You have your chatbot running!
Extra Steps
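To add the ability to upload your own PDFs and chat with them, replace the code in app.py with the following code. It imports a few extra packages, so install them first ($pip install PyPDF2 chromadb python-dotenv tiktoken), and place an image named robot.jpeg next to app.py, because the welcome message loads ./robot.jpeg: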
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import os
import io
import chainlit as cl
import PyPDF2
from io import BytesIO
from dotenv import load_dotenv
# load_dotenv() runs first so the key lookup below sees any values it loaded.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Shared splitter used to cut the extracted PDF text into chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=1000)

# System instruction; `{summaries}` is the placeholder for retrieved context.
system_template = """Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
Example of your response should be:
```
The answer is foo
SOURCES: xyz
```
Begin!
----------------
{summaries}"""

# Chat prompt: the system instruction followed by the user's question.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Keyword arguments carrying the custom prompt.
chain_type_kwargs = dict(prompt=prompt)
@cl.on_chat_start
async def on_chat_start():
    """Greet the user, ingest one uploaded PDF, and store a QA chain in the session.

    Steps:
      1. Send a welcome message with the image at ``./robot.jpeg``.
      2. Keep asking until the user uploads a PDF.
      3. Extract the PDF text, split it into chunks, and index the chunks in a
         Chroma vector store with one ``{"source": "<i>-pl"}`` metadata entry
         per chunk.
      4. Build a ``RetrievalQAWithSourcesChain`` over that store and save the
         chain, the chunk texts, and the metadata in the user session under
         "chain", "texts", and "metadatas".

    If the PDF yields no text, the status message is updated with an
    explanation and nothing is stored in the session.
    """
    # Sending an image with the local file path
    elements = [
        cl.Image(name="image1", display="inline", path="./robot.jpeg")
    ]
    await cl.Message(
        content="Hello there, Welcome to AskAnyQuery related to Data!",
        elements=elements,
    ).send()

    # Wait for the user to upload a PDF file. `not files` also re-asks on an
    # empty list, which would otherwise fail at `files[0]`.
    files = None
    while not files:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()
    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Read the PDF file.
    # NOTE(review): some Chainlit releases expose the upload as `file.path`
    # instead of `file.content` - confirm against the installed version.
    pdf = PyPDF2.PdfReader(BytesIO(file.content))
    # `or ""` tolerates pages with no extractable text; join avoids repeated
    # string concatenation.
    pdf_text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Split the text into chunks
    texts = text_splitter.split_text(pdf_text)
    if not texts:
        # Nothing to index (e.g. a scanned, image-only PDF): report it instead
        # of failing while building the vector store from an empty list.
        msg.content = (
            f"No extractable text found in `{file.name}`. "
            "Please restart the chat and upload a different PDF."
        )
        await msg.update()
        return

    # Create metadata for each chunk
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store (built via cl.make_async so the
    # synchronous constructor can be awaited)
    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    # Create a chain that uses the Chroma vector store. The module-level
    # custom prompt is passed through chain_type_kwargs; previously it was
    # defined but never handed to the chain.
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        chain_type_kwargs=chain_type_kwargs,
    )

    # Save the chain, metadata and texts in the user session
    cl.user_session.set("metadatas", metadatas)
    cl.user_session.set("texts", texts)
    cl.user_session.set("chain", chain)

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()
@cl.on_message
async def main(message: "cl.Message | str"):
    """Answer a user question with the session's QA chain and attach its sources.

    Args:
        message: The incoming user message. It may be a plain string or an
            object carrying the text in a ``content`` attribute (the form the
            ``cl.Message``-based handler style uses); both are accepted.

    The chain's "sources" output is split on commas; each name that matches a
    stored chunk's "source" metadata is attached to the reply as a text
    element and listed at the end of the answer.
    """
    chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
    if chain is None:
        # No document has been indexed for this session yet.
        await cl.Message(content="Please upload a PDF file first.").send()
        return

    # Unwrap the text when a message object is passed; a plain string has no
    # `content` attribute and is used as-is.
    # NOTE(review): which form Chainlit passes depends on the installed version.
    question = getattr(message, "content", message)

    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True
    res = await chain.acall(question, callbacks=[cb])

    answer = res["answer"]
    sources = res["sources"].strip()
    source_elements = []

    # Get the metadata and texts from the user session
    metadatas = cl.user_session.get("metadatas")
    all_sources = [m["source"] for m in metadatas]
    texts = cl.user_session.get("texts")

    if sources:
        found_sources = []

        # Add the sources to the message
        for source in sources.split(","):
            source_name = source.strip().replace(".", "")
            # Get the index of the source; names the model returned that do
            # not match a stored chunk are skipped.
            try:
                index = all_sources.index(source_name)
            except ValueError:
                continue
            found_sources.append(source_name)
            # Create the text element referenced in the message
            source_elements.append(cl.Text(content=texts[index], name=source_name))

        if found_sources:
            answer += f"\nSources: {', '.join(found_sources)}"
        else:
            answer += "\nNo sources found"

    if cb.has_streamed_final_answer:
        # The answer was already streamed; only attach the source elements.
        cb.final_stream.elements = source_elements
        await cb.final_stream.update()
    else:
        await cl.Message(content=answer, elements=source_elements).send()
Happy Coding!
IT Certification at TIBCO
10 个月?? Elevate your data skills with CertFun's expertly crafted Databricks Certification practice tests. Visit www.certfun.com/databricks and power up your knowledge! #CertFun #DataSkills ??