IPACT/not_main.py at main · wooseungw/IPACT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
from glob import glob
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv

load_dotenv()

# Initialize variables
documents = []
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Define the directory containing the PDF files
pdf_directory = './data'

# Use glob to get all PDF files in the directory
pdf_files = glob(os.path.join(pdf_directory, '*.pdf'))

# Load all PDF files using PyPDFLoader
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    pdf_documents = loader.load()
    documents.extend(pdf_documents)

# Split the documents into chunks
chunk_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = chunk_splitter.split_documents(documents)

# Create embeddings
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
vectordb = Chroma.from_documents(documents=chunks, embedding=embeddings)
retriever = vectordb.as_retriever()

# Create the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(api_key=OPENAI_API_KEY, model_name="gpt-4", temperature=0),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# Define the chatbot response function
def get_chatbot_response(chatbot_response):
    print(chatbot_response['result'].strip())
    print('\n문서 출처:')
    for source in chatbot_response["source_documents"]:
        print(source.metadata['source'])

# Set up the Gradio interface
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="")
    msg = gr.Textbox(label="질문해주세요!")
    clear = gr.Button("Clear")

    def response(message, chat_history=[]):
        result = qa_chain.invoke(message)
        bot_message = result['result']
        bot_message += '\n#source :'
        for i, doc in enumerate(result['source_documents']):
            bot_message += '[' + str(i + 1) + ']' + doc.metadata["source"] + '\n'
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(response, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()