# LearningTool/app.py at master · easonwang00/LearningTool · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import html
import os

import anthropic
import streamlit as st
from dotenv import load_dotenv
from langchain import LLMChain, PromptTemplate
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatAnthropic
from langchain.chat_models import ChatAnthropic
from langchain.chat_models import ChatOpenAI
from langchain.llms import Anthropic
from langchain.memory import ConversationBufferMemory
from langchain.prompts.chat import (AIMessagePromptTemplate,
                                    ChatPromptTemplate,
                                    MessagesPlaceholder,
                                    HumanMessagePromptTemplate,
                                    SystemMessagePromptTemplate)
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from PyPDF2 import PdfReader

from Generator_Anthropic import Generator_Anthropic

# Read the OpenAI API key from the environment instead of committing it to
# source control. load_dotenv() lets a local, untracked .env file supply
# OPENAI_API_KEY during development; in deployment set the variable directly.
# NOTE(review): a secret key was previously hard-coded on this line; it should
# be treated as compromised and revoked with the provider.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
def retrieve_pdf_text(pdf_file):
    """Extract and concatenate the text of every page of a PDF.

    Extraction is best-effort: any exception raised while opening or reading
    the PDF is printed rather than propagated, and whatever text was collected
    before the failure is returned.

    Args:
        pdf_file: The object handed to ``PdfReader``. In this file the callers
            pass the upload objects returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order with no separator, or
        ``""`` when nothing could be extracted.
    """
    page_texts = []
    try:
        pdf_reader = PdfReader(pdf_file)
        for page_number, page in enumerate(pdf_reader.pages, start=1):
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
            else:
                # Truthiness covers both None and "" so that pages without
                # any extractable text are reported either way.
                print(f"Warning: No text found on page {page_number}")
    except Exception as e:
        # Deliberate catch-all: a broken upload must not crash the app.
        print(f"Error extracting text from PDF: {str(e)}")
    # Joining outside the try block keeps the pages read before a failure.
    return "".join(page_texts)
class Generator:
    """Answer questions about a block of context text using an OpenAI chat model.

    The constructor builds a two-message chat prompt (a system message that
    carries the context and a human message that carries the question) and
    wraps it, together with a ``ChatOpenAI`` model, in an ``LLMChain``.

    Attributes set by ``__init__``:
        system_prompt: System message template returned by ``get_system_prompt``.
        user_prompt: Human message template with the ``{question_input}`` slot.
        chat: The configured ``ChatOpenAI`` instance.
        chain: The ``LLMChain`` that ``run_chain`` executes.
    """

    def __init__(self):
        self.system_prompt = self.get_system_prompt()

        self.user_prompt = HumanMessagePromptTemplate.from_template("{question_input}")

        full_prompt_template = ChatPromptTemplate.from_messages(
            [self.system_prompt, self.user_prompt]
        )

        # Streaming is enabled and a stdout callback handler is attached, in
        # addition to the final string that the chain returns.
        self.chat = ChatOpenAI(
            model_name="gpt-4",
            temperature=0,
            openai_api_key=openai_api_key,
            streaming=True,
            callbacks=[StreamingStdOutCallbackHandler()],
        )

        self.chain = LLMChain(
            llm=self.chat,
            prompt=full_prompt_template,
        )

    def get_system_prompt(self):
        """Return the system message template.

        The template exposes a single ``{context}`` placeholder.

        Returns:
            A ``SystemMessagePromptTemplate`` built from the prompt text.
        """
        # NOTE(review): this template has no {language} placeholder, so the
        # language passed to run_chain does not appear in the prompt text.
        system_prompt = """
        Context: {context},
        Start you answer here:
        """
        return SystemMessagePromptTemplate.from_template(system_prompt)

    def run_chain(self, language, context, question):
        """Run the chain for one question and return its result.

        Args:
            language: Answer language chosen in the UI. Forwarded to the chain
                as ``language``; see the note in ``get_system_prompt``.
            context: Text substituted for ``{context}`` in the system message.
            question: Text substituted for ``{question_input}`` in the human
                message.

        Returns:
            Whatever ``self.chain.run`` returns for these inputs.
        """
        return self.chain.run(
            language=language, context=context, question_input=question,
        )


# ---------------------------------------------------------------------------
# Streamlit page. The whole section re-executes top to bottom on every user
# interaction, so anything that must survive a rerun lives in
# st.session_state.
# ---------------------------------------------------------------------------
st.markdown("<h1 style='text-align: center; color: black;'>Smart Kid🥳</h1>", unsafe_allow_html=True)

# Initialise per-session state on the first run only.
if "history" not in st.session_state:
    st.session_state.history = []

if "context" not in st.session_state:
    st.session_state.context = ""

# True while the "Clear History" confirmation button should be displayed.
if "confirm_clear_history" not in st.session_state:
    st.session_state.confirm_clear_history = False

# Model choice, language selection and PDF uploader side by side.
col1, col2, col3 = st.columns(3)

# Model choice in the 1st column: selects which generator answers questions.
with col1:
    model_choice = st.selectbox("😇", ["Small Model", "Large Model"], key="model_selectbox")
    if model_choice == "Small Model":
        st.write("Powered by OpenAI")
        st.session_state.Generator = Generator()
    elif model_choice == "Large Model":
        st.write("Powered by Anthropic AI")
        st.session_state.Generator = Generator_Anthropic()

# Language choice in the 2nd column, stored in the form passed to run_chain.
with col2:
    language = st.selectbox("✍️", ["English", "中文"], key="language_selectbox")
    st.session_state.language = "english" if language == "English" else "中文"

# PDF uploader in the 3rd column.
with col3:
    pdf_files = st.file_uploader("📚", type=["pdf"], accept_multiple_files=True)

    # The context is only replaced while files are present, so the text of the
    # last upload stays in session state after the files are removed.
    if pdf_files:
        st.session_state.context = "\n\n".join(
            retrieve_pdf_text(pdf_file) for pdf_file in pdf_files
        )

# Background colours alternated between successive Q/A pairs
# (AliceBlue and OldLace).
colors = ["#f0f8ff", "#faf0e6"]

for idx, interaction in enumerate(st.session_state.history):
    # The question is typed by the user and the answer is produced by a model;
    # both are untrusted text placed inside raw HTML below, so escape them to
    # keep markup in either from being rendered or executed.
    past_question = html.escape(str(interaction['question'] or "No Question Provided"))
    past_answer = html.escape(str(interaction['answer'] or "No Answer Available"))

    color = colors[idx % len(colors)]

    st.markdown(f"<div style='background-color: {color}; padding: 10px;'><b>Question: {past_question}</b></div>", unsafe_allow_html=True)
    st.markdown(f"<div style='background-color: {color}; padding: 10px;'><b>🥳 {past_answer}</b></div>", unsafe_allow_html=True)


# Text input for the next question.
question = st.text_input("🌐")

if st.button("Run"):
    generator_response = st.session_state.Generator.run_chain(
        language=st.session_state.language, context=st.session_state.context, question=question
    )
    # The response is stored as returned. It used to be overwritten with a
    # fixed placeholder string here, which hid every real answer.
    print("#generator_response: ", generator_response)
    st.session_state.history.append({"question": question, "answer": generator_response})

    # Rerun so the history loop above renders the new pair immediately.
    st.experimental_rerun()

# Two-step "Clear History". A button nested inside another button's branch can
# never fire: clicking the inner one triggers a rerun in which the outer one
# reads False, so the inner branch is skipped. Persist the pending
# confirmation in session state instead.
if st.button("Clear History"):
    st.session_state.confirm_clear_history = True

if st.session_state.confirm_clear_history:
    if st.button("I want to Clear History"):
        st.session_state.history = []
        st.session_state.confirm_clear_history = False
        st.experimental_rerun()