# AskMyPDF/main.py at main · saiful247/AskMyPDF · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import os
import streamlit as st
import time
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv
import requests
from google.cloud import vision
from PIL import Image
from io import BytesIO
import json
from google.oauth2 import service_account

# Load environment variables
# (python-dotenv: values from a local .env file, when one is present, are
# added to the process environment.)
load_dotenv()

# Initialize Google Gemini LLM
# The API key comes from Streamlit secrets and is mirrored into os.environ --
# presumably so the langchain-google-genai clients created in this file can
# pick it up without an explicit argument (confirm against the library docs).
google_api_key = st.secrets["GOOGLE_API_KEY"]
os.environ["GOOGLE_API_KEY"] = google_api_key
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.6)

# Google Cloud Vision credentials. The secret is stored as a JSON string, so
# it is parsed into a dict before being handed to the service-account factory.
credentials_dict = json.loads(st.secrets["GOOGLE_VISION_CREDENTIALS"])
credentials = service_account.Credentials.from_service_account_info(
    credentials_dict)

# Shared Vision client; extract_text_from_image() uses it for text detection.
client = vision.ImageAnnotatorClient(credentials=credentials)


def extract_text_from_image(image_source, timeout=30):
    """Run Google Cloud Vision text detection on an image and return its text.

    Args:
        image_source: Either an ``http://`` / ``https://`` URL or the path of
            an image file on the local disk.
        timeout: Seconds to wait for the image download when ``image_source``
            is a URL. Ignored for local files.

    Returns:
        The detected text with surrounding whitespace stripped, or ``None``
        when the Vision response carries no text annotations.

    Raises:
        ValueError: If ``image_source`` is neither a web URL nor an existing
            local file.
        requests.RequestException: If the URL cannot be downloaded (timeout,
            connection failure or HTTP error status).
    """
    # Match real URL schemes only: a bare "http" prefix would also capture
    # local files such as "httpd-screenshot.png".
    if image_source.lower().startswith(("http://", "https://")):
        # Bound the download so a stalled server cannot hang the app, and
        # fail on HTTP errors instead of forwarding an error page to the OCR.
        download = requests.get(image_source, timeout=timeout)
        download.raise_for_status()
        image = vision.Image(content=download.content)
    elif os.path.isfile(image_source):
        with open(image_source, "rb") as image_file:
            image = vision.Image(content=image_file.read())
    else:
        raise ValueError("Invalid image source!")

    # NOTE(review): an API-side failure (response.error) is not inspected
    # here; it surfaces to the caller as "no text" (None).
    response = client.text_detection(image=image)
    texts = response.text_annotations

    # The first annotation is used as the text of the whole image.
    return texts[0].description.strip() if texts else None


# ---- Page chrome -----------------------------------------------------------
st.title("📚 Study Support System")

# Everything inside this context is rendered in the sidebar: the section
# title, the multi-file PDF uploader and the button that starts processing.
with st.sidebar:
    st.title("📂 Upload PDF Files")
    pdf_files = st.file_uploader(
        "Upload up to 2 PDFs",
        type=["pdf"],
        accept_multiple_files=True,
    )
    process_pdf = st.button("Process PDFs")

# ---- Vector index location -------------------------------------------------
# Folder that holds the saved FAISS index; created up front if missing.
file_path = "models/qa_with_pdf"
os.makedirs(file_path, exist_ok=True)

# ---- Per-session flag ------------------------------------------------------
# Starts out False and is flipped once PDFs have been processed.
if "pdf_processed" not in st.session_state:
    st.session_state.pdf_processed = False

# Single-element slot reused for progress messages during PDF processing.
main_placeholder = st.empty()

# Process uploaded PDFs: save each upload to disk, load and split it into
# chunks, embed the chunks into a FAISS index and persist the index.
if process_pdf and pdf_files:
    upload_dir = "uploaded_docs"
    os.makedirs(upload_dir, exist_ok=True)  # once, not once per file

    # The splitter settings never change, so one instance serves every PDF.
    doc_splitter = RecursiveCharacterTextSplitter(
        chunk_size=520, chunk_overlap=50)
    doc_chunks = []

    for pdf_file in pdf_files:
        # The name is supplied by the client: keep only its final component
        # so a crafted name cannot escape the upload directory.
        pdf_path = os.path.join(upload_dir, os.path.basename(pdf_file.name))

        # getvalue() returns the whole upload regardless of the current
        # stream position, unlike read(), which can come back empty if the
        # buffer was already consumed.
        with open(pdf_path, "wb") as f:
            f.write(pdf_file.getvalue())

        main_placeholder.text(f"📄 Loading data from {pdf_file.name}...")
        data = PyPDFLoader(pdf_path).load()

        main_placeholder.text(f"✂️ Splitting data from {pdf_file.name}...")
        doc_chunks.extend(doc_splitter.split_documents(data))

    if not doc_chunks:
        # Nothing to index (e.g. scanned PDFs with no text layer). Building a
        # FAISS index from zero chunks would fail, so report it instead and
        # leave the "processed" flag untouched.
        main_placeholder.empty()
        st.error("❌ No readable text was found in the uploaded PDFs.")
    else:
        # Show the status before the slow step so the user sees it while the
        # embeddings are actually being computed.
        main_placeholder.text("🔄 Embedding Vector Started Building...")

        embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        vectorindex_gemini = FAISS.from_documents(doc_chunks, embedding)
        vectorindex_gemini.save_local(file_path)

        st.session_state["pdf_processed"] = True
        st.success("✅ PDFs processed successfully! You can now ask questions.")

# User Query Input or Image Upload
st.header("💡 Ask Your Question")

# Reserve the text box's position first: the uploader has to be evaluated
# before the text box (its value decides whether typing is allowed), but the
# text box should still appear above it on the page.
query_slot = st.container()

# Image Upload Section (rendered below the text input)
image_file = st.file_uploader(
    "Or upload an image (PNG, JPG, JPEG)", type=["png", "jpg", "jpeg"])

# Text Input Section
# Typing is disabled only while an image is attached. Tying this to the
# uploader's current value, rather than to a flag that is only cleared by
# submitting typed text, means the box becomes usable again as soon as the
# image is removed instead of staying locked for the rest of the session.
query = query_slot.text_input("Enter your question here",
                              disabled=image_file is not None)

# "Ask" Button Logic
if st.button("Ask"):
    # Handle one question: validate the inputs, turn them into a query string
    # (typed text, or text read from the uploaded image), retrieve matching
    # chunks from the saved index and show the model's answer.

    # Prevent submission if PDFs are not processed
    if not st.session_state["pdf_processed"]:
        st.error("⚠️ Please upload and process a PDF before asking a question!")
        st.stop()

    # Ensure only one input is used
    if query and image_file:
        st.warning(
            "⚠️ You can either type a question OR upload an image, not both.")
        st.stop()

    # Process Query from Text Input
    if query:
        st.session_state["image_uploaded"] = False
        final_query = query

    # Process Query from Image Upload
    elif image_file:
        st.session_state["image_uploaded"] = True

        # Do not rely on an earlier step having created the directory, and
        # strip any directory part from the client-supplied name so the file
        # cannot be written outside the upload directory.
        os.makedirs("uploaded_docs", exist_ok=True)
        image_path = os.path.join(
            "uploaded_docs", os.path.basename(image_file.name))

        # getvalue() yields the full upload regardless of stream position.
        with open(image_path, "wb") as f:
            f.write(image_file.getvalue())

        extracted_text = extract_text_from_image(image_path)

        if extracted_text:
            st.success("✅ Text extracted successfully from the image!")
            final_query = extracted_text
        else:
            st.error("❌ No text found in the image. Try another image.")
            st.stop()

    else:
        st.warning("⚠️ Please enter a question or upload an image!")
        st.stop()

    # Ensure the vector index exists before querying
    if final_query and os.path.exists(file_path):
        embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

        # SECURITY NOTE(review): this flag permits pickle deserialization of
        # the stored index. That is acceptable only while the index directory
        # contains nothing but files written by this application.
        vectorindex_gemini = FAISS.load_local(
            file_path, embedding, allow_dangerous_deserialization=True)

        retriever = vectorindex_gemini.as_retriever()
        # "stuff" chain type: the retrieved chunks go into a single prompt.
        qa_chain = load_qa_chain(llm, chain_type="stuff")

        chain = RetrievalQA(
            combine_documents_chain=qa_chain, retriever=retriever)
        result = chain({"query": final_query})

        st.header("🤖 Answer")
        st.subheader(result["result"])