-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
121 lines (99 loc) · 4.23 KB
/
app.py
File metadata and controls
121 lines (99 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from flask import Flask, request, jsonify
import os
import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import PyPDF2
app = Flask(__name__)
UPLOAD_FOLDER = './uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Load the model and tokenizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
weights_path = "AceTrack_T5_weights.pt"
tokenizer_path = "AceTrack_T5_tokenizer"
try:
tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)
model = T5ForConditionalGeneration.from_pretrained("t5-small")
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)
model.eval()
print("Model and tokenizer loaded successfully.")
except Exception as e:
print(f"Error loading model or tokenizer: {e}")
raise
# Function to parse the PDF
def parse_pdf(file_path):
"""
Parses the PDF file and extracts questions, student answers, and correct answers.
Returns a DataFrame with the parsed data.
"""
data = {"Question": [], "Student Answer": [], "Correct Answer": []}
try:
with open(file_path, 'rb') as pdf_file:
reader = PyPDF2.PdfReader(pdf_file)
for page in reader.pages:
text = page.extract_text()
lines = text.split("\n")
# Parse lines for questions and answers
for line in lines:
if line.startswith("Question"):
data["Question"].append(line.split(":", 1)[1].strip())
elif line.startswith("Student Answer"):
data["Student Answer"].append(line.split(":", 1)[1].strip())
elif line.startswith("Correct Answer"):
data["Correct Answer"].append(line.split(":", 1)[1].strip())
# Check for consistent lengths
if not (len(data["Question"]) == len(data["Student Answer"]) == len(data["Correct Answer"])):
raise ValueError("Parsed data has inconsistent lengths.")
return pd.DataFrame(data)
except Exception as e:
raise ValueError(f"Error parsing PDF: {e}")
# Function to generate feedback for all questions
def generate_feedback_for_all(dataframe, model, tokenizer, device):
"""
Loops through each row in the DataFrame and generates feedback using the model.
Returns a list of feedback.
"""
feedback_list = []
for _, row in dataframe.iterrows():
question = row['Question']
student_answer = row['Student Answer']
correct_answer = row['Correct Answer']
# Create the input text for the model
input_text = (
f"Question: '{question}' Correct Answer: '{correct_answer}' "
f"Student Answer: '{student_answer}'"
)
# Tokenize and generate feedback
inputs = tokenizer(input_text, return_tensors='pt', max_length=512, truncation=True).to(device)
with torch.no_grad():
outputs = model.generate(inputs['input_ids'], max_length=100, num_beams=3, early_stopping=True)
feedback = tokenizer.decode(outputs[0], skip_special_tokens=True)
feedback_list.append(feedback)
return feedback_list
# Endpoint to upload a PDF file and generate feedback
@app.route('/upload', methods=['POST'])
def upload_file():
"""
Endpoint to upload a PDF file and process it to generate feedback.
"""
if 'file' not in request.files:
return jsonify({"error": "No file provided"}), 400
file = request.files['file']
if file.filename == '':
return jsonify({"error": "No file selected"}), 400
file_path = os.path.join(UPLOAD_FOLDER, file.filename)
file.save(file_path)
try:
# Parse the PDF
parsed_data = parse_pdf(file_path)
# Generate feedback
feedback_list = generate_feedback_for_all(parsed_data, model, tokenizer, device)
# Combine results with the original questions and answers
parsed_data['Feedback'] = feedback_list
# Return the feedback as JSON
return jsonify(parsed_data.to_dict(orient='records'))
except Exception as e:
return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
app.run(host="0.0.0.0", port=8000, debug=True)