-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path app.py
More file actions
276 lines (224 loc) · 8.62 KB
/
app.py
File metadata and controls
276 lines (224 loc) · 8.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
from flask import Flask, request, render_template, session, jsonify, redirect, url_for
from dotenv import load_dotenv
import os
from pymongo import MongoClient
import certifi
from werkzeug.security import check_password_hash
import pandas as pd
import io
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from werkzeug.utils import secure_filename
from datetime import datetime
import boto3
from botocore.exceptions import NoCredentialsError
# Load environment variables from a local .env file before any config is read.
load_dotenv()
app = Flask(__name__)
# Signs the session cookie; must be set in the environment or sessions break.
app.secret_key = os.getenv("SECRET_KEY")
# MongoDB Configuration
MONGO_URI = os.getenv("MONGO_URI")
# certifi supplies the CA bundle so the TLS connection to MongoDB verifies.
client = MongoClient(MONGO_URI, tlsCAFile=certifi.where())
db = client["KeywordSearch"]
collection = db["users"]  # user documents: {"email": ..., "password": <hash>}
# AWS S3 Configuration
AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_S3_BUCKET = os.getenv("AWS_S3_BUCKET")
AWS_REGION = os.getenv("AWS_REGION")
# Login Page
@app.route('/')
def login():
    """Serve the login form (entry point of the app)."""
    page = render_template('login.html')
    return page
# Authenticating Credentials in MongoDB
@app.route('/login', methods=['POST'])
def authenticate():
    """Validate an email/password JSON payload against the users collection.

    Returns 200 with a redirect target on success, 400 when either field
    is missing, 401 on bad credentials.
    """
    # silent=True returns None (instead of aborting with an opaque 415/400)
    # when the body is missing or not JSON, so we answer with our own error.
    data = request.get_json(silent=True) or {}
    email = data.get("email")
    password = data.get("password")
    if not email or not password:
        return jsonify({"error": "Username and password are required."}), 400
    user = collection.find_one({"email": email})
    # check_password_hash compares against the stored werkzeug hash.
    if user and check_password_hash(user["password"], password):
        session["user"] = email
        return jsonify({"message": "Login successful", "redirect": url_for("upload_file")}), 200
    return jsonify({"error": "Invalid username or password"}), 401
# Upload Page
@app.route('/upload')
def upload_file():
    """Show the upload form; bounce unauthenticated visitors to login."""
    if "user" in session:
        return render_template('upload.html')
    return redirect(url_for("login"))
# Handle File Upload and Processing
@app.route('/process', methods=['POST'])
def process_file():
    """Read an Excel file of keywords, rank each via SerpAPI, upload to S3.

    Expects multipart form fields: 'file' (xlsx with a 'Keywords' column),
    'siteUrl', and 'apiKey'. Returns 200 with the S3 URL of the generated
    report, 400 on bad input, 500 on processing/upload failure.
    """
    if 'file' not in request.files:
        return jsonify({"error": "File is required."}), 400
    if 'siteUrl' not in request.form or not request.form['siteUrl'].strip():
        return jsonify({"error": "Site URL is required."}), 400
    if 'apiKey' not in request.form or not request.form['apiKey'].strip():
        return jsonify({"error": "API Key is required."}), 400
    file = request.files['file']
    site_url = request.form['siteUrl'].strip()
    serp_api_key = request.form['apiKey'].strip()
    # Persisted so /remaining-searches can query the SerpAPI account later.
    session['serp_api_key'] = serp_api_key
    try:
        df = pd.read_excel(io.BytesIO(file.read()))
        if 'Keywords' not in df.columns:
            return jsonify({"error": "The file must contain a 'Keywords' column."}), 400
        keywords = df['Keywords'].dropna().unique()
        # Fan out one SerpAPI lookup per keyword; order of rows follows
        # completion order, matching the original behavior.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(fetch_keyword_result, kw, site_url, serp_api_key)
                       for kw in keywords]
            result_list = [future.result() for future in as_completed(futures)]
        result_df = pd.DataFrame(result_list)
        original_filename = secure_filename(file.filename) or "others_file.xlsx"
        name, ext = os.path.splitext(original_filename)
        name = name or "result"
        ext = ext or ".xlsx"
        # Capture "now" exactly once so the timestamp in the filename and the
        # year/month folder in the S3 path can never disagree (the original
        # called datetime.now() three times, which could straddle a
        # midnight or month boundary).
        now = datetime.now()
        result_file = io.BytesIO()
        result_df.to_excel(result_file, index=False)
        result_file.seek(0)
        download_name = f"{name}_{now.strftime('%Y%m%d_%H%M%S')}{ext}"
        # Convention: text before the first underscore is the client name.
        client_name = name.split("_")[0]
        s3_path = f"{client_name}/{now.strftime('%Y')}/{now.strftime('%B')}/{download_name}"
        file_url = upload_to_s3(result_file, AWS_S3_BUCKET, s3_path)
        if file_url:
            return jsonify({"message": "File processed and uploaded", "s3_url": file_url}), 200
        return jsonify({"error": "Failed to upload to S3"}), 500
    except Exception as e:
        return jsonify({"error": f"An error occurred while processing the file: {str(e)}"}), 500
# Fetch keyword,Page and Position
def fetch_keyword_result(keyword, site_url, serp_api_key):
    """Rank one keyword for *site_url* and package it as a report row.

    Missing rankings are rendered as the literal string 'Not Found'.
    """
    page, position = search_site_rank(keyword, site_url, serp_api_key)
    row = {'Keyword': keyword}
    row['Page'] = page or 'Not Found'
    row['Position'] = position or 'Not Found'
    return row
# SERP API Function
def search_site_rank(query, site_url, serp_api_key, num_results=100):
    """Return (page, position_on_page) of the first organic result whose
    link contains *site_url*, or (None, None) if the site is not ranked.

    Pages are modelled as 10 results each (Google's default), so overall
    rank idx maps to page (idx-1)//10 + 1 and slot idx%10 (10 for the last
    slot on a page).
    """
    url = 'https://serpapi.com/search.json'
    params = {
        'api_key': serp_api_key,
        'q': query,
        'num': num_results,
        'google_domain': 'google.co.in',  # India-localised SERP
        'hl': 'en',
        'gl': 'in',
        'filter': 0,  # do not collapse near-duplicate results
    }
    # Timeout added: without one a stalled SerpAPI call hangs its
    # ThreadPoolExecutor worker forever and blocks /process.
    response = requests.get(url, params=params, timeout=30)
    results = response.json()
    # NOTE(review): substring matching means 'example.com' also matches
    # 'notexample.com.evil' — consider urlparse host comparison; left
    # unchanged to preserve current matching behavior.
    for idx, result in enumerate(results.get("organic_results", []), start=1):
        link = result.get('link', '')
        if site_url in link:
            page_number = (idx - 1) // 10 + 1
            position_on_page = idx % 10 if idx % 10 != 0 else 10
            return page_number, position_on_page
    return None, None
# Upload file to S3
def upload_to_s3(file, bucket_name, file_path):
    """Upload a file-like object to S3 and return its HTTPS URL.

    Returns None on any failure (missing credentials, upload error).
    """
    try:
        # Build the endpoint from AWS_REGION instead of hard-coding
        # 'ap-south-2': the hard-coded endpoint silently broke any
        # deployment whose AWS_REGION env var named a different region.
        s3_client = boto3.client(
            's3',
            endpoint_url=f'https://s3.{AWS_REGION}.amazonaws.com',
            aws_access_key_id=AWS_ACCESS_KEY,
            aws_secret_access_key=AWS_SECRET_KEY,
            region_name=AWS_REGION,
        )
        # Upload the in-memory file object to S3
        s3_client.upload_fileobj(file, bucket_name, file_path)
        # Regional virtual-hosted URL: the bare s3.amazonaws.com form does
        # not resolve for buckets in newer regions such as ap-south-2.
        s3_url = f"https://{bucket_name}.s3.{AWS_REGION}.amazonaws.com/{file_path}"
        return s3_url
    except FileNotFoundError:
        print("Error: The file was not found.")
        return None
    except NoCredentialsError:
        print("Error: Credentials not available.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
# Results Page
@app.route('/results')
def s3_browser_ui():
    """Render the S3 results-browser page for authenticated users."""
    if "user" in session:
        return render_template('results.html')
    return redirect(url_for("login"))
# Download file from S3
@app.route('/browse-s3', methods=['GET'])
def browse_s3():
    """List sub-folders and files (as 1-hour presigned URLs) under a prefix.

    Returns JSON {prefix, folders, files}; 401 when not logged in, 500 on
    AWS errors.
    """
    # Security fix: this was the only data route without a session check,
    # exposing bucket listings and presigned download URLs to anyone.
    # Returns JSON 401 (not a redirect) because callers are fetch() requests.
    if "user" not in session:
        return jsonify({"error": "Unauthorized"}), 401
    prefix = request.args.get('prefix', '')
    # Endpoint derived from AWS_REGION for consistency with upload_to_s3;
    # the previous hard-coded 'ap-south-2' endpoint broke other regions.
    s3_client = boto3.client(
        's3',
        endpoint_url=f'https://s3.{AWS_REGION}.amazonaws.com',
        aws_access_key_id=AWS_ACCESS_KEY,
        aws_secret_access_key=AWS_SECRET_KEY,
        region_name=AWS_REGION,
    )
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=AWS_S3_BUCKET, Prefix=prefix, Delimiter='/')
        folders = []
        files = []
        for page in pages:
            # Delimiter='/' makes S3 report "sub-folders" as CommonPrefixes.
            folders.extend(cp['Prefix'] for cp in page.get('CommonPrefixes', []))
            for obj in page.get('Contents', []):
                key = obj['Key']
                if key == prefix:
                    continue  # skip the folder placeholder object itself
                url = s3_client.generate_presigned_url(
                    'get_object',
                    Params={'Bucket': AWS_S3_BUCKET, 'Key': key},
                    ExpiresIn=3600,  # download links valid for 1 hour
                )
                files.append({'filename': key.split('/')[-1], 'url': url})
        return jsonify({
            'prefix': prefix,
            'folders': folders,
            'files': files
        })
    except NoCredentialsError:
        return jsonify({"error": "AWS credentials not found."}), 500
    except Exception as e:
        return jsonify({"error": str(e)}), 500
# 404 Error Page
@app.errorhandler(404)
def page_not_found(e):
    """Render the custom 404 page for unknown URLs."""
    body = render_template("404.html")
    return body, 404
# Search Limit
@app.route('/remaining-searches', methods=['GET'])
def remaining_searches():
    """Report how many SerpAPI searches remain on the stored API key."""
    if "user" not in session:
        return redirect(url_for("login"))
    serp_api_key = session.get("serp_api_key")
    # Guard added: without it, a session with no stored key sent a request
    # with the literal string api_key=None and surfaced a confusing
    # upstream error to the user.
    if not serp_api_key:
        return jsonify({"error": "No API key available. Process a file first."}), 400
    # params= URL-encodes the key (f-string interpolation did not);
    # timeout prevents the request handler from hanging on a stall.
    response = requests.get('https://serpapi.com/account',
                            params={'api_key': serp_api_key},
                            timeout=30)
    data = response.json()
    if "error" in data:
        return jsonify({"error": data["error"]}), 400
    return jsonify({"total_searches_left": data.get("total_searches_left")}), 200
# Logout Page
@app.route('/logout')
def logout():
    """Drop the authenticated user from the session and return to login."""
    if "user" in session:
        del session["user"]
    return redirect(url_for("login"))
if __name__ == '__main__':
    # Development server only. debug=True enables the interactive Werkzeug
    # debugger (arbitrary code execution) and must not be used in production.
    app.run(debug=True)