-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup.py
More file actions
447 lines (358 loc) · 12.5 KB
/
setup.py
File metadata and controls
447 lines (358 loc) · 12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#!/usr/bin/env python3
"""
Complete Setup Script for University of Malakand AI Chatbot
This script sets up everything needed for the intelligent chatbot system
"""
import os
import sys
import subprocess
import time
from pathlib import Path
def print_banner():
    """Display the framed title banner shown at the start of setup."""
    # The same rule line frames the two title lines above and below.
    rule = "π" + "=" * 60 + "π"
    banner_lines = (
        rule,
        " University of Malakand AI Chatbot Setup",
        " Building an Intelligent Information System",
        rule,
        "",  # trailing blank line separates the banner from the step output
    )
    for line in banner_lines:
        print(line)
def check_python_version(min_version=(3, 7)):
    """Check that the running interpreter meets the minimum Python version.

    Args:
        min_version: Sequence of ints (major, minor, ...) giving the lowest
            acceptable version. Defaults to ``(3, 7)``.

    Returns:
        bool: True when ``sys.version_info`` is at least ``min_version``,
        False otherwise. The outcome is also printed.
    """
    print("π Checking Python version...")
    required = ".".join(str(part) for part in min_version)
    if sys.version_info < tuple(min_version):
        print(f"β Python {required}+ is required. Current version:", sys.version)
        return False
    # sys.version starts with the dotted version number; keep only that part.
    print(f"✅ Python {sys.version.split()[0]} detected")
    return True
def install_requirements():
    """Install the pinned third-party packages with pip, one at a time.

    pip is run as ``<this interpreter> -m pip install <package>`` so packages
    land in the environment that is executing this script. Installation stops
    at the first package that fails.

    Returns:
        bool: True when every package installed, False as soon as one fails.
    """
    print("\nπ¦ Installing required packages...")
    requirements = [
        "requests==2.31.0",
        "beautifulsoup4==4.12.2",
        "lxml==4.9.3",
        "flask==2.3.3",
        "flask-cors==4.0.0",
        "werkzeug==2.3.7",
    ]
    for package in requirements:
        print(f"Installing {package}...")
        # Keep pip's normal output quiet, but capture stderr so a failure can
        # be reported with pip's own explanation instead of being discarded.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", package],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
        if result.returncode != 0:
            print(f"β Failed to install {package}")
            details = (result.stderr or "").strip()
            if details:
                print(details)
            return False
        print(f"✅ {package} installed")
    print("✅ All packages installed successfully!")
    return True
def create_project_structure():
    """Create the data and log directories under the current working directory.

    Existing directories are left untouched (``exist_ok=True``) and missing
    parents are created, so the function is safe to run repeatedly.

    Returns:
        bool: Always True; a filesystem error propagates as an exception.
    """
    print("\nπ Creating project structure...")
    directories = [
        "university_data",
        "university_data/pages",
        "university_data/documents",
        "university_data/faculty",
        "university_data/departments",
        "university_data/notifications",
        "university_data/admissions",
        "university_data/research",
        "logs",
    ]
    for directory in directories:
        Path(directory).mkdir(parents=True, exist_ok=True)
        print(f"✅ Created directory: {directory}")
    return True
def create_config_file():
    """Write the default ``config.py`` into the current working directory.

    Any existing ``config.py`` is overwritten.

    Returns:
        bool: Always True; an I/O error propagates as an exception.
    """
    print("\nβοΈ Creating configuration...")
    # NOTE(review): the generated defaults set DEBUG = True together with
    # HOST = "0.0.0.0" - confirm that is intended outside development.
    config = """# University of Malakand AI Chatbot Configuration
# University Settings
UNIVERSITY_NAME = "University of Malakand"
UNIVERSITY_URL = "https://www.uom.edu.pk"
# Scraping Settings
MAX_PAGES = 500 # Limit for GitHub Codespaces
REQUEST_DELAY = 1 # Seconds between requests
TIMEOUT = 10 # Request timeout in seconds
# Database Settings
DB_NAME = "university_knowledge.db"
# Server Settings
HOST = "0.0.0.0"
PORT = 5000
DEBUG = True
# API Settings
ENABLE_SCRAPING_API = True
ENABLE_ADMIN_API = True
"""
    # Pin the encoding so the file does not depend on the platform default.
    with open("config.py", "w", encoding="utf-8") as f:
        f.write(config)
    print("✅ Configuration file created")
    return True
def create_startup_scripts():
    """Generate the three ``start_*.py`` launcher scripts and mark them executable.

    Each launcher imports its target module (``data_scraper``,
    ``uom_ai_chatbot`` or ``flask_server``) and runs it, printing a short
    message on ImportError, Ctrl+C or any other exception. Existing launchers
    are overwritten.

    Returns:
        bool: Always True; an I/O error propagates as an exception.
    """
    print("\nπ Creating startup scripts...")

    # ``\\n`` below is written to the generated file as the two characters
    # backslash + n, i.e. a newline escape inside the launcher's own string.
    scraping_script = """#!/usr/bin/env python3
'''Convenient script to start data scraping'''
if __name__ == "__main__":
    try:
        from data_scraper import main
        main()
    except ImportError:
        print("β Data scraper not found. Please ensure all files are in place.")
    except KeyboardInterrupt:
        print("\\nβΉοΈ Scraping stopped by user")
    except Exception as e:
        print(f"β Error: {e}")
"""

    chatbot_script = """#!/usr/bin/env python3
'''Convenient script to start the chatbot'''
if __name__ == "__main__":
    try:
        from uom_ai_chatbot import main
        main()
    except ImportError:
        print("β Chatbot not found. Please ensure all files are in place.")
    except KeyboardInterrupt:
        print("\\nβΉοΈ Chatbot stopped by user")
    except Exception as e:
        print(f"β Error: {e}")
"""

    server_script = """#!/usr/bin/env python3
'''Convenient script to start the web server'''
if __name__ == "__main__":
    try:
        from flask_server import app
        print("π Starting University of Malakand AI Chatbot Server...")
        print("π± Access the chatbot at: http://localhost:5000")
        app.run(host='0.0.0.0', port=5000, debug=True)
    except ImportError:
        print("β Flask server not found. Please ensure all files are in place.")
    except KeyboardInterrupt:
        print("\\nβΉοΈ Server stopped by user")
    except Exception as e:
        print(f"β Error: {e}")
"""

    scripts = {
        "start_scraping.py": scraping_script,
        "start_chatbot.py": chatbot_script,
        "start_server.py": server_script,
    }
    for script, content in scripts.items():
        # The launchers contain non-ASCII characters and Python reads source
        # files as UTF-8 by default, so write them as UTF-8 explicitly rather
        # than in whatever the platform's default encoding happens to be.
        with open(script, "w", encoding="utf-8") as f:
            f.write(content)
        os.chmod(script, 0o755)
        print(f"✅ Created {script}")
    return True
def create_readme():
    """Write the project ``README.md`` into the current working directory.

    Any existing ``README.md`` is overwritten.

    Returns:
        bool: Always True; an I/O error propagates as an exception.
    """
    print("\nπ Creating README...")
    readme_content = """# π University of Malakand AI Chatbot
An intelligent, comprehensive AI system designed to provide accurate information about the University of Malakand. This system scrapes, processes, and serves university data through an advanced chatbot interface with zero-failure information retrieval.
## π Features
- **Comprehensive Data Collection**: Automatically scrapes all public university information
- **Intelligent Search**: Advanced semantic search with relevance scoring
- **Faculty Information**: Detailed profiles of professors and staff
- **Department Details**: Complete information about all departments
- **Admission Guidance**: Current admission requirements and procedures
- **Real-time Notifications**: Latest university news and announcements
- **Multi-language Support**: Handles queries in English and Urdu
- **Web Interface**: Beautiful, responsive chat interface
- **REST API**: Full API access for integration
## π Prerequisites
- Python 3.7+
- Internet connection for data scraping
- GitHub Codespaces (recommended) or local environment
## π οΈ Installation & Setup
### Option 1: Automatic Setup (Recommended)
```bash
python setup.py
```
### Option 2: Manual Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Create directories
mkdir -p university_data/{pages,documents,faculty,departments,notifications}
# Run setup
python setup.py
```
## π― Usage
### 1. Data Collection (First Time Setup)
```bash
# Start data scraping
python start_scraping.py
# Or run directly
python data_scraper.py
```
### 2. Start the Chatbot (Console)
```bash
python start_chatbot.py
```
### 3. Start Web Interface (Recommended)
```bash
python start_server.py
```
Then open: http://localhost:5000
## π§ API Endpoints
- `GET /` - Web interface
- `POST /api/chat` - Send message to chatbot
- `GET /api/status` - Check system status
- `POST /api/scrape` - Trigger data scraping
- `GET /api/knowledge-base-info` - Knowledge base statistics
## π¬ Example Queries
- "Tell me about Dr. Fakhruddin"
- "How to apply for BS Computer Science?"
- "What are the recent notifications?"
- "Information about English department"
- "Admission requirements for graduate programs"
## π Project Structure
```
university_chatbot/
βββ data_scraper.py # Main scraping engine
βββ uom_ai_chatbot.py # AI chatbot system
βββ flask_server.py # Web server
βββ setup.py # Setup script
βββ config.py # Configuration
βββ requirements.txt # Dependencies
βββ start_*.py # Convenience scripts
βββ university_data/ # Scraped data
β βββ pages/ # Web pages
β βββ documents/ # PDFs and documents
β βββ faculty/ # Faculty information
β βββ departments/ # Department data
β βββ university_knowledge.db # SQLite database
βββ logs/ # System logs
```
## π Key Technologies
- **Web Scraping**: BeautifulSoup, Requests
- **Data Storage**: SQLite, JSON
- **AI Processing**: Custom NLP, TF-IDF
- **Web Interface**: Flask, HTML/CSS/JavaScript
- **Search**: Semantic search with relevance scoring
## π Privacy & Ethics
- Only scrapes publicly available information
- Respects robots.txt and rate limiting
- No personal or private data collection
- Focuses on educational and administrative content
## π Deployment
### GitHub Codespaces
1. Open in Codespaces
2. Run `python setup.py`
3. Start with `python start_server.py`
4. Access via forwarded port
### Local Development
```bash
git clone <repository>
cd university_chatbot
python setup.py
python start_server.py
```
## π Updating Data
The system can update its knowledge base:
```bash
# Refresh university data
python start_scraping.py
# Or via API
curl -X POST http://localhost:5000/api/scrape
```
## π Educational Value
This project demonstrates:
- Advanced web scraping techniques
- Database design and management
- Natural language processing
- RESTful API development
- Modern web interface design
- AI system architecture
## π€ Contributing
Contributions welcome! Please read our contributing guidelines.
## π License
This project is for educational purposes and serves the University of Malakand community.
## π Support
For issues or questions, please create an issue in the repository.
---
**Made with β€οΈ for University of Malakand**
"""
    # The README contains non-ASCII characters; write it as UTF-8 explicitly
    # so the call cannot fail on platforms whose default encoding lacks them.
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(readme_content)
    print("✅ README.md created")
    return True
def create_requirements_file():
    """Write ``requirements.txt`` listing the pinned third-party packages.

    Only installable distributions are listed. Standard-library modules
    (pathlib, sqlite3, concurrent.futures, dataclasses, hashlib) ship with
    Python and must not appear in a requirements file: pip would try to
    resolve them as package names and ``pip install -r requirements.txt``
    could fail.

    Returns:
        bool: Always True; an I/O error propagates as an exception.
    """
    requirements = """requests==2.31.0
beautifulsoup4==4.12.2
lxml==4.9.3
flask==2.3.3
flask-cors==4.0.0
werkzeug==2.3.7
urllib3==2.0.7
"""
    with open("requirements.txt", "w", encoding="utf-8") as f:
        f.write(requirements)
    print("✅ requirements.txt created")
    return True
def _dependencies_importable(modules):
    """Return True when every named top-level module can be located.

    The lookup runs in a fresh interpreter so that packages installed earlier
    in this same process are seen regardless of this process's import state.
    """
    probe = (
        "import importlib.util, sys; "
        "sys.exit(any(importlib.util.find_spec(m) is None for m in sys.argv[1:]))"
    )
    result = subprocess.run(
        [sys.executable, "-c", probe, *modules],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Exit status 0 means no module was missing; any error counts as missing.
    return result.returncode == 0


def run_system_check():
    """Verify the outcome of setup and print a per-item report.

    Checks performed, relative to the current working directory:
      * the interpreter version (via ``check_python_version``),
      * that the ``university_data`` and ``logs`` directories exist,
      * that ``config.py`` exists,
      * that the third-party modules can be located by the interpreter.

    Returns:
        bool: True only when every check passed.
    """
    print("\nπ Running system check...")
    required_dirs = ("university_data", "logs")
    required_modules = ("requests", "bs4", "lxml", "flask", "flask_cors", "werkzeug")
    checks = {
        "Python version": check_python_version(),
        "Project structure": all(Path(d).is_dir() for d in required_dirs),
        "Configuration": Path("config.py").is_file(),
        "Dependencies": _dependencies_importable(required_modules),
    }
    print("\nπ System Check Results:")
    for check, status in checks.items():
        status_icon = "✅" if status else "β"
        print(f"{status_icon} {check}")
    return all(checks.values())
def main():
    """Run every setup stage in order, then verify the result.

    Returns:
        bool: True when all stages and the final system check succeed. False
        on the first failing stage, on a failed system check, when the user
        interrupts with Ctrl+C, or when any other exception is raised.
    """
    print_banner()
    try:
        # (label, callable) pairs; each callable reports success as a bool.
        pipeline = (
            ("Checking Python version", check_python_version),
            ("Installing requirements", install_requirements),
            ("Creating project structure", create_project_structure),
            ("Creating configuration", create_config_file),
            ("Creating requirements file", create_requirements_file),
            ("Creating startup scripts", create_startup_scripts),
            ("Creating documentation", create_readme),
        )
        for step_name, step_func in pipeline:
            print(f"\nπ§ {step_name}...")
            succeeded = step_func()
            if not succeeded:
                # Later stages depend on earlier ones, so stop immediately.
                print(f"β {step_name} failed!")
                return False

        # Final system check
        if not run_system_check():
            print("\nβ Setup completed with warnings. Check the issues above.")
            return False

        closing_messages = (
            "\nπ Setup completed successfully!",
            "\nπ Next Steps:",
            "1. Run 'python start_scraping.py' to collect university data",
            "2. Run 'python start_server.py' to start the web interface",
            "3. Open http://localhost:5000 in your browser",
            "\nπ‘ Pro Tips:",
            "β’ Use GitHub Codespaces for best experience",
            "β’ The first scraping may take 10-15 minutes",
            "β’ Check logs/ directory for detailed information",
            "\nπ Ready to serve University of Malakand! π",
        )
        for message in closing_messages:
            print(message)
        return True
    except KeyboardInterrupt:
        print("\nβΉοΈ Setup interrupted by user")
        return False
    except Exception as e:
        # Top-level boundary: report the error and signal failure to the caller.
        print(f"\nβ Setup failed with error: {e}")
        return False
if __name__ == "__main__":
    # Map the boolean setup result onto a process exit status (0 = success).
    exit_code = 0 if main() else 1
    sys.exit(exit_code)