-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdashboard.py
More file actions
102 lines (79 loc) · 2.96 KB
/
dashboard.py
File metadata and controls
102 lines (79 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import streamlit as st
import pandas as pd
from sqlalchemy.orm import Session
from database import Job, engine
from collections import Counter
@st.cache_data
def load_jobs():
    """Load every active job posting into a DataFrame.

    Opens a SQLAlchemy session on ``engine``, selects the ``Job`` rows whose
    ``is_active`` flag is true and copies the fields shown on the dashboard
    into plain dictionaries. The function is wrapped in ``st.cache_data``.

    Returns:
        pandas.DataFrame: One row per active job with the columns ``title``,
        ``company``, ``category``, ``location``, ``salary``, ``tags`` and
        ``url``. The columns are present even when no job is active.
    """
    # Declared once so that an empty result still yields a frame with every
    # column; ``pd.DataFrame([])`` has none, and the later ``df["category"]``
    # lookup would raise KeyError.
    columns = ["title", "company", "category", "location", "salary", "tags", "url"]

    with Session(engine) as session:
        # ``== True`` is deliberate: SQLAlchemy overloads the operator to
        # build the SQL comparison, so it must not be replaced with ``is``.
        jobs = session.query(Job).filter(Job.is_active == True).all()  # noqa: E712
        # Read the attributes while the session is still open.
        rows = [{name: getattr(job, name) for name in columns} for job in jobs]

    return pd.DataFrame(rows, columns=columns)
# Load the active job postings as a DataFrame for display in the web app.
df = load_jobs()

# Everything below is what gets displayed on the web application.
st.title("Scrape-yard")

# Sidebar elements
st.sidebar.title("Filters")
search = st.sidebar.text_input("Search Title", placeholder="e.g. Junior Engineer")

# Each select box lists the distinct non-null values of one column, with
# "ALL" first to mean "do not filter on this column".
categories = ["ALL"] + sorted(df["category"].dropna().unique().tolist())
selected_categories = st.sidebar.selectbox("Categories", categories)

locations = ["ALL"] + sorted(df["location"].dropna().unique().tolist())
selected_location = st.sidebar.selectbox("Location", locations)

companies = ["ALL"] + sorted(df["company"].dropna().unique().tolist())
selected_company = st.sidebar.selectbox("Company", companies)

# Tags come from the database as one comma-separated string per job, so each
# string is split into individual tags, which are then de-duplicated.
all_tags = sorted({
    tag.strip()
    for tags_str in df["tags"].dropna()
    for tag in tags_str.split(",")
    if tag.strip()  # skip blanks left by empty strings or stray commas
})
selected_tags = st.sidebar.multiselect("Tags", all_tags)
# Filtering: every active sidebar control narrows df to the matching rows.
# df[condition] returns only the rows where the condition is True.
if search:
    # regex=False: the search box holds plain text, so input such as "C++"
    # or "(" is matched literally instead of being compiled as a regex
    # (which would raise re.error).
    df = df[df["title"].str.contains(search, case=False, na=False, regex=False)]

if selected_location != "ALL":
    df = df[df["location"] == selected_location]

if selected_categories != "ALL":
    df = df[df["category"] == selected_categories]

if selected_company != "ALL":
    df = df[df["company"] == selected_company]

if selected_tags:
    wanted_tags = set(selected_tags)

    def _has_selected_tag(tags_str):
        """Return True when the row carries at least one selected tag."""
        # Rows without tags hold None/NaN rather than a string.
        if not isinstance(tags_str, str):
            return False
        # Compare whole tags so that "Java" does not also match "JavaScript".
        return not wanted_tags.isdisjoint(
            tag.strip() for tag in tags_str.split(",")
        )

    # astype(bool): when the earlier filters left no rows, apply() returns an
    # empty object-dtype Series, which is not a valid boolean mask.
    tag_mask = df["tags"].apply(_has_selected_tag).astype(bool)
    df = df[tag_mask]
# Any further df[condition] filters belong above this point: the metrics and
# the table below are meant to describe the already-filtered rows, so the
# counts shown always match the listing.
jobs_col, companies_col = st.columns(2)

with jobs_col:
    st.metric(label="Total Jobs", value=len(df))

with companies_col:
    st.metric(label="Companies", value=df["company"].nunique())

# Disabled chart, kept for reference:
# st.subheader("Jobs by Location")
# st.bar_chart(df["location"].value_counts())

st.subheader("Job Listings")
st.dataframe(df)
# Offer the table as currently filtered as a CSV download for further
# data processing.
csv_export = df.to_csv(index=False)
st.download_button(
    label="Download as CSV",
    data=csv_export,
    file_name="jobs_export.csv",
    mime="text/csv",
)

# Refresh button: clear the st.cache_data cache and rerun the script so the
# most up-to-date scraped job postings are retrieved.
refresh_requested = st.button("Refresh Jobs")
if refresh_requested:
    st.cache_data.clear()
    st.rerun()
# Top tags: count how often each tag occurs in the filtered listings.
# Each tags string is split on "," and every piece stripped, so "a,b" and
# "a, b" are counted the same way; blank pieces (empty strings, stray commas)
# are skipped so they never show up as a tag of their own.
tag_counter = Counter(
    tag.strip()
    for tags_str in df["tags"].dropna()
    for tag in tags_str.split(",")
    if tag.strip()
)

# most_common(10) returns the ten most frequent tags, highest count first.
tag_counts = pd.Series(dict(tag_counter.most_common(10)), dtype="int64")

st.subheader("Top Tags")
if tag_counts.empty:
    # Nothing to plot when the current filters leave no tagged jobs.
    st.info("No tags to display for the current filters.")
else:
    st.bar_chart(tag_counts)