Scrapeyard/dashboard.py at master · JGrace99/Scrapeyard · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import streamlit as st
import pandas as pd
from sqlalchemy.orm import Session
from database import Job, engine
from collections import Counter

@st.cache_data
def load_jobs():
    """Load every active job posting into a DataFrame.

    Returns:
        A DataFrame with one row per active ``Job`` and the columns
        ``title``, ``company``, ``category``, ``location``, ``salary``,
        ``tags`` and ``url``. The columns are present even when there are
        no active jobs, so callers can index them unconditionally.
    """
    columns = ["title", "company", "category", "location", "salary", "tags", "url"]
    with Session(engine) as session:
        # `== True` (not `is True`) is what builds the SQL predicate here.
        jobs = session.query(Job).filter(Job.is_active == True).all()  # noqa: E712
        # Copy the values out while the session is still open.
        rows = [{column: getattr(job, column) for column in columns} for job in jobs]
    # Explicit columns: an empty result must still have the expected schema,
    # otherwise column lookups on the returned frame raise KeyError.
    return pd.DataFrame(rows, columns=columns)

#   Load the active jobs as a DataFrame for display in the web application
df = load_jobs()

#   Page heading displayed at the top of the web application
st.title("Scrape-yard")

#   Sidebar elements
st.sidebar.title("Filters")
search = st.sidebar.text_input("Search Title", placeholder="e.g. Junior Engineer")


def _filter_options(frame, column):
    """Return "ALL" followed by the sorted distinct non-null values of a column."""
    return ["ALL"] + sorted(frame[column].dropna().unique().tolist())


categories = _filter_options(df, "category")
selected_categories = st.sidebar.selectbox("Categories", categories)

locations = _filter_options(df, "location")
selected_location = st.sidebar.selectbox("Location", locations)

companies = _filter_options(df, "company")
selected_company = st.sidebar.selectbox("Company", companies)

#   Tags are stored as one comma-separated string per job.
#   Split each string into individual tags and collect the distinct values.
#   Blank entries (empty tag strings, trailing commas) are skipped so the
#   multiselect never offers an empty option.
all_tags = sorted({
    tag.strip()
    for tags_str in df["tags"].dropna()
    for tag in tags_str.split(",")
    if tag.strip()
})
selected_tags = st.sidebar.multiselect("Tags", all_tags)

#   Filtering
#   Each active sidebar filter narrows df further; an empty search box,
#   "ALL", or an empty tag selection leaves df unchanged.
if search:
    # regex=False: the search box holds literal text, so input such as "C++"
    # or "(" must not be interpreted (or rejected) as a regular expression.
    df = df[df["title"].str.contains(search, case=False, na=False, regex=False)]

if selected_location != "ALL":
    df = df[df["location"] == selected_location]

if selected_categories != "ALL":
    df = df[df["category"] == selected_categories]

if selected_company != "ALL":
    df = df[df["company"] == selected_company]

if selected_tags:
    wanted_tags = set(selected_tags)

    def _has_selected_tag(tags_str):
        """Return True when a comma-separated tag string contains any selected tag."""
        # Rows without tags (None/NaN) can never match a tag filter.
        if not isinstance(tags_str, str):
            return False
        # Compare whole tags, not substrings, so "Java" does not match "JavaScript".
        return not wanted_tags.isdisjoint(tag.strip() for tag in tags_str.split(","))

    # astype(bool) keeps the mask boolean even when df is already empty.
    df = df[df["tags"].map(_has_selected_tag).astype(bool)]

#   Add any further conditions on the DataFrame here, before ".dataframe()" is called.
#   df[condition] returns only the rows where the condition is True.


#   Summary metrics come after filtering so they describe the rows shown below
jobs_column, companies_column = st.columns(2)
jobs_column.metric("Total Jobs", len(df))
companies_column.metric("Companies", df["company"].nunique())

#   Disabled: per-location bar chart
# st.subheader("Jobs by Location")
# st.bar_chart(df["location"].value_counts())

st.subheader("Job Listings")
st.dataframe(df)

#   Offer the currently displayed (filtered) table as a CSV download
csv_export = df.to_csv(index=False)
st.download_button(
    label="Download as CSV",
    data=csv_export,
    file_name="jobs_export.csv",
    mime="text/csv",
)

#   Refresh button: drop the cached data and rerun the script so the
#   job postings are loaded again
refresh_requested = st.button("Refresh Jobs")
if refresh_requested:
    st.cache_data.clear()
    st.rerun()

#   Count how often each tag occurs in the filtered listings.
#   Tags are split on "," and stripped, so "a,b" and "a, b" count the same
#   tags; blank entries are skipped so an empty table yields no tags rather
#   than a single empty-string tag.
all_tags_flat = [
    tag.strip()
    for tags_str in df["tags"].dropna()
    for tag in tags_str.split(",")
    if tag.strip()
]
#   Keep the ten most frequent tags; the explicit dtype keeps the counts
#   integer-typed even when there are none.
tag_counts = (
    pd.Series(Counter(all_tags_flat), dtype="int64")
    .sort_values(ascending=False)
    .head(10)
)

st.subheader("Top Tags")
if tag_counts.empty:
    st.info("No tags to display.")
else:
    st.bar_chart(tag_counts)