-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetlistfm_events_api.py
More file actions
173 lines (139 loc) · 4.96 KB
/
setlistfm_events_api.py
File metadata and controls
173 lines (139 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
"""
Given a valid API key (see `utils.py`), retrieve setlist.fm data.
"""
__author__ = ["Mark Gotham", "Shujin Gan"]
from pathlib import Path
from typing import Union, Optional
THIS_DIR = Path.cwd()
from utils import SETLIST_FM_KEY
import json
import pandas as pd
import requests
import time
import numpy as np
def get_event_data(
    event_id: str,
    api_key: Optional[str] = None,
    timeout: float = 30.0
) -> Optional[dict]:
    """
    Given a valid setlist.fm event_id, return the event data.
    Call the Setlist.fm API and return the decoded JSON response body.

    Args:
        event_id (str): The setlist.fm event ID; it is inserted into the request URL.
        api_key (str, optional): API key sent in the "x-api-key" header.
            Falls back to `SETLIST_FM_KEY` when None.
        timeout (float): Seconds to wait for the server before giving up,
            so that one stalled connection cannot hang a long batch run.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    if api_key is None:
        api_key = SETLIST_FM_KEY

    url = f"https://api.setlist.fm/rest/1.0/setlist/{event_id}"
    headers = {"Accept": "application/json", "x-api-key": api_key}

    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error statuses instead of handing an error payload
    # back to the caller as though it were event data.
    r.raise_for_status()
    return r.json()
def extract_event_date(event_data: dict) -> list:
    """
    Pull the event date, and the year part of it, out of the event data.

    Returns:
        list: `[date, year]`. `date` is whatever is stored under "eventDate"
        (an empty string if the key is absent); `year` is everything from
        the seventh character of the date onwards, or an empty string when
        the date is falsy.
    """
    event_date = event_data.get("eventDate", "")
    # Dates are expected in dd-MM-YYYY format, so the year starts at index 6.
    year_part = event_date[6:] if event_date else ""
    return [event_date, year_part]
def extract_tour_name(event_data: dict) -> Union[str, float]:
    """
    Look up the name of the tour an event belongs to.

    Returns:
        The value stored under `event_data["tour"]["name"]`, or `np.nan`
        when the event has no "tour" entry (or an empty / falsy one).
    """
    tour_info = event_data.get("tour")
    if not tour_info:
        # No tour recorded for this event.
        return np.nan
    return tour_info["name"]
def extract_venue_data(event_data: dict) -> list:
    """
    Pull the venue identifier and venue name out of the event data.

    Returns:
        list: `[venue_id, venue_name]`, read from the "id" and "name"
        entries of `event_data["venue"]`.

    Raises:
        KeyError: If "venue", or its "id" / "name" entry, is missing.
    """
    venue_info = event_data["venue"]
    return [venue_info[field] for field in ("id", "name")]
def process_event_ids(
    event_ids: list,
    write_full_sets: bool = True
) -> dict:
    """
    Process a list of event ids and return a dictionary with the results.
    In the case of failure with one event ID, print a note (rather than raising an error)
    and continue to later items on the list. Events that fail at any stage
    (retrieval, setlist dump, or field extraction) are left out of the results.

    Args:
        event_ids (list): A list of valid setlist.fm event IDs.
        write_full_sets (bool): If true, also dump the full set information
            for each event to a separate `{event_id}.json` file in the
            "setlists" directory (created if it does not exist yet).

    Returns:
        dict: A column-oriented mapping with the keys "event_id", "date",
        "year", "tour_name", "venue_id" and "venue_name"; each value is a
        list holding one entry per successfully processed event.
    """
    results = {
        "event_id": [],
        "date": [],
        "year": [],
        "tour_name": [],
        "venue_id": [],
        "venue_name": []
    }

    for event_id in event_ids:
        print(f"Processing event id: {event_id}")
        # Pause before every request (presumably to stay within the API's rate limit).
        time.sleep(3)

        # `except Exception` (not a bare `except`) keeps this loop best-effort
        # while still letting Ctrl-C / SystemExit stop a long run.
        try:
            event_data = get_event_data(event_id)
        except Exception:
            print(f"Failed to retrieve data for event {event_id}")
            continue

        if write_full_sets:
            try:
                # Look the sets up *before* opening the file, so a response
                # without them does not leave an empty .json file behind.
                full_sets = event_data['sets']['set']
                setlist_dir = THIS_DIR / "setlists"
                setlist_dir.mkdir(parents=True, exist_ok=True)
                with open(setlist_dir / f"{event_id}.json", "w") as f:
                    json.dump(full_sets, f, indent=4)
            except Exception:
                print(f"Failed to retrieve full setlist data for event {event_id}")
                continue

        # Extract everything first and append afterwards, so that a failure
        # cannot leave the result columns with different lengths.
        try:
            date, year = extract_event_date(event_data)
            tour_name = extract_tour_name(event_data)
            venue_id, venue_name = extract_venue_data(event_data)
        except (AttributeError, KeyError, TypeError):
            print(f"Failed to extract event-level data for event {event_id}")
            continue

        results["event_id"].append(event_id)
        results["date"].append(date)
        results["year"].append(year)
        results["tour_name"].append(tour_name)
        results["venue_id"].append(venue_id)
        results["venue_name"].append(venue_name)

    return results
def export_to_csv(
    results: dict,
    filename: str
) -> None:
    """
    Export the results to a csv file.

    Args:
        results (dict): Dictionary containing the results to export.
            It is passed straight to `pd.DataFrame`.
        filename (str): The name of the output file.
            Please note that the path (where to write the csv to) is hard-coded to the "data" directory.
            Only the name is variable.

    The "data" directory is created if it does not exist yet, so that a
    missing folder cannot discard results that have already been collected.
    The csv is written without the DataFrame index.
    """
    df = pd.DataFrame(results)
    out_dir = THIS_DIR / "data"
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / filename, index=False)
def load_event_ids_from_csv(
    filename: Union[Path, str] = THIS_DIR / "distinct_setlist_IDs" / "Test.csv"
) -> list:
    """
    Load event ids from a csv file.

    Args:
        filename (Path or str): Path of the comma-separated csv file to load
            from (default: "distinct_setlist_IDs/Test.csv" under `THIS_DIR`).
            The file must have an "eventID" column.

    Returns:
        list: The distinct values of the "eventID" column.

    Raises:
        KeyError: If the csv has no "eventID" column.
    """
    df = pd.read_csv(filename, sep=",", engine="python")
    # `.unique()` yields a NumPy array; convert it so the function really
    # returns the plain list its signature promises.
    return df["eventID"].unique().tolist()
def main(band_name: str = "Test") -> None:
    """
    Run the whole pipeline for one band.

    Reads the event ids from `distinct_setlist_IDs/{band_name}.csv`,
    retrieves the data for those events through the Setlist.fm API,
    and writes the collected results to
    `{band_name}_event_date_tour_venue.csv`.
    """
    source_csv = THIS_DIR / "distinct_setlist_IDs" / f"{band_name}.csv"
    output_name = f"{band_name}_event_date_tour_venue.csv"

    collected = process_event_ids(load_event_ids_from_csv(source_csv))
    export_to_csv(collected, output_name)
# Script entry point: process the "Test" band when this file is run directly.
if __name__ == "__main__":
    main(band_name="Test")