-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdirectors.py
More file actions
66 lines (56 loc) · 2.16 KB
/
directors.py
File metadata and controls
66 lines (56 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Director Ranking Tool
# by Mikko Mononen
# Analyzes ratings.csv exported from IMDb.com and prints out the list of user's favorite movie directors.
# Usage: python directors.py >output.csv
# Import output.csv in Excel or equivalent and sort by weighted rating
import csv
# opens a csv file and returns it as a list
def open_csv(filename, encoding=None):
    """Read a CSV file and return all of its rows as a list.

    Args:
        filename: Path of the CSV file to read.
        encoding: Optional text encoding handed to ``open``. ``None`` (the
            default) keeps the platform's default encoding.

    Returns:
        A list of rows; each row is the list of strings produced by
        ``csv.reader`` for one record.
    """
    # newline='' leaves newline handling to the csv module, so line breaks
    # embedded inside quoted fields are read back unchanged.
    with open(filename, 'r', newline='', encoding=encoding) as f:
        return list(csv.reader(f))
# populate and return a dict of directors with names and ratings
# output dict: director name : a list of ratings
def populate_directors_dict(movies, directors):
    """Collect the rating of every movie under each of its directors.

    Only rows whose column 5 equals 'movie' are used. Column 12 is split on
    ", " to obtain the director names and column 1 is parsed as the integer
    rating.

    Args:
        movies: Rows as returned by ``open_csv`` (lists of strings).
        directors: Dict mapping director name -> list of int ratings. It is
            updated in place.

    Returns:
        The same ``directors`` dict. Every list that received a rating is
        sorted from highest to lowest.

    Raises:
        ValueError: If the rating column of a movie row is not an integer.
    """
    touched = set()
    for row in movies:
        # csv.reader yields [] for blank lines, and a truncated row has no
        # director column; neither can be indexed safely, so skip them.
        if len(row) <= 12 or row[5] != 'movie':
            continue
        rating = int(row[1])
        for name in row[12].split(", "):
            # "".split(", ") gives [''], so a movie with no director listed
            # would otherwise be filed under an empty name.
            if not name:
                continue
            directors.setdefault(name, []).append(rating)
            touched.add(name)
    # Sort each modified list once instead of after every single insert.
    for name in touched:
        directors[name].sort(reverse=True)
    return directors
# returns the number of movies in a movie list
def num_of_movies(movies):
    """Count the rows in *movies* whose column 5 equals 'movie'.

    Args:
        movies: Rows as returned by ``open_csv`` (lists of strings).

    Returns:
        The number of matching rows as an int (0 for an empty list).
    """
    # The length check skips blank lines, which csv.reader returns as [],
    # and any other row too short to have a column 5.
    return sum(1 for row in movies if len(row) > 5 and row[5] == 'movie')
# returns the sum of all movie ratings in a movie list
def sum_of_ratings(movies):
    """Add up the ratings of all rows in *movies* whose column 5 is 'movie'.

    The rating is read from column 1 and parsed with ``int``.

    Args:
        movies: Rows as returned by ``open_csv`` (lists of strings).

    Returns:
        The total as an int (0 when no row matches).

    Raises:
        ValueError: If the rating column of a movie row is not an integer.
    """
    # The length check skips blank lines, which csv.reader returns as [],
    # and any other row too short to have a column 5.
    return sum(
        int(row[1])
        for row in movies
        if len(row) > 5 and row[5] == 'movie'
    )
# calculate a weighted average for directors dict
# input: directors (dict), required amount of movies (int)
def calculate_weighted_average(directors, req_movies, mean_rating=None):
    """Print one weighted-rating line per director with enough movies.

    The weighted rating is ``W = (R*v + C*m) / (v + m)`` where
    R is the director's average rating, v the number of the director's
    movies, C the average rating of every film and m ``req_movies``.

    Each printed line has the form ``name;W;v;ratings``.

    Args:
        directors: Dict mapping director name -> list of ratings.
        req_movies: Minimum number of movies a director needs to be listed;
            also used as the weight m of the overall average.
        mean_rating: Optional value for C. When ``None`` (the default) it is
            computed from the module-level ``movies`` list with
            ``sum_of_ratings`` and ``num_of_movies``.

    Returns:
        None. The result is written to standard output.
    """
    for name, ratings in directors.items():
        v = len(ratings)  # number of director's movies
        # An empty rating list has no average; skipping it also avoids a
        # division by zero when req_movies is 0.
        if v == 0 or v < req_movies:
            continue
        if mean_rating is None:
            # The overall average does not depend on the director, so it is
            # computed once, on first use, rather than once per director.
            mean_rating = sum_of_ratings(movies) / num_of_movies(movies)
        R = sum(int(r) for r in ratings) / v  # average rating for director's movies
        C = mean_rating  # the average rating of every film
        m = req_movies  # minimum number of films required
        W = (R*v + C*m) / (v + m)  # weighted average
        print(f"{name};{W};{v};{ratings}")
# Script body: load the ratings export, group the ratings by director and
# print one line for every director with at least 3 rated movies.
# `movies` has to remain a module-level name because
# calculate_weighted_average reads it to work out the overall average.
movies = open_csv('ratings.csv')
# populate_directors_dict fills and returns the dict it is given.
directors = populate_directors_dict(movies, {})
calculate_weighted_average(directors, 3)