-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathregulator.py
More file actions
86 lines (70 loc) · 3.27 KB
/
regulator.py
File metadata and controls
86 lines (70 loc) · 3.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from collections import deque
from statistics import mean
class Regulator:
    """Regulatory agent that monitors firms in the artificial market and
    rewards compliance / penalizes collusion on production and CSR."""

    def __init__(self, env, omega=1, kappa=3, delta=1.1, production_quota=0.15, CSR_quota=0.15, evaluation_period=5):
        """
        Initializes a Regulator.
        Args:
            env: An instance of the environment class.
            omega (float): Weight of the production term in the penalty equation.
            kappa (float): Weight of the CSR term in the penalty equation.
            delta (float): Base of the exponential escalation in the penalty equation.
            production_quota (float): Threshold for acceptable collusion in terms of production.
            CSR_quota (float): Threshold for acceptable collusion in terms of CSR (Corporate Social Responsibility).
            evaluation_period (int): Number of past periods considered for evaluation.
        """
        # Weights and escalation base of the penalty equation.
        self.omega = omega
        self.kappa = kappa
        self.delta = delta
        # Quotas defining how much collusion is tolerated.
        self.production_quota = production_quota
        self.CSR_quota = CSR_quota
        # Benchmark (collusive vs. competitive) outcomes supplied by the environment.
        self.collusive_theta, self.competitive_theta = env.get_theta()
        self.collusive_q, self.competitive_q = env.get_q()
        # Violation streak starts at -1 so the first violation uses exponent 0.
        self.consecutive_violations = -1
        # Rolling windows of the last `evaluation_period` observed actions.
        self.thetas = deque(maxlen=evaluation_period)
        self.qs = deque(maxlen=evaluation_period)
        # Regulator's budget and the reward produced by the last enforcement step.
        self.budget = 0
        self.most_recent_reward = 0

    def enforce_regulations(self, action):
        """
        Enforces regulations on the firms in the market.
        Args:
            action (Action): The action containing theta and q values to enforce.
        Returns:
            float: The reward or punishment.
        """
        # Record the newest action, then evaluate over the rolling window.
        self.thetas.append(action.theta)
        self.qs.append(action.q)
        self.most_recent_reward = self.compute_reward()
        return self.most_recent_reward

    def compute_reward(self):
        """
        Computes the penalty for violating regulations based on the accumulated theta and q values.
        Penalties increase exponentially when quotas are consecutively not met.
        Returns:
            float: The penalty value.
        """
        # Standardize the window means onto a [competitive=0, collusive=1] scale.
        q_norm = (mean(self.qs) - self.competitive_q) / (self.collusive_q - self.competitive_q)
        theta_norm = (mean(self.thetas) - self.competitive_theta) / (self.collusive_theta - self.competitive_theta)
        base = self.omega * (self.production_quota - q_norm) + self.kappa * (self.CSR_quota - theta_norm)
        # Negative base means at least one quota is exceeded -> violation.
        if base < 0:
            # Escalate: each consecutive violation raises the exponent by one.
            self.consecutive_violations += 1
            penalty = base * (self.delta ** self.consecutive_violations)
            self.budget -= 2 * penalty  # Penalty is negative, so the budget grows.
            return penalty
        # Compliance: reset the streak; exponent -1 discounts the payout by 1/delta.
        self.consecutive_violations = -1
        payout = base * (self.delta ** self.consecutive_violations)
        cost = 2 * payout
        # Never pay out more than the available budget allows.
        if cost > self.budget:
            payout = self.budget / 2
            self.budget = 0
        else:
            self.budget -= cost
        return payout