-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathregulator.py
More file actions
86 lines (70 loc) · 3.27 KB
/
regulator.py
File metadata and controls
86 lines (70 loc) · 3.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from collections import deque
from statistics import mean
class Regulator:
    """Regulatory agent that monitors firms in the artificial market and
    rewards compliance / penalizes collusion on production and CSR."""

    def __init__(self, env, omega=1, kappa=3, delta=1.1, production_quota=0.15, CSR_quota=0.15, evaluation_period=5):
        """
        Initializes a Regulator.
        Args:
            env: An instance of the environment class.
            omega (float): Weight of the production term in the penalty equation.
            kappa (float): Weight of the CSR term in the penalty equation.
            delta (float): Base of the exponential escalation in the penalty equation.
            production_quota (float): Threshold for acceptable collusion in terms of production.
            CSR_quota (float): Threshold for acceptable collusion in terms of CSR (Corporate Social Responsibility).
            evaluation_period (int): Number of past periods considered for evaluation.
        """
        # Weights and escalation base of the penalty equation.
        self.omega = omega
        self.kappa = kappa
        self.delta = delta
        # Quotas defining how much collusion is tolerated.
        self.production_quota = production_quota
        self.CSR_quota = CSR_quota
        # Benchmark (collusive vs. competitive) outcomes supplied by the environment.
        self.collusive_theta, self.competitive_theta = env.get_theta()
        self.collusive_q, self.competitive_q = env.get_q()
        # Violation streak starts at -1 so the first violation uses exponent 0.
        self.consecutive_violations = -1
        # Rolling windows of the last `evaluation_period` observed actions.
        self.thetas = deque(maxlen=evaluation_period)
        self.qs = deque(maxlen=evaluation_period)
        # Regulator's budget and the reward produced by the last enforcement step.
        self.budget = 0
        self.most_recent_reward = 0

    def enforce_regulations(self, action):
        """
        Enforces regulations on the firms in the market.
        Args:
            action (Action): The action containing theta and q values to enforce.
        Returns:
            float: The reward or punishment.
        """
        # Record the newest action, then evaluate over the rolling window.
        self.thetas.append(action.theta)
        self.qs.append(action.q)
        self.most_recent_reward = self.compute_reward()
        return self.most_recent_reward

    def compute_reward(self):
        """
        Computes the penalty for violating regulations based on the accumulated theta and q values.
        Penalties increase exponentially when quotas are consecutively not met.
        Returns:
            float: The penalty value.
        """
        # Standardize the window means onto a [competitive=0, collusive=1] scale.
        q_norm = (mean(self.qs) - self.competitive_q) / (self.collusive_q - self.competitive_q)
        theta_norm = (mean(self.thetas) - self.competitive_theta) / (self.collusive_theta - self.competitive_theta)
        base = self.omega * (self.production_quota - q_norm) + self.kappa * (self.CSR_quota - theta_norm)
        # Negative base means at least one quota is exceeded -> violation.
        if base < 0:
            # Escalate: each consecutive violation raises the exponent by one.
            self.consecutive_violations += 1
            penalty = base * (self.delta ** self.consecutive_violations)
            self.budget -= 2 * penalty  # Penalty is negative, so the budget grows.
            return penalty
        # Compliance: reset the streak; exponent -1 discounts the payout by 1/delta.
        self.consecutive_violations = -1
        payout = base * (self.delta ** self.consecutive_violations)
        cost = 2 * payout
        # Never pay out more than the available budget allows.
        if cost > self.budget:
            payout = self.budget / 2
            self.budget = 0
        else:
            self.budget -= cost
        return payout