# rl-twoStepTask/utils.py at main · cogsci-modeling-19/rl-twoStepTask · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import numpy as np
import pandas as pd
import os
from datetime import datetime

def softmax(arr, beta):
    """
    Turn action values into a softmax probability distribution
    :param arr: The array of action values
    :param beta: Inverse temperature of the softmax policy (higher beta -> more deterministic)
    :return: The probability of each action; the probabilities sum to 1 along axis 0
    """
    # Shifting by the global maximum leaves the result unchanged but keeps
    # the exponent non-positive (for beta >= 0), which prevents overflow
    shifted_values = arr - np.max(arr)
    weights = np.exp(beta * shifted_values)
    normaliser = weights.sum(axis=0)
    return weights / normaliser

def random_walk_gaussian(prob, sd, min_prob=0, max_prob=1):
    """
    Advance the reward probabilities by one step of a Gaussian random walk
    :param prob: current reward probabilities
    :param sd: standard deviation of the zero-mean Gaussian noise
    :param min_prob: lower bound the result is clipped to
    :param max_prob: upper bound the result is clipped to
    :return: the noisy reward probabilities, clipped to [min_prob, max_prob]
    """
    # One independent noise sample per entry of prob
    step = np.random.normal(scale=sd, size=np.shape(prob))
    return np.clip(prob + step, min_prob, max_prob)

def load_files_from_folder(folder_path, max_files=None, extension='.csv'):
    """
    Read the matching files of a folder into pandas DataFrames.

    :param folder_path: Path to the folder containing the files.
    :param max_files: Maximum number of files to load. If None, all files are loaded.
    :param extension: Only file names ending with this suffix are loaded.
    :return: A list of pandas DataFrames, ordered alphabetically by file name.
    """
    # Alphabetical order keeps the result reproducible across runs
    selected = sorted(name for name in os.listdir(folder_path) if name.endswith(extension))

    # Keep only the first max_files entries when a limit is given
    if max_files is not None:
        selected = selected[:max_files]

    return [pd.read_csv(os.path.join(folder_path, name)) for name in selected]

# Load latest simulated data from csv
def load_latest_simulated_data(agent_type):
    """
    Load the most recent simulated data set of an agent type
    Looks in data/simulated/<agent_type> (relative to the working directory),
    picks the sub-folder whose name sorts last and reads its simulated_data.csv
    :param agent_type: name of the agent sub-folder (used in path)
    :return: the simulated data as a dataframe
    :raises FileNotFoundError: if the agent folder contains no sub-folders
    """
    data_folder = os.path.join("data", "simulated", agent_type)
    # Only directories can be simulation runs; stray files (e.g. notes or OS
    # metadata) must not be mistaken for the latest run
    timestamped_folders = sorted(
        entry for entry in os.listdir(data_folder)
        if os.path.isdir(os.path.join(data_folder, entry))
    )
    if not timestamped_folders:
        raise FileNotFoundError(f"No simulated data folders found in {data_folder}")
    # Folder names sort lexicographically, so the last one is the latest
    latest_folder = timestamped_folders[-1]
    filename = os.path.join(data_folder, latest_folder, "simulated_data.csv")
    print("Loading data from", filename)
    return pd.read_csv(filename)

def save_simulated_data(task_df: pd.DataFrame, agent_type: str):
    """
    Write the simulated data to a csv file inside a new timestamped folder
    The file ends up at data/simulated/<agent_type>/<YYYYmmdd-HHMMSS>/simulated_data.csv,
    relative to the current working directory
    :param task_df: data as a dataframe
    :param agent_type: ['model_free', 'model_based', 'hybrid'] (used in path)
    :return:
    """
    # One folder per run, named after the moment of saving
    run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    target_dir = os.path.join("data", "simulated", agent_type, run_stamp)
    # Missing parent folders are created on the fly
    os.makedirs(target_dir, exist_ok=True)

    csv_path = os.path.join(target_dir, "simulated_data.csv")
    task_df.to_csv(csv_path, index=False)
    print("Data saved to", csv_path)

def convert_1d_numeric_string_array_to_array(string_array: str) -> np.ndarray:
    """
    Convert the string representation of a 1D numeric array to a float array
    Accepts comma separated ("[0.1, 0.2]") as well as whitespace separated ("[0.1 0.2]") values
    :param string_array: array as a string
    :return: the values as a 1D float array (empty for "[]")
    :raises ValueError: if an element cannot be parsed as a float
    """
    # Treat commas like whitespace so both separators split the same way
    tokens = string_array.strip('[]').replace(',', ' ').split()
    return np.array([float(token) for token in tokens], dtype=float)

def detect_and_convert_1d_string_array(string_array: str):
    """
    Helper function to detect the type of the elements in a string array and convert them to the appropriate type
    Used for converting the rewardProbabilities and rewardDistribution columns from string to array
    :param string_array: comma separated 1D array as a string, e.g. "[0.1, 0.2]" or "[true, false]"
    :return: boolean array if every element is 'true'/'false' (case-insensitive),
             otherwise a float array (booleans mixed with numbers become 1.0 / 0.0);
             an empty float array for an empty array such as "[]"
    :raises ValueError: if an element is neither a number nor a boolean
    """
    # Drop surrounding whitespace and brackets, then every remaining space,
    # so that " [0.1, 0.2] " becomes "0.1,0.2"
    stripped = string_array.strip().strip('[]').replace(' ', '')
    if not stripped:
        # Nothing between the brackets: an empty array, not a malformed element
        return np.array([], dtype=float)

    converted_elements = []
    for element in stripped.split(','):
        lowered = element.lower()
        if lowered in ('true', 'false'):
            # Convert string to boolean
            converted_elements.append(lowered == 'true')
            continue
        try:
            converted_elements.append(float(element))
        except ValueError as err:
            raise ValueError(
                f"Element {element} is neither a recognizable number nor a boolean value."
            ) from err

    if all(isinstance(el, bool) for el in converted_elements):
        return np.array(converted_elements, dtype=bool)
    # Every element is a bool or a float at this point (bool is a subclass of
    # int), so anything that is not purely boolean is represented as float
    return np.array(converted_elements, dtype=float)

def preprocess_human_data(data_df: pd.DataFrame) -> pd.DataFrame:
    """
    Bring the human data from the experiments into the format used for analysis
    The input dataframe is not modified; all changes are made on a copy
    :param data_df: experiment data as a dataframe
    :return: preprocessed copy of the data
    """
    # Copy first, then rename column rewards_Param to rewardDistribution
    data = data_df.copy().rename(columns={'rewards_Param': 'rewardDistribution'})

    # common_transition is taken from isHighProbOne when the stage 1 choice
    # was 0 and from isHighProbTwo otherwise
    chose_first_option = data['stepOneChoice'] == 0
    data['common_transition'] = np.where(chose_first_option,
                                         data['isHighProbOne'],
                                         data['isHighProbTwo'])

    # Derive the second-stage state from the raw stage 2 choice (0-3):
    # 1 if choice is 0 or 1, 2 if choice is 2 or 3.
    # This has to happen before stepTwoChoice is collapsed to 0-1 below
    data['state_transition_to'] = (data['stepTwoChoice'] // 2) + 1

    # Both columns hold arrays encoded as strings; parse them into real arrays
    for column in ('rewardProbabilities', 'rewardDistribution'):
        data[column] = data[column].apply(detect_and_convert_1d_string_array)

    # Collapse stepTwoChoice from range 0-3 to 0-1
    data['stepTwoChoice'] = data['stepTwoChoice'] % 2

    return data

def calculate_bic(num_params, num_data_points, ll):
    """
    Bayesian Information Criterion (BIC) for model comparison: k * ln(n) - 2 * ll
    :param num_params: Number of free parameters that the model has (k)
    :param num_data_points: Number of data points the model has been fitted to (n)
    :param ll: Maximum log likelihood estimation for the model given data
    :return: BIC value
    """
    # The penalty grows with the number of parameters and the sample size
    complexity_penalty = num_params * np.log(num_data_points)
    goodness_of_fit = 2 * ll
    return complexity_penalty - goodness_of_fit

def calculate_aic(num_params, ll):
    """
    Akaike Information Criterion (AIC) for model comparison: 2 * k - 2 * ll
    :param num_params: Number of free parameters that the model has (k)
    :param ll: Maximum log likelihood estimation for the model given data
    :return: AIC value
    """
    complexity_penalty = 2 * num_params
    goodness_of_fit = 2 * ll
    return complexity_penalty - goodness_of_fit