-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTrain.py
More file actions
143 lines (116 loc) · 5.17 KB
/
Train.py
File metadata and controls
143 lines (116 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
'''
*
* ===================================================
* CropDrop Bot (CB) Theme [eYRC 2025-26]
* ===================================================
*
* This script is intended to be a boilerplate for
* Task 1B of the CropDrop Bot (CB) Theme [eYRC 2025-26].
*
* Filename: Train.py
* Created: 24/08/2025
* Last Modified: 24/08/2025
* Author: e-Yantra Team
* Team ID: [ CB_XXXX ]
* This software is made available on an "AS IS WHERE IS BASIS".
* Licensee/end user indemnifies and will keep e-Yantra indemnified from
* any and all claim(s) that emanate from the use of the Software or
* breach of the terms of this agreement.
*
* e-Yantra - An MHRD project under National Mission on Education using ICT (NMEICT)
*
*****************************************************************************************
'''
'''You can modify this file and add more functions according to your usage.
You are not allowed to add any external packages besides the included packages. You can use built-in Python modules.'''
import time
import signal
import sys
# Import required modules for communication and Q-learning
from Connector import CoppeliaClient
from Qlearning import QLearningController
# Module-level shutdown flag: flipped to True once the user presses Ctrl+C,
# which the training loop polls so it can exit cleanly.
stop_requested = False


def signal_handler(sig, frame):
    """Handle SIGINT (Ctrl+C) by requesting a graceful stop.

    Both parameters follow the ``signal`` callback contract (signal number
    and current stack frame); neither is used here. The handler only flips
    the module-level ``stop_requested`` flag so the training loop can save
    its state and shut down on its own terms.
    """
    global stop_requested
    print("\n[TRAIN] Interrupt received. Stopping training gracefully...")
    stop_requested = True


# Install the handler so Ctrl+C produces a clean shutdown, not a traceback.
signal.signal(signal.SIGINT, signal_handler)
#=== Add Functions Here ===
def main():
    """Run the Q-learning training loop against the CoppeliaSim simulator.

    Flow:
    - Initializes the Q-learning agent and the simulation client.
    - Repeatedly reads sensor data, discretizes it into a state, updates the
      Q-table from the previous (state, action, reward, next_state) tuple,
      then chooses and sends the next action as motor speeds.
    - Saves the Q-table every SAVE_INTERVAL iterations and once more on exit.

    The loop ends when Ctrl+C sets the module-level ``stop_requested`` flag,
    or when the last STAGNATION_WINDOW rewards are all STAGNATION_REWARD
    (the agent is stuck and further updates are pointless).
    """
    global stop_requested

    # === Q-table & Training Configuration ===
    # Adjust according to your need.
    N_STATES = 34           # Number of discrete states (MUST MATCH your Get_state logic)
    N_ACTIONS = 5           # Number of actions available (must match your action_list)
    SAVE_INTERVAL = 50      # Save Q-table to disk every N iterations
    STAGNATION_WINDOW = 20  # Number of recent rewards kept for the stuck-detector
    STAGNATION_REWARD = -3  # Reward value that indicates a bad/stuck step
    LOOP_DELAY = 0.05       # Seconds between control-loop iterations

    # === Initialize Q-learning Controller ===
    ql = QLearningController(n_states=N_STATES, n_actions=N_ACTIONS)
    # Resume from a previous session's Q-table if one exists on disk.
    ql.load_q_table()

    # === Connect to the CoppeliaSim simulator ===
    client = CoppeliaClient()
    client.connect()

    # === Training Loop Initialization ===
    prev_rewards = []   # Sliding window of the most recent rewards
    iteration = 0       # Counts training iterations
    prev_state = None   # State before taking an action (None on first pass)
    prev_action = None  # Action taken from prev_state (None on first pass)

    print("[TRAIN] Starting training loop...")

    # === Training Loop ===
    while not stop_requested:
        # Read sensor data from the simulator.
        sensor_data = client.receive_sensor_data()
        if not sensor_data:
            # Invalid/empty packet: wait briefly and retry.
            time.sleep(LOOP_DELAY)
            continue

        # Convert raw sensor data into a discrete state index.
        state = ql.Get_state(sensor_data)
        reward = 0

        # Skip the Q-update on the very first pass — there is no prior
        # (state, action) pair to attribute the reward to yet.
        if prev_state is not None and prev_action is not None:
            reward = ql.Calculate_reward(state)  # Reward for the action just taken

            # Maintain a sliding window of recent rewards for stuck-detection.
            prev_rewards.append(reward)
            if len(prev_rewards) > STAGNATION_WINDOW:
                prev_rewards.pop(0)
            # BUGFIX: only declare stagnation once the window is FULL.
            # Previously a single -3 reward (window of length 1) satisfied
            # all(...) and stopped training on the first bad step.
            if (len(prev_rewards) == STAGNATION_WINDOW
                    and all(rew == STAGNATION_REWARD for rew in prev_rewards)):
                stop_requested = True

            # Standard Q-learning update with the observed transition.
            ql.update_q_table(prev_state, prev_action, reward, state)

        # Choose the next action (explore or exploit) and translate it into
        # left/right motor speeds.
        action = ql.choose_action(state)
        left_speed, right_speed = ql.perform_action(action)

        print(f"State: {state} | Action: {action} | Reward: {reward}")
        client.send_motor_command(left_speed, right_speed, state=state, action=action,reward=reward)

        # Remember this step so the next iteration can credit its reward.
        prev_state = state
        prev_action = action
        iteration += 1

        # Periodically checkpoint the Q-table to disk.
        if iteration % SAVE_INTERVAL == 0:
            ql.save_q_table()
            print(f"[TRAIN] Saved Q-table at iteration {iteration}")

        # Control loop timing.
        time.sleep(LOOP_DELAY)

    # === Shutdown (Ctrl+C or stagnation) ===
    ql.save_q_table()                # Persist the final Q-table
    client.send_motor_command(0, 0)  # Stop the robot
    client.close()                   # Disconnect from the simulator
    print("[TRAIN] Training stopped and Q-table saved.")
# Script entry point: run training only when executed directly,
# not when this module is imported by another script.
if __name__ == "__main__":
    main()