import math
import sys

import matplotlib.pyplot as plt
import termplotlib as tpl
import torch
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

# Make the local warmup scheduler importable when the script is run from the repo root.
sys.path.append('.')
from gradual_warmup_lr_scheduler import GradualWarmupScheduler
def check_annealing(model, optimizer, param_dict):
    """Simulate cosine annealing with manual warm restarts and record the lr at every iteration."""
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=param_dict['t_max'], eta_min=param_dict['eta_min'], last_epoch=-1)
    lr_list = []
    for epoch in range(param_dict['epochs']):
        for idx in range(param_dict['steps']):
            now_itr = epoch * param_dict['steps'] + idx
            # get_last_lr() is the supported accessor; get_lr() warns when called outside step().
            now_lr = scheduler.get_last_lr()
            lr_list.append(now_lr[0])
            optimizer.step()
            scheduler.step()
            # Restart once the schedule has annealed down to eta_min.
            if optimizer.param_groups[0]['lr'] == param_dict['eta_min']:
                if param_dict['whole_decay']:
                    # Lower the restart peak along a cosine envelope over the whole run.
                    annealed_lr = param_dict['lr'] * (1 + math.cos(
                        math.pi * now_itr / (param_dict['epochs'] * param_dict['steps']))) / 2
                    optimizer.param_groups[0]['initial_lr'] = annealed_lr
                # Lengthen the next cycle by t_mult and start a fresh cosine schedule.
                param_dict['t_max'] *= param_dict['t_mult']
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, T_max=param_dict['t_max'], eta_min=param_dict['eta_min'], last_epoch=-1)
    return lr_list
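# The manual restart loop above recreates CosineAnnealingLR whenever the lr reaches eta_min.
# For comparison, the sketch below traces the same kind of sweep with PyTorch's built-in
# CosineAnnealingWarmRestarts (T_0 = length of the first cycle, T_mult = cycle growth factor).
# It is an added illustration, not part of the original test, and it ignores whole_decay.
def check_annealing_builtin(optimizer, param_dict):
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=param_dict['t_max'], T_mult=param_dict['t_mult'],
        eta_min=param_dict['eta_min'])
    lr_list = []
    for _ in range(param_dict['epochs'] * param_dict['steps']):
        lr_list.append(scheduler.get_last_lr()[0])
        optimizer.step()
        scheduler.step()
    return lr_list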
# def show_graph(lr_lists, epochs, steps, out_name=None):
# fig = tpl.figure()
# # fig.plot(x, y, width=60, height=20)
# x = list(range(epochs * steps))
# fig.plot(x, lr_lists, width=200, height=40)
# fig.show()
def show_graph(lr_lists, epochs, steps, out_name='test'):
    """Plot the recorded learning-rate curve on a log scale."""
    plt.clf()
    plt.rcParams['figure.figsize'] = [20, 5]
    x = list(range(epochs * steps))
    plt.plot(x, lr_lists, label=out_name)
    plt.ylim(1e-4, 1)
    plt.yscale("log")
    plt.xlabel("iterations")
    plt.ylabel("learning rate")
    plt.title("Check Cosine Annealing Learning Rate with {}".format(out_name))
    plt.legend()
    plt.show()
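# Terminal-friendly variant of show_graph, reconstructed from the commented-out draft above.
# It assumes termplotlib (and gnuplot, which termplotlib drives) is installed; treat it as a
# sketch rather than part of the original test flow.
def show_graph_term(lr_lists, epochs, steps):
    fig = tpl.figure()
    x = list(range(epochs * steps))
    fig.plot(x, lr_lists, width=200, height=40)
    fig.show()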
def test_scheduler():
    """Warm up for 10 epochs of steps, then hand over to cosine annealing with warm restarts."""
    max_epoch = 100
    max_step = 5005
    v = torch.zeros(10)
    multiplier = 1024
    base_lr = 0.1 / multiplier
    optimizer = torch.optim.SGD([v], lr=base_lr)
    # CosineAnnealingWarmRestarts(optimizer, T_0, T_mult=1, eta_min=0, last_epoch=-1):
    #   T_0 is the number of iterations in the first cycle, T_mult scales each following cycle.
    scheduler_cosine_restart = CosineAnnealingWarmRestarts(
        optimizer, T_0=30 * max_step, T_mult=2)
    # GradualWarmupScheduler ramps the lr from base_lr toward base_lr * multiplier over
    # total_epoch steps, then defers to after_scheduler.
    scheduler = GradualWarmupScheduler(
        optimizer, multiplier=multiplier, total_epoch=10 * max_step,
        after_scheduler=scheduler_cosine_restart)
    lr_list = []
    for epoch in range(max_epoch):
        for step in range(max_step):
            optimizer.step()
            scheduler.step()
            lr_list.append(optimizer.param_groups[0]['lr'])
    show_graph(lr_list, max_epoch, max_step)
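# Quick sanity check on the curve recorded by test_scheduler. Assuming GradualWarmupScheduler
# scales the lr from base_lr up to base_lr * multiplier during warmup (the usual semantics of
# its `multiplier` argument), the peak of the curve should sit near 0.1. This helper is an
# added illustration, not part of the original test.
def check_warmup_peak(lr_list, base_lr=0.1 / 1024, multiplier=1024):
    peak = max(lr_list)
    assert math.isclose(peak, base_lr * multiplier, rel_tol=1e-2), \
        "unexpected warmup peak: {}".format(peak)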
def test_wc_scheduler():
    epochs = 90
    steps = 5005
    lr = 0.1
    t01_tmult2 = {
        'epochs': epochs,
        'steps': steps,
        't_max': steps * 1,
        't_mult': 2,
        'eta_min': 0,
        'lr': lr,
        'whole_decay': False,
        'out_name': "T_0={}-T_mult={}".format(steps * 1, 2),
    }
    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    t01_tmult2_out = check_annealing(model, optimizer, t01_tmult2)
    show_graph(t01_tmult2_out, epochs, steps, t01_tmult2['out_name'])
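# A second configuration exercising the whole_decay branch of check_annealing, where each
# restart peak is lowered along a cosine envelope over the full run. It mirrors
# test_wc_scheduler and is an added illustration rather than part of the original file.
def test_wc_scheduler_whole_decay():
    epochs = 90
    steps = 5005
    lr = 0.1
    params = {
        'epochs': epochs,
        'steps': steps,
        't_max': steps * 1,
        't_mult': 2,
        'eta_min': 0,
        'lr': lr,
        'whole_decay': True,
        'out_name': "T_0={}-T_mult={}-whole_decay".format(steps * 1, 2),
    }
    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    out = check_annealing(model, optimizer, params)
    show_graph(out, epochs, steps, params['out_name'])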
# # Cosine Annealing with Warm up for PyTorch
# ## Example
# ```
# >> model = ...
# >> optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5) # lr is min lr
# >> scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=250, T_mult=2, eta_max=0.1, T_up=50)
# >> for epoch in range(n_epoch):
# >> train()
# >> valid()
# >> scheduler.step()
# ```
# CosineAnnealingWarmUpRestarts(optimizer, T_0=150, T_mult=1, eta_max=0.1, T_up=10, gamma=0.5)
# CosineAnnealingWarmUpRestarts(optimizer, T_0=50, T_mult=2, eta_max=0.1, T_up=10, gamma=0.5)
# CosineAnnealingWarmUpRestarts(optimizer, T_0=100, T_mult=1, eta_max=0.1, T_up=10, gamma=0.5)
# CosineAnnealingWarmUpRestarts(optimizer, T_0=250, T_mult=1, eta_max=0.1, T_up=50)
# CosineAnnealingWarmUpRestarts(optimizer, T_0=250, T_mult=2, eta_max=0.1, T_up=50)
def main():
    test_scheduler()
    test_wc_scheduler()


if __name__ == '__main__':
    main()