@@ -31,9 +31,9 @@ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dat
3131CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset cifar100 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
3232
3333# ResNet-18 Tiny-ImageNet (3 seeds)
34- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny-imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
35- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny-imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
36- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny-imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
34+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny_imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
35+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny_imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
36+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny_imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
3737
3838# ResNet-50 CIFAR-10 (3 seeds)
3939CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset cifar10 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
@@ -46,9 +46,9 @@ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dat
4646CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset cifar100 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
4747
4848# ResNet-50 Tiny-ImageNet (3 seeds)
49- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny-imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
50- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny-imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
51- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny-imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
49+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny_imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
50+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny_imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
51+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny_imagenet --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
5252
5353
5454================================================================================
@@ -71,9 +71,9 @@ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --
7171CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset cifar100 --teacher-path results/raw/cifar100/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
7272
7373# ResNet-18 Tiny-ImageNet FP32+KD (3 seeds)
74- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet18 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
75- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet18 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 123
76- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet18 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
74+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet18 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
75+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet18 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 123
76+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet18 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet18/std_s42/best_model.pth --student-is-fp32 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
7777
7878
7979================================================================================
@@ -94,9 +94,9 @@ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dat
9494CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset cifar100 --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
9595
9696# ResNet-18 Tiny-ImageNet BitNet (3 seeds)
97- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny-imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
98- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny-imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
99- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny-imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
97+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny_imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
98+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny_imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
99+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train --model resnet18 --dataset tiny_imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
100100
101101# ResNet-50 CIFAR-10 BitNet (3 seeds)
102102CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset cifar10 --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
@@ -109,9 +109,9 @@ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dat
109109CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset cifar100 --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
110110
111111# ResNet-50 Tiny-ImageNet BitNet (3 seeds)
112- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny-imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
113- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny-imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
114- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny-imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
112+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny_imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 42
113+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny_imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 123
114+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train --model resnet50 --dataset tiny_imagenet --bit-version --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --mixup-alpha 0.2 --label-smoothing 0.1 --seed 456
115115
116116
117117================================================================================
@@ -134,9 +134,9 @@ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --
134134CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset cifar100 --teacher-path results/raw/cifar100/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
135135
136136# ResNet-18 Tiny-ImageNet BitNet+Recipe (3 seeds)
137- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
138- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 123
139- CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
137+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
138+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 123
139+ CUDA_VISIBLE_DEVICES=0 uv run python -m experiments.train_kd --model resnet18 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet18/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
140140
141141# ResNet-50 CIFAR-10 BitNet+Recipe (3 seeds)
142142CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset cifar10 --teacher-path results/raw/cifar10/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
@@ -149,9 +149,9 @@ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --
149149CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset cifar100 --teacher-path results/raw/cifar100/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
150150
151151# ResNet-50 Tiny-ImageNet BitNet+Recipe (3 seeds)
152- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
153- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 123
154- CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset tiny-imagenet --teacher-path results/raw/tiny-imagenet/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
152+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 42
153+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 123
154+ CUDA_VISIBLE_DEVICES=1 uv run python -m experiments.train_kd --model resnet50 --dataset tiny_imagenet --teacher-path results/raw/tiny_imagenet/resnet50/std_s42/best_model.pth --ablation keep_conv1 --epochs 300 --warmup-epochs 5 --min-lr 1e-5 --seed 456
155155
156156
157157================================================================================
0 commit comments