Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [

[project.optional-dependencies]
dev = ["pytest>=8.4.2", "pytest-cov>=3.0.0", "ruff>=0.0.285"]
hpo = ["optuna>=3.6.0", "optuna-integration>=3.6.0"]
docs = [
"mkdocs-material>=9.4.0",
"mkdocstrings[python]>=0.22.0",
Expand Down
101 changes: 101 additions & 0 deletions src/electrai/configs/MP/config_hpo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Hyperparameter Optimization (HPO) configuration for ResUNet.
#
# Layout:
#   hpo           - study-level settings (trial budget, storage, pruner, sampler)
#   search_space  - one entry per tuned hyperparameter
#   data / model / top-level training keys - fixed base values; any key that
#     also appears in search_space is only a placeholder and is annotated
#     "overridden by search_space"

# HPO settings
hpo:
  study_name: resunet_hpo
  n_trials: 50
  timeout: null  # seconds, null for no timeout
  # NOTE(review): presumably handed to Optuna as an SQLAlchemy URL; with three
  # slashes the SQLite path is relative to the working directory - confirm
  # that every worker is launched from the same directory.
  storage: sqlite:///hpo_study.db
  direction: minimize  # minimize val_loss
  pruner:
    type: median
    n_startup_trials: 5
    # NOTE(review): warmup_length is searched up to 10 below, so pruning can
    # begin while a trial is still in its LR warmup - confirm this is intended.
    n_warmup_steps: 3  # epochs before pruning starts
  sampler:
    # NOTE(review): Optuna's grid sampler needs an explicit discrete grid; the
    # continuous `lr` range below would not fit one - confirm before using grid.
    type: tpe  # tpe, random, or grid

# Search space definitions
search_space:
  # Model architecture
  depth:
    type: int
    low: 2
    high: 4
  n_channels:
    type: categorical
    choices: [16, 32, 64]
  n_residual_blocks:
    type: int
    low: 1
    high: 3
  kernel_size:
    type: categorical
    choices: [3, 5]

  # Training parameters
  lr:
    type: float
    low: 1.0e-4
    high: 1.0e-2
    log: true
  weight_decay:
    type: categorical
    choices: [0.0, 1.0e-5, 1.0e-4]
  warmup_length:
    type: int
    low: 1
    high: 10
  gradient_clip_value:
    type: categorical
    choices: [1.0, 5.0, 20.0]

  # Data parameters
  batch_size:
    type: categorical
    choices: [1, 2, 4]
  augmentation:
    type: categorical
    choices: [true, false]

# Fixed dataset parameters (not tuned)
data:
  _target_: electrai.dataloader.dataset.RhoRead
  root: /scratch/gpfs/ROSENGROUP/common/globus_share_OA/mp/dataset_2/mp_filelist.txt
  split_file: null
  precision: f32
  batch_size: 1  # overridden by search_space
  train_workers: 8
  val_workers: 2
  pin_memory: false
  val_frac: 0.1  # larger val set for reliable HPO signal
  drop_last: false
  augmentation: false  # overridden by search_space
  random_seed: 42

# Fixed model parameters
model:
  _target_: electrai.model.resunet.ResUNet3D
  in_channels: 1
  out_channels: 1
  n_channels: 32  # overridden by search_space
  n_residual_blocks: 1  # overridden by search_space
  kernel_size: 5  # overridden by search_space
  depth: 2  # overridden by search_space

# Fixed training parameters
precision: 32
epochs: 15  # reduced for HPO iterations
lr: 0.001  # overridden by search_space
weight_decay: 0.0  # overridden by search_space
warmup_length: 3  # overridden by search_space
# Placeholder so every tuned key has a base value; 5.0 is the fixed value used
# by config_hpo_tier1.yaml and is one of the search_space choices.
gradient_clip_value: 5.0  # overridden by search_space
beta1: 0.9
beta2: 0.99

# Logging - W&B for tracking HPO trials
wandb_mode: online
entity: PrinceOA
wb_pname: mp-hpo

# Checkpoints - minimal saving during HPO
ckpt_path: ./hpo_checkpoints
81 changes: 81 additions & 0 deletions src/electrai/configs/MP/config_hpo_tier1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Tier 1 coarse search: tune only the most influential hyperparameters
# (model architecture and learning rate); everything else stays fixed.

# Study-level settings
hpo:
  study_name: resunet_hpo_tier1
  n_trials: 50
  timeout: null
  storage: sqlite:///hpo_tier1.db
  direction: minimize
  pruner:
    type: median
    n_startup_trials: 5
    n_warmup_steps: 3
  sampler:
    type: tpe

# Parameters explored in tier 1
search_space:
  # Architecture knobs
  depth:
    type: int
    low: 2
    high: 4
  n_channels:
    type: categorical
    choices:
      - 16
      - 32
      - 64
  n_residual_blocks:
    type: int
    low: 1
    high: 3
  kernel_size:
    type: categorical
    choices:
      - 3
      - 5

  # Optimizer step size
  lr:
    type: float
    low: 1.0e-4
    high: 1.0e-2
    log: true

# Dataset settings - tier 2 parameters held at reasonable defaults
data:
  _target_: electrai.dataloader.dataset.RhoRead
  root: /scratch/gpfs/ROSENGROUP/common/globus_share_OA/mp/dataset_2/mp_filelist.txt
  split_file: null
  precision: f32
  batch_size: 1  # held constant in tier 1
  train_workers: 8
  val_workers: 2
  pin_memory: false
  val_frac: 0.1
  drop_last: false
  augmentation: false  # held constant in tier 1: off
  random_seed: 42

# Model settings; the four architecture keys are also listed in search_space
model:
  _target_: electrai.model.resunet.ResUNet3D
  in_channels: 1
  out_channels: 1
  n_channels: 32
  n_residual_blocks: 1
  kernel_size: 5
  depth: 2

# Training settings
precision: 32
epochs: 15
lr: 0.001
weight_decay: 0.0  # held constant in tier 1
warmup_length: 5  # held constant in tier 1
gradient_clip_value: 5.0  # held constant in tier 1
beta1: 0.9
beta2: 0.99

# Weights & Biases logging
wandb_mode: online
entity: PrinceOA
wb_pname: betsy-mp-hpo-tier1

# Checkpoint output directory
ckpt_path: ./hpo_tier1_checkpoints
Loading
Loading