From dd172f7428b8e4102bfb0ed72221bab5629798e2 Mon Sep 17 00:00:00 2001 From: "Jerry J. Harrow" <84593277+jerryharrow@users.noreply.github.com> Date: Thu, 13 Apr 2023 17:40:49 -0400 Subject: [PATCH 1/2] fix: Append instead of replace sbatch args [DET-9263] Previously, the experiment config's slurm|pbs.sbatch_args replaced the task_container_defaults sbatch_args. They are now appended after the task container defaults instead. Add unit test cases for Slurm/Pbs config merging and inheritance. --- master/pkg/model/task_container_defaults.go | 11 +++++++ .../pkg/model/task_container_defaults_test.go | 32 +++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index fd2f14b1540..f075946acf6 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -134,8 +134,19 @@ func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.Experim bindMounts := c.BindMounts.ToExpconf() config.RawBindMounts = schemas.Merge(config.RawBindMounts, bindMounts) + configRawSlurmConfig := config.RawSlurmConfig config.RawSlurmConfig = schemas.Merge(config.RawSlurmConfig, &c.Slurm) + if configRawSlurmConfig != nil { + config.RawSlurmConfig.RawSbatchArgs = append( + c.Slurm.SbatchArgs(), configRawSlurmConfig.SbatchArgs()...) + } + + configRawPbsConfig := config.RawPbsConfig config.RawPbsConfig = schemas.Merge(config.RawPbsConfig, &c.Pbs) + if configRawPbsConfig != nil { + config.RawPbsConfig.RawSbatchArgs = append( + c.Pbs.SbatchArgs(), configRawPbsConfig.SbatchArgs()...) 
+ } } var mergeCopier = copier.Option{IgnoreEmpty: true, DeepCopy: true} diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 7df7dc36c15..fd890d58217 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -14,8 +14,20 @@ import ( ) func TestEnvironmentVarsDefaultMerging(t *testing.T) { - gpuType := "tesla" - pbsSlotsPerNode := 99 + defaultGpuType := "tesla" + defaultSlotsPerNode := 99 + + expGpuType := "a100" + expSlurmSlotsPerNode := 8 + expSlurmConfig := expconf.SlurmConfigV0{ + RawGpuType: &expGpuType, + RawSlotsPerNode: &expSlurmSlotsPerNode, + RawSbatchArgs: []string{"-SlrumExpConf"}, + } + expPbsConfig := expconf.PbsConfigV0{ + RawSbatchArgs: []string{"-PbsExpConf"}, + } + defaults := &TaskContainerDefaultsConfig{ EnvironmentVariables: &RuntimeItems{ CPU: []string{"cpu=default"}, @@ -23,10 +35,12 @@ func TestEnvironmentVarsDefaultMerging(t *testing.T) { ROCM: []string{"rocm=default"}, }, Slurm: expconf.SlurmConfigV0{ - RawGpuType: &gpuType, + RawGpuType: &defaultGpuType, + RawSbatchArgs: []string{"-SlrumTaskDefault"}, }, Pbs: expconf.PbsConfigV0{ - RawSlotsPerNode: &pbsSlotsPerNode, + RawSlotsPerNode: &defaultSlotsPerNode, + RawSbatchArgs: []string{"-WpbsTaskDefault"}, }, } conf := expconf.ExperimentConfig{ @@ -36,7 +50,10 @@ func TestEnvironmentVarsDefaultMerging(t *testing.T) { RawCUDA: []string{"extra=expconf"}, }, }, + RawSlurmConfig: &expSlurmConfig, + RawPbsConfig: &expPbsConfig, } + defaults.MergeIntoExpConfig(&conf) require.Equal(t, conf.RawEnvironment.RawEnvironmentVariables, @@ -46,8 +63,11 @@ func TestEnvironmentVarsDefaultMerging(t *testing.T) { RawROCM: []string{"rocm=default"}, }) - require.Equal(t, *conf.RawSlurmConfig.RawGpuType, gpuType) - require.Equal(t, *conf.RawPbsConfig.RawSlotsPerNode, pbsSlotsPerNode) + require.Equal(t, *conf.RawSlurmConfig.RawGpuType, expGpuType) + require.Equal(t, 
*conf.RawSlurmConfig.RawSlotsPerNode, expSlurmSlotsPerNode) + require.Equal(t, conf.RawSlurmConfig.SbatchArgs(), []string{"-SlrumTaskDefault", "-SlrumExpConf"}) + require.Equal(t, *conf.RawPbsConfig.RawSlotsPerNode, defaultSlotsPerNode) + require.Equal(t, conf.RawPbsConfig.SbatchArgs(), []string{"-WpbsTaskDefault", "-PbsExpConf"}) } func TestTaskContainerDefaultsConfigMerging(t *testing.T) { From fb8f642b09a115d986f6b8c0f7110e068bef38a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 May 2023 17:10:28 +0000 Subject: [PATCH 2/2] build(deps): bump torchvision from 0.10.0 to 0.15.1 in /examples/tests Bumps [torchvision](https://github.com/pytorch/vision) from 0.10.0 to 0.15.1. - [Release notes](https://github.com/pytorch/vision/releases) - [Commits](https://github.com/pytorch/vision/compare/v0.10.0...v0.15.1) --- updated-dependencies: - dependency-name: torchvision dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- examples/tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tests/requirements.txt b/examples/tests/requirements.txt index 3558fc07797..e7e255347fe 100644 --- a/examples/tests/requirements.txt +++ b/examples/tests/requirements.txt @@ -6,7 +6,7 @@ tensorflow-macos==2.11.0; sys_platform == 'darwin' and platform_machine == 'arm6 torch==1.9.0 # torchvision is pinned because this is the most recent version compatible with the version of torch # currently required by determined in its requirements.txt (torch==1.9.0). -torchvision==0.10.0 +torchvision==0.15.1 pandas tensorflow_datasets scipy