Skip to content
36 changes: 34 additions & 2 deletions src/control/server/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ const (
envLogDbgStreams = "DD_MASK"
envLogSubsystems = "DD_SUBSYS"

minABTThreadStackSizeDCPM = 20480
minABTThreadStackSizeUCX = 32768
minABTThreadStackSizeDCPM = 20480
minABTThreadStackSizeUCX = 32768
minABTThreadStackSizeMdOnSsd = 24576
)

// FabricConfig encapsulates networking fabric configuration.
Expand Down Expand Up @@ -419,6 +420,37 @@ func (c *Config) UpdatePMDKEnvars() error {
return nil
}

// UpdateMdOnSsdStackSize enforces the minimum ABT thread stack size (24KiB)
// needed by an engine running in md_on_ssd mode.
//
// When looking up ABT_THREAD_STACKSIZE in the engine environment returns an
// error, the variable is appended to c.EnvVars with the minimum value. When a
// value is found, it must parse as an integer and be no smaller than the
// minimum; otherwise an error describing the problem is returned.
func (c *Config) UpdateMdOnSsdStackSize() error {
	raw, lookupErr := c.GetEnvVar("ABT_THREAD_STACKSIZE")
	if lookupErr != nil {
		// No usable value was retrieved: inject the minimum and finish.
		entry := fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", minABTThreadStackSizeMdOnSsd)
		c.EnvVars = append(c.EnvVars, entry)
		return nil
	}

	// A value was supplied: validate it rather than silently overriding it.
	size, convErr := strconv.Atoi(raw)
	switch {
	case convErr != nil:
		return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s", raw)
	case size < minABTThreadStackSizeMdOnSsd:
		return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+
			"for MD on SSD, found %d", minABTThreadStackSizeMdOnSsd, size)
	}

	return nil
}

// UpdateABTEnvarsMdOnSsd applies the md_on_ssd ABT stack size requirement
// (24KiB minimum) when the engine's storage tiers report a bdev with the meta
// role. For any other storage configuration it does nothing and returns nil.
func (c *Config) UpdateABTEnvarsMdOnSsd() error {
	if !c.Storage.Tiers.HasBdevRoleMeta() {
		// No meta role on any tier: nothing to enforce.
		return nil
	}

	return c.UpdateMdOnSsdStackSize()
}

// Increase ABT stack size for UCX provider.
func (c *Config) UpdateABTEnvarsUCX() error {

Expand Down
74 changes: 72 additions & 2 deletions src/control/server/engine/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,7 @@ func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) {
validConfig := func() *Config {
return MockConfig().WithStorage(
storage.NewTierConfig().
WithStorageClass("dcpm"))
WithStorageClass(storage.ClassDcpm.String()))
}

for name, tc := range map[string]struct {
Expand Down Expand Up @@ -1162,6 +1162,76 @@ func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) {
}
}

// TestConfig_UpdateMdOnSsdStackSize runs UpdateABTEnvarsMdOnSsd against a
// table of engine configurations and checks both the returned error and the
// resulting ABT_THREAD_STACKSIZE env var (an expected size of 0 means the
// variable must be absent afterwards).
func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) {
	// newMdOnSsdCfg returns a mock config with a RAM tier followed by an NVMe
	// tier carrying the meta bdev role.
	newMdOnSsdCfg := func() *Config {
		base := MockConfig()
		ramTier := storage.NewTierConfig().
			WithStorageClass(storage.ClassRam.String())
		metaTier := storage.NewTierConfig().
			WithStorageClass(storage.ClassNvme.String()).
			WithBdevDeviceRoles(storage.BdevRoleMeta)

		return base.WithStorage(ramTier, metaTier)
	}

	type testCase struct {
		cfg          *Config
		expErr       error
		expStackSize int
	}

	cases := map[string]testCase{
		"empty config should not set ABT_THREAD_STACKSIZE": {
			cfg:          MockConfig(),
			expStackSize: 0,
		},
		"non-md_on_ssd config should not set ABT_THREAD_STACKSIZE": {
			cfg: MockConfig().WithStorage(
				storage.NewTierConfig().
					WithStorageClass(storage.ClassRam.String())),
			expStackSize: 0,
		},
		"valid config for md_on_ssd should set ABT_THREAD_STACKSIZE": {
			cfg: newMdOnSsdCfg().
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd),
			expStackSize: minABTThreadStackSizeMdOnSsd,
		},
		"config for md_on_ssd without thread size should set ABT_THREAD_STACKSIZE": {
			cfg:          newMdOnSsdCfg(),
			expStackSize: minABTThreadStackSizeMdOnSsd,
		},
		"config for md_on_ssd with stack size big enough should not change ABT_THREAD_STACKSIZE": {
			cfg: newMdOnSsdCfg().
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd + 1),
			expStackSize: minABTThreadStackSizeMdOnSsd + 1,
		},
		"config for md_on_ssd with stack size too small should fail": {
			cfg: newMdOnSsdCfg().
				WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd - 1),
			expErr: errors.New(fmt.Sprintf("env_var ABT_THREAD_STACKSIZE "+
				"should be >= %d for MD on SSD, found %d",
				minABTThreadStackSizeMdOnSsd, minABTThreadStackSizeMdOnSsd-1)),
		},
		"config for md_on_ssd with invalid ABT_THREAD_STACKSIZE value should fail": {
			cfg:    newMdOnSsdCfg().WithEnvVars("ABT_THREAD_STACKSIZE=foo_bar"),
			expErr: errors.New("env_var ABT_THREAD_STACKSIZE has invalid value: foo_bar"),
		},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			gotErr := tc.cfg.UpdateABTEnvarsMdOnSsd()
			test.CmpErr(t, tc.expErr, gotErr)
			if gotErr != nil {
				// Error cases end here; the env var is not inspected.
				return
			}

			rawSize, getErr := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE")
			if tc.expStackSize == 0 {
				// Zero expectation: the variable must not have been added.
				test.AssertTrue(t, getErr != nil, "Unexpected env var ABT_THREAD_STACKSIZE")
				return
			}
			test.AssertTrue(t, getErr == nil, "Missing env var ABT_THREAD_STACKSIZE")

			gotSize, convErr := strconv.Atoi(rawSize)
			test.AssertTrue(t, convErr == nil, "Invalid env var ABT_THREAD_STACKSIZE")
			test.AssertEqual(t, tc.expStackSize, gotSize,
				"Invalid ABT_THREAD_STACKSIZE value")
		})
	}
}

func TestConfig_UpdateABTEnvarsUCX(t *testing.T) {
validConfig := func() *Config {
return MockConfig().
Expand Down Expand Up @@ -1223,7 +1293,7 @@ func TestConfig_UpdateABTEnvarsUCX(t *testing.T) {
func TestConfig_UpdatePMDKEnvarsPMemobjConfDCPM(t *testing.T) {
validConfig := func() *Config {
return MockConfig().WithStorage(
storage.NewTierConfig().WithStorageClass("dcpm"))
storage.NewTierConfig().WithStorageClass(storage.ClassDcpm.String()))
}

for name, tc := range map[string]struct {
Expand Down
6 changes: 4 additions & 2 deletions src/control/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,14 @@ func processConfig(log logging.Logger, cfg *config.Server, fis *hardware.FabricI
if err := ec.UpdateABTEnvarsUCX(); err != nil {
return err
}
}

for _, ec := range cfg.Engines {
if err := ec.UpdatePMDKEnvars(); err != nil {
return err
}

if err := ec.UpdateABTEnvarsMdOnSsd(); err != nil {
return err
}
}

return nil
Expand Down
Loading